GNU Linux-libre 4.14.266-gnu1
[releases.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
39 /*(DEBLOBBED)*/
40
/* Forward declarations for static helpers defined later in this file. */
41 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
42 static void si_pcie_gen3_enable(struct radeon_device *rdev);
43 static void si_program_aspm(struct radeon_device *rdev);
/* Helpers implemented in other radeon ASIC files (sumo_*, r600_*, evergreen_*). */
44 extern void sumo_rlc_fini(struct radeon_device *rdev);
45 extern int sumo_rlc_init(struct radeon_device *rdev);
46 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
47 extern void r600_ih_ring_fini(struct radeon_device *rdev);
48 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
49 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
50 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
51 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
52 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
53 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
/* More file-local forward declarations (gating / RLC control). */
54 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
55                                          bool enable);
56 static void si_init_pg(struct radeon_device *rdev);
57 static void si_init_cg(struct radeon_device *rdev);
58 static void si_fini_pg(struct radeon_device *rdev);
59 static void si_fini_cg(struct radeon_device *rdev);
60 static void si_rlc_stop(struct radeon_device *rdev);
61
/* MMIO register-block offset of each of the six display CRTCs,
 * indexed by CRTC number (0..5). */
62 static const u32 crtc_offsets[] =
63 {
64         EVERGREEN_CRTC0_REGISTER_OFFSET,
65         EVERGREEN_CRTC1_REGISTER_OFFSET,
66         EVERGREEN_CRTC2_REGISTER_OFFSET,
67         EVERGREEN_CRTC3_REGISTER_OFFSET,
68         EVERGREEN_CRTC4_REGISTER_OFFSET,
69         EVERGREEN_CRTC5_REGISTER_OFFSET
70 };
71
/* The six display interrupt status registers (the base STATUS register
 * followed by the CONTINUE..CONTINUE5 registers), indexed 0..5. */
72 static const u32 si_disp_int_status[] =
73 {
74         DISP_INTERRUPT_STATUS,
75         DISP_INTERRUPT_STATUS_CONTINUE,
76         DISP_INTERRUPT_STATUS_CONTINUE2,
77         DISP_INTERRUPT_STATUS_CONTINUE3,
78         DISP_INTERRUPT_STATUS_CONTINUE4,
79         DISP_INTERRUPT_STATUS_CONTINUE5
80 };
81
/* Per-pad HPD register accessors: pad index 0 maps to the DC_HPD1_*
 * registers, and successive pads are 0xc bytes apart.  The argument is
 * parenthesized so that expression arguments (e.g. "i + 1") expand
 * correctly despite the multiplication's precedence. */
82 #define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
83 #define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
84 #define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
85
/*
 * RLC save/restore register list for Verde.
 *
 * Entries come in (encoded register, 0x00000000) pairs: the low bits of
 * each encoding are a dword register offset (reg >> 2) and the high 16
 * bits look like an SE/instance select (0x8000/0x8040/0x8001/0x8041 vs.
 * broadcast 0x9c00).  NOTE(review): the exact encoding is consumed by
 * the RLC setup code, which is not visible in this chunk -- confirm
 * against sumo_rlc_init() / the RLC ucode spec before relying on it.
 * The list is terminated by a trailing single 0x00000000.
 * (One stray "0X" hex prefix was normalized to lowercase "0x" for
 * consistency; the value is unchanged.)
 */
86 static const u32 verde_rlc_save_restore_register_list[] =
87 {
88         (0x8000 << 16) | (0x98f4 >> 2),
89         0x00000000,
90         (0x8040 << 16) | (0x98f4 >> 2),
91         0x00000000,
92         (0x8000 << 16) | (0xe80 >> 2),
93         0x00000000,
94         (0x8040 << 16) | (0xe80 >> 2),
95         0x00000000,
96         (0x8000 << 16) | (0x89bc >> 2),
97         0x00000000,
98         (0x8040 << 16) | (0x89bc >> 2),
99         0x00000000,
100         (0x8000 << 16) | (0x8c1c >> 2),
101         0x00000000,
102         (0x8040 << 16) | (0x8c1c >> 2),
103         0x00000000,
104         (0x9c00 << 16) | (0x98f0 >> 2),
105         0x00000000,
106         (0x9c00 << 16) | (0xe7c >> 2),
107         0x00000000,
108         (0x8000 << 16) | (0x9148 >> 2),
109         0x00000000,
110         (0x8040 << 16) | (0x9148 >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0x9150 >> 2),
113         0x00000000,
114         (0x9c00 << 16) | (0x897c >> 2),
115         0x00000000,
116         (0x9c00 << 16) | (0x8d8c >> 2),
117         0x00000000,
118         (0x9c00 << 16) | (0xac54 >> 2),
119         0x00000000,
/* NOTE(review): bare 0x3 breaks the (reg, 0) pair pattern here -- appears
 * intentional (present upstream) but its meaning is not derivable from
 * this chunk; confirm against the list consumer. */
120         0x3,
121         (0x9c00 << 16) | (0x98f8 >> 2),
122         0x00000000,
123         (0x9c00 << 16) | (0x9910 >> 2),
124         0x00000000,
125         (0x9c00 << 16) | (0x9914 >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9918 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x991c >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x9920 >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9924 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9928 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x992c >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x9930 >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9934 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9938 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x993c >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x9940 >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9944 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9948 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x994c >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x9950 >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9954 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9958 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x995c >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x9960 >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9964 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9968 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x996c >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x9970 >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9974 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9978 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x997c >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9980 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x9984 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x9988 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x998c >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x8c00 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x8c14 >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x8c04 >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x8c08 >> 2),
194         0x00000000,
195         (0x8000 << 16) | (0x9b7c >> 2),
196         0x00000000,
197         (0x8040 << 16) | (0x9b7c >> 2),
198         0x00000000,
199         (0x8000 << 16) | (0xe84 >> 2),
200         0x00000000,
201         (0x8040 << 16) | (0xe84 >> 2),
202         0x00000000,
203         (0x8000 << 16) | (0x89c0 >> 2),
204         0x00000000,
205         (0x8040 << 16) | (0x89c0 >> 2),
206         0x00000000,
207         (0x8000 << 16) | (0x914c >> 2),
208         0x00000000,
209         (0x8040 << 16) | (0x914c >> 2),
210         0x00000000,
211         (0x8000 << 16) | (0x8c20 >> 2),
212         0x00000000,
213         (0x8040 << 16) | (0x8c20 >> 2),
214         0x00000000,
215         (0x8000 << 16) | (0x9354 >> 2),
216         0x00000000,
217         (0x8040 << 16) | (0x9354 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x9060 >> 2),
220         0x00000000,
221         (0x9c00 << 16) | (0x9364 >> 2),
222         0x00000000,
223         (0x9c00 << 16) | (0x9100 >> 2),
224         0x00000000,
225         (0x9c00 << 16) | (0x913c >> 2),
226         0x00000000,
227         (0x8000 << 16) | (0x90e0 >> 2),
228         0x00000000,
229         (0x8000 << 16) | (0x90e4 >> 2),
230         0x00000000,
231         (0x8000 << 16) | (0x90e8 >> 2),
232         0x00000000,
233         (0x8040 << 16) | (0x90e0 >> 2),
234         0x00000000,
235         (0x8040 << 16) | (0x90e4 >> 2),
236         0x00000000,
237         (0x8040 << 16) | (0x90e8 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x8bcc >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8b24 >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x88c4 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8e50 >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x8c0c >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x8e58 >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x8e5c >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0x9508 >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0x950c >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0x9494 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0xac0c >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0xac10 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0xac14 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0xae00 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0xac08 >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x88d4 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x88c8 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x88cc >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x89b0 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x8b10 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x8a14 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x9830 >> 2),
282         0x00000000,
283         (0x9c00 << 16) | (0x9834 >> 2),
284         0x00000000,
285         (0x9c00 << 16) | (0x9838 >> 2),
286         0x00000000,
287         (0x9c00 << 16) | (0x9a10 >> 2),
288         0x00000000,
289         (0x8000 << 16) | (0x9870 >> 2),
290         0x00000000,
291         (0x8000 << 16) | (0x9874 >> 2),
292         0x00000000,
293         (0x8001 << 16) | (0x9870 >> 2),
294         0x00000000,
295         (0x8001 << 16) | (0x9874 >> 2),
296         0x00000000,
297         (0x8040 << 16) | (0x9870 >> 2),
298         0x00000000,
299         (0x8040 << 16) | (0x9874 >> 2),
300         0x00000000,
301         (0x8041 << 16) | (0x9870 >> 2),
302         0x00000000,
303         (0x8041 << 16) | (0x9874 >> 2),
304         0x00000000,
305         0x00000000
306 };
307
/* Golden RLC register settings for Tahiti: {offset, mask, value}
 * triples -- presumably applied read-modify-write at init time;
 * the consumer is not visible in this chunk. */
308 static const u32 tahiti_golden_rlc_registers[] =
309 {
310         0xc424, 0xffffffff, 0x00601005,
311         0xc47c, 0xffffffff, 0x10104040,
312         0xc488, 0xffffffff, 0x0100000a,
313         0xc314, 0xffffffff, 0x00000800,
314         0xc30c, 0xffffffff, 0x800000f4,
315         0xf4a8, 0xffffffff, 0x00000000
316 };
317
/* Golden register settings for Tahiti: {offset, mask, value} triples
 * (AMD-recommended power-on defaults). */
318 static const u32 tahiti_golden_registers[] =
319 {
320         0x9a10, 0x00010000, 0x00018208,
321         0x9830, 0xffffffff, 0x00000000,
322         0x9834, 0xf00fffff, 0x00000400,
323         0x9838, 0x0002021c, 0x00020200,
324         0xc78, 0x00000080, 0x00000000,
325         0xd030, 0x000300c0, 0x00800040,
326         0xd830, 0x000300c0, 0x00800040,
327         0x5bb0, 0x000000f0, 0x00000070,
328         0x5bc0, 0x00200000, 0x50100000,
329         0x7030, 0x31000311, 0x00000011,
330         0x277c, 0x00000003, 0x000007ff,
331         0x240c, 0x000007ff, 0x00000000,
332         0x8a14, 0xf000001f, 0x00000007,
333         0x8b24, 0xffffffff, 0x00ffffff,
334         0x8b10, 0x0000ff0f, 0x00000000,
335         0x28a4c, 0x07ffffff, 0x4e000000,
336         0x28350, 0x3f3f3fff, 0x2a00126a,
337         0x30, 0x000000ff, 0x0040,
338         0x34, 0x00000040, 0x00004040,
339         0x9100, 0x07ffffff, 0x03000000,
340         0x8e88, 0x01ff1f3f, 0x00000000,
341         0x8e84, 0x01ff1f3f, 0x00000000,
342         0x9060, 0x0000007f, 0x00000020,
343         0x9508, 0x00010000, 0x00010000,
344         0xac14, 0x00000200, 0x000002fb,
345         0xac10, 0xffffffff, 0x0000543b,
346         0xac0c, 0xffffffff, 0xa9210876,
347         0x88d0, 0xffffffff, 0x000fff40,
348         0x88d4, 0x0000001f, 0x00000010,
349         0x1410, 0x20000000, 0x20fffed8,
350         0x15c0, 0x000c0fc0, 0x000c0400
351 };
352
/* Additional Tahiti golden register triple applied separately from the
 * main table (split rationale not visible in this chunk). */
353 static const u32 tahiti_golden_registers2[] =
354 {
355         0xc64, 0x00000001, 0x00000001
356 };
357
/* Golden RLC register settings for Pitcairn: {offset, mask, value}
 * triples. */
358 static const u32 pitcairn_golden_rlc_registers[] =
359 {
360         0xc424, 0xffffffff, 0x00601004,
361         0xc47c, 0xffffffff, 0x10102020,
362         0xc488, 0xffffffff, 0x01000020,
363         0xc314, 0xffffffff, 0x00000800,
364         0xc30c, 0xffffffff, 0x800000a4
365 };
366
/* Golden register settings for Pitcairn: {offset, mask, value} triples. */
367 static const u32 pitcairn_golden_registers[] =
368 {
369         0x9a10, 0x00010000, 0x00018208,
370         0x9830, 0xffffffff, 0x00000000,
371         0x9834, 0xf00fffff, 0x00000400,
372         0x9838, 0x0002021c, 0x00020200,
373         0xc78, 0x00000080, 0x00000000,
374         0xd030, 0x000300c0, 0x00800040,
375         0xd830, 0x000300c0, 0x00800040,
376         0x5bb0, 0x000000f0, 0x00000070,
377         0x5bc0, 0x00200000, 0x50100000,
378         0x7030, 0x31000311, 0x00000011,
379         0x2ae4, 0x00073ffe, 0x000022a2,
380         0x240c, 0x000007ff, 0x00000000,
381         0x8a14, 0xf000001f, 0x00000007,
382         0x8b24, 0xffffffff, 0x00ffffff,
383         0x8b10, 0x0000ff0f, 0x00000000,
384         0x28a4c, 0x07ffffff, 0x4e000000,
385         0x28350, 0x3f3f3fff, 0x2a00126a,
386         0x30, 0x000000ff, 0x0040,
387         0x34, 0x00000040, 0x00004040,
388         0x9100, 0x07ffffff, 0x03000000,
389         0x9060, 0x0000007f, 0x00000020,
390         0x9508, 0x00010000, 0x00010000,
391         0xac14, 0x000003ff, 0x000000f7,
392         0xac10, 0xffffffff, 0x00000000,
393         0xac0c, 0xffffffff, 0x32761054,
394         0x88d4, 0x0000001f, 0x00000010,
395         0x15c0, 0x000c0fc0, 0x000c0400
396 };
397
/* Golden RLC register settings for Verde: {offset, mask, value}
 * triples. */
398 static const u32 verde_golden_rlc_registers[] =
399 {
400         0xc424, 0xffffffff, 0x033f1005,
401         0xc47c, 0xffffffff, 0x10808020,
402         0xc488, 0xffffffff, 0x00800008,
403         0xc314, 0xffffffff, 0x00001000,
404         0xc30c, 0xffffffff, 0x80010014
405 };
406
/* Golden register settings for Verde: {offset, mask, value} triples.
 * NOTE(review): many triples appear two or three times verbatim
 * (0xd030, 0x2ae4, 0x240c, 0x8a14, 0x28350, 0x9100, 0x8e88, 0x8e84,
 * 0xac14, 0xac10, 0xac0c, 0x88d4).  Harmless if the writes are
 * idempotent, but it looks like copy-paste -- matches upstream, so
 * preserved as-is. */
407 static const u32 verde_golden_registers[] =
408 {
409         0x9a10, 0x00010000, 0x00018208,
410         0x9830, 0xffffffff, 0x00000000,
411         0x9834, 0xf00fffff, 0x00000400,
412         0x9838, 0x0002021c, 0x00020200,
413         0xc78, 0x00000080, 0x00000000,
414         0xd030, 0x000300c0, 0x00800040,
415         0xd030, 0x000300c0, 0x00800040,
416         0xd830, 0x000300c0, 0x00800040,
417         0xd830, 0x000300c0, 0x00800040,
418         0x5bb0, 0x000000f0, 0x00000070,
419         0x5bc0, 0x00200000, 0x50100000,
420         0x7030, 0x31000311, 0x00000011,
421         0x2ae4, 0x00073ffe, 0x000022a2,
422         0x2ae4, 0x00073ffe, 0x000022a2,
423         0x2ae4, 0x00073ffe, 0x000022a2,
424         0x240c, 0x000007ff, 0x00000000,
425         0x240c, 0x000007ff, 0x00000000,
426         0x240c, 0x000007ff, 0x00000000,
427         0x8a14, 0xf000001f, 0x00000007,
428         0x8a14, 0xf000001f, 0x00000007,
429         0x8a14, 0xf000001f, 0x00000007,
430         0x8b24, 0xffffffff, 0x00ffffff,
431         0x8b10, 0x0000ff0f, 0x00000000,
432         0x28a4c, 0x07ffffff, 0x4e000000,
433         0x28350, 0x3f3f3fff, 0x0000124a,
434         0x28350, 0x3f3f3fff, 0x0000124a,
435         0x28350, 0x3f3f3fff, 0x0000124a,
436         0x30, 0x000000ff, 0x0040,
437         0x34, 0x00000040, 0x00004040,
438         0x9100, 0x07ffffff, 0x03000000,
439         0x9100, 0x07ffffff, 0x03000000,
440         0x8e88, 0x01ff1f3f, 0x00000000,
441         0x8e88, 0x01ff1f3f, 0x00000000,
442         0x8e88, 0x01ff1f3f, 0x00000000,
443         0x8e84, 0x01ff1f3f, 0x00000000,
444         0x8e84, 0x01ff1f3f, 0x00000000,
445         0x8e84, 0x01ff1f3f, 0x00000000,
446         0x9060, 0x0000007f, 0x00000020,
447         0x9508, 0x00010000, 0x00010000,
448         0xac14, 0x000003ff, 0x00000003,
449         0xac14, 0x000003ff, 0x00000003,
450         0xac14, 0x000003ff, 0x00000003,
451         0xac10, 0xffffffff, 0x00000000,
452         0xac10, 0xffffffff, 0x00000000,
453         0xac10, 0xffffffff, 0x00000000,
454         0xac0c, 0xffffffff, 0x00001032,
455         0xac0c, 0xffffffff, 0x00001032,
456         0xac0c, 0xffffffff, 0x00001032,
457         0x88d4, 0x0000001f, 0x00000010,
458         0x88d4, 0x0000001f, 0x00000010,
459         0x88d4, 0x0000001f, 0x00000010,
460         0x15c0, 0x000c0fc0, 0x000c0400
461 };
462
/* Golden RLC register settings for Oland: {offset, mask, value}
 * triples. */
463 static const u32 oland_golden_rlc_registers[] =
464 {
465         0xc424, 0xffffffff, 0x00601005,
466         0xc47c, 0xffffffff, 0x10104040,
467         0xc488, 0xffffffff, 0x0100000a,
468         0xc314, 0xffffffff, 0x00000800,
469         0xc30c, 0xffffffff, 0x800000f4
470 };
471
/* Golden register settings for Oland: {offset, mask, value} triples. */
472 static const u32 oland_golden_registers[] =
473 {
474         0x9a10, 0x00010000, 0x00018208,
475         0x9830, 0xffffffff, 0x00000000,
476         0x9834, 0xf00fffff, 0x00000400,
477         0x9838, 0x0002021c, 0x00020200,
478         0xc78, 0x00000080, 0x00000000,
479         0xd030, 0x000300c0, 0x00800040,
480         0xd830, 0x000300c0, 0x00800040,
481         0x5bb0, 0x000000f0, 0x00000070,
482         0x5bc0, 0x00200000, 0x50100000,
483         0x7030, 0x31000311, 0x00000011,
484         0x2ae4, 0x00073ffe, 0x000022a2,
485         0x240c, 0x000007ff, 0x00000000,
486         0x8a14, 0xf000001f, 0x00000007,
487         0x8b24, 0xffffffff, 0x00ffffff,
488         0x8b10, 0x0000ff0f, 0x00000000,
489         0x28a4c, 0x07ffffff, 0x4e000000,
490         0x28350, 0x3f3f3fff, 0x00000082,
491         0x30, 0x000000ff, 0x0040,
492         0x34, 0x00000040, 0x00004040,
493         0x9100, 0x07ffffff, 0x03000000,
494         0x9060, 0x0000007f, 0x00000020,
495         0x9508, 0x00010000, 0x00010000,
496         0xac14, 0x000003ff, 0x000000f3,
497         0xac10, 0xffffffff, 0x00000000,
498         0xac0c, 0xffffffff, 0x00003210,
499         0x88d4, 0x0000001f, 0x00000010,
500         0x15c0, 0x000c0fc0, 0x000c0400
501 };
502
/* Golden register settings for Hainan: {offset, mask, value} triples. */
503 static const u32 hainan_golden_registers[] =
504 {
505         0x9a10, 0x00010000, 0x00018208,
506         0x9830, 0xffffffff, 0x00000000,
507         0x9834, 0xf00fffff, 0x00000400,
508         0x9838, 0x0002021c, 0x00020200,
509         0xd0c0, 0xff000fff, 0x00000100,
510         0xd030, 0x000300c0, 0x00800040,
511         0xd8c0, 0xff000fff, 0x00000100,
512         0xd830, 0x000300c0, 0x00800040,
513         0x2ae4, 0x00073ffe, 0x000022a2,
514         0x240c, 0x000007ff, 0x00000000,
515         0x8a14, 0xf000001f, 0x00000007,
516         0x8b24, 0xffffffff, 0x00ffffff,
517         0x8b10, 0x0000ff0f, 0x00000000,
518         0x28a4c, 0x07ffffff, 0x4e000000,
519         0x28350, 0x3f3f3fff, 0x00000000,
520         0x30, 0x000000ff, 0x0040,
521         0x34, 0x00000040, 0x00004040,
522         0x9100, 0x03e00000, 0x03600000,
523         0x9060, 0x0000007f, 0x00000020,
524         0x9508, 0x00010000, 0x00010000,
525         0xac14, 0x000003ff, 0x000000f1,
526         0xac10, 0xffffffff, 0x00000000,
527         0xac0c, 0xffffffff, 0x00003210,
528         0x88d4, 0x0000001f, 0x00000010,
529         0x15c0, 0x000c0fc0, 0x000c0400
530 };
531
/* Additional Hainan golden register triple applied separately from the
 * main table (split rationale not visible in this chunk). */
532 static const u32 hainan_golden_registers2[] =
533 {
534         0x98f8, 0xffffffff, 0x02010001
535 };
536
/* Medium-grain / coarse-grain clock gating (MGCG/CGCG) init sequence for
 * Tahiti: {offset, mask, value} triples applied in order. */
537 static const u32 tahiti_mgcg_cgcg_init[] =
538 {
539         0xc400, 0xffffffff, 0xfffffffc,
540         0x802c, 0xffffffff, 0xe0000000,
541         0x9a60, 0xffffffff, 0x00000100,
542         0x92a4, 0xffffffff, 0x00000100,
543         0xc164, 0xffffffff, 0x00000100,
544         0x9774, 0xffffffff, 0x00000100,
545         0x8984, 0xffffffff, 0x06000100,
546         0x8a18, 0xffffffff, 0x00000100,
547         0x92a0, 0xffffffff, 0x00000100,
548         0xc380, 0xffffffff, 0x00000100,
549         0x8b28, 0xffffffff, 0x00000100,
550         0x9144, 0xffffffff, 0x00000100,
551         0x8d88, 0xffffffff, 0x00000100,
552         0x8d8c, 0xffffffff, 0x00000100,
553         0x9030, 0xffffffff, 0x00000100,
554         0x9034, 0xffffffff, 0x00000100,
555         0x9038, 0xffffffff, 0x00000100,
556         0x903c, 0xffffffff, 0x00000100,
557         0xad80, 0xffffffff, 0x00000100,
558         0xac54, 0xffffffff, 0x00000100,
559         0x897c, 0xffffffff, 0x06000100,
560         0x9868, 0xffffffff, 0x00000100,
561         0x9510, 0xffffffff, 0x00000100,
562         0xaf04, 0xffffffff, 0x00000100,
563         0xae04, 0xffffffff, 0x00000100,
564         0x949c, 0xffffffff, 0x00000100,
565         0x802c, 0xffffffff, 0xe0000000,
566         0x9160, 0xffffffff, 0x00010000,
567         0x9164, 0xffffffff, 0x00030002,
568         0x9168, 0xffffffff, 0x00040007,
569         0x916c, 0xffffffff, 0x00060005,
570         0x9170, 0xffffffff, 0x00090008,
571         0x9174, 0xffffffff, 0x00020001,
572         0x9178, 0xffffffff, 0x00040003,
573         0x917c, 0xffffffff, 0x00000007,
574         0x9180, 0xffffffff, 0x00060005,
575         0x9184, 0xffffffff, 0x00090008,
576         0x9188, 0xffffffff, 0x00030002,
577         0x918c, 0xffffffff, 0x00050004,
578         0x9190, 0xffffffff, 0x00000008,
579         0x9194, 0xffffffff, 0x00070006,
580         0x9198, 0xffffffff, 0x000a0009,
581         0x919c, 0xffffffff, 0x00040003,
582         0x91a0, 0xffffffff, 0x00060005,
583         0x91a4, 0xffffffff, 0x00000009,
584         0x91a8, 0xffffffff, 0x00080007,
585         0x91ac, 0xffffffff, 0x000b000a,
586         0x91b0, 0xffffffff, 0x00050004,
587         0x91b4, 0xffffffff, 0x00070006,
588         0x91b8, 0xffffffff, 0x0008000b,
589         0x91bc, 0xffffffff, 0x000a0009,
590         0x91c0, 0xffffffff, 0x000d000c,
591         0x91c4, 0xffffffff, 0x00060005,
592         0x91c8, 0xffffffff, 0x00080007,
593         0x91cc, 0xffffffff, 0x0000000b,
594         0x91d0, 0xffffffff, 0x000a0009,
595         0x91d4, 0xffffffff, 0x000d000c,
596         0x91d8, 0xffffffff, 0x00070006,
597         0x91dc, 0xffffffff, 0x00090008,
598         0x91e0, 0xffffffff, 0x0000000c,
599         0x91e4, 0xffffffff, 0x000b000a,
600         0x91e8, 0xffffffff, 0x000e000d,
601         0x91ec, 0xffffffff, 0x00080007,
602         0x91f0, 0xffffffff, 0x000a0009,
603         0x91f4, 0xffffffff, 0x0000000d,
604         0x91f8, 0xffffffff, 0x000c000b,
605         0x91fc, 0xffffffff, 0x000f000e,
606         0x9200, 0xffffffff, 0x00090008,
607         0x9204, 0xffffffff, 0x000b000a,
608         0x9208, 0xffffffff, 0x000c000f,
609         0x920c, 0xffffffff, 0x000e000d,
610         0x9210, 0xffffffff, 0x00110010,
611         0x9214, 0xffffffff, 0x000a0009,
612         0x9218, 0xffffffff, 0x000c000b,
613         0x921c, 0xffffffff, 0x0000000f,
614         0x9220, 0xffffffff, 0x000e000d,
615         0x9224, 0xffffffff, 0x00110010,
616         0x9228, 0xffffffff, 0x000b000a,
617         0x922c, 0xffffffff, 0x000d000c,
618         0x9230, 0xffffffff, 0x00000010,
619         0x9234, 0xffffffff, 0x000f000e,
620         0x9238, 0xffffffff, 0x00120011,
621         0x923c, 0xffffffff, 0x000c000b,
622         0x9240, 0xffffffff, 0x000e000d,
623         0x9244, 0xffffffff, 0x00000011,
624         0x9248, 0xffffffff, 0x0010000f,
625         0x924c, 0xffffffff, 0x00130012,
626         0x9250, 0xffffffff, 0x000d000c,
627         0x9254, 0xffffffff, 0x000f000e,
628         0x9258, 0xffffffff, 0x00100013,
629         0x925c, 0xffffffff, 0x00120011,
630         0x9260, 0xffffffff, 0x00150014,
631         0x9264, 0xffffffff, 0x000e000d,
632         0x9268, 0xffffffff, 0x0010000f,
633         0x926c, 0xffffffff, 0x00000013,
634         0x9270, 0xffffffff, 0x00120011,
635         0x9274, 0xffffffff, 0x00150014,
636         0x9278, 0xffffffff, 0x000f000e,
637         0x927c, 0xffffffff, 0x00110010,
638         0x9280, 0xffffffff, 0x00000014,
639         0x9284, 0xffffffff, 0x00130012,
640         0x9288, 0xffffffff, 0x00160015,
641         0x928c, 0xffffffff, 0x0010000f,
642         0x9290, 0xffffffff, 0x00120011,
643         0x9294, 0xffffffff, 0x00000015,
644         0x9298, 0xffffffff, 0x00140013,
645         0x929c, 0xffffffff, 0x00170016,
646         0x9150, 0xffffffff, 0x96940200,
647         0x8708, 0xffffffff, 0x00900100,
648         0xc478, 0xffffffff, 0x00000080,
649         0xc404, 0xffffffff, 0x0020003f,
650         0x30, 0xffffffff, 0x0000001c,
651         0x34, 0x000f0000, 0x000f0000,
652         0x160c, 0xffffffff, 0x00000100,
653         0x1024, 0xffffffff, 0x00000100,
654         0x102c, 0x00000101, 0x00000000,
655         0x20a8, 0xffffffff, 0x00000104,
656         0x264c, 0x000c0000, 0x000c0000,
657         0x2648, 0x000c0000, 0x000c0000,
658         0x55e4, 0xff000fff, 0x00000100,
659         0x55e8, 0x00000001, 0x00000001,
660         0x2f50, 0x00000001, 0x00000001,
661         0x30cc, 0xc0000fff, 0x00000104,
662         0xc1e4, 0x00000001, 0x00000001,
663         0xd0c0, 0xfffffff0, 0x00000100,
664         0xd8c0, 0xfffffff0, 0x00000100
665 };
666
/* Medium-grain / coarse-grain clock gating (MGCG/CGCG) init sequence for
 * Pitcairn: {offset, mask, value} triples applied in order. */
667 static const u32 pitcairn_mgcg_cgcg_init[] =
668 {
669         0xc400, 0xffffffff, 0xfffffffc,
670         0x802c, 0xffffffff, 0xe0000000,
671         0x9a60, 0xffffffff, 0x00000100,
672         0x92a4, 0xffffffff, 0x00000100,
673         0xc164, 0xffffffff, 0x00000100,
674         0x9774, 0xffffffff, 0x00000100,
675         0x8984, 0xffffffff, 0x06000100,
676         0x8a18, 0xffffffff, 0x00000100,
677         0x92a0, 0xffffffff, 0x00000100,
678         0xc380, 0xffffffff, 0x00000100,
679         0x8b28, 0xffffffff, 0x00000100,
680         0x9144, 0xffffffff, 0x00000100,
681         0x8d88, 0xffffffff, 0x00000100,
682         0x8d8c, 0xffffffff, 0x00000100,
683         0x9030, 0xffffffff, 0x00000100,
684         0x9034, 0xffffffff, 0x00000100,
685         0x9038, 0xffffffff, 0x00000100,
686         0x903c, 0xffffffff, 0x00000100,
687         0xad80, 0xffffffff, 0x00000100,
688         0xac54, 0xffffffff, 0x00000100,
689         0x897c, 0xffffffff, 0x06000100,
690         0x9868, 0xffffffff, 0x00000100,
691         0x9510, 0xffffffff, 0x00000100,
692         0xaf04, 0xffffffff, 0x00000100,
693         0xae04, 0xffffffff, 0x00000100,
694         0x949c, 0xffffffff, 0x00000100,
695         0x802c, 0xffffffff, 0xe0000000,
696         0x9160, 0xffffffff, 0x00010000,
697         0x9164, 0xffffffff, 0x00030002,
698         0x9168, 0xffffffff, 0x00040007,
699         0x916c, 0xffffffff, 0x00060005,
700         0x9170, 0xffffffff, 0x00090008,
701         0x9174, 0xffffffff, 0x00020001,
702         0x9178, 0xffffffff, 0x00040003,
703         0x917c, 0xffffffff, 0x00000007,
704         0x9180, 0xffffffff, 0x00060005,
705         0x9184, 0xffffffff, 0x00090008,
706         0x9188, 0xffffffff, 0x00030002,
707         0x918c, 0xffffffff, 0x00050004,
708         0x9190, 0xffffffff, 0x00000008,
709         0x9194, 0xffffffff, 0x00070006,
710         0x9198, 0xffffffff, 0x000a0009,
711         0x919c, 0xffffffff, 0x00040003,
712         0x91a0, 0xffffffff, 0x00060005,
713         0x91a4, 0xffffffff, 0x00000009,
714         0x91a8, 0xffffffff, 0x00080007,
715         0x91ac, 0xffffffff, 0x000b000a,
716         0x91b0, 0xffffffff, 0x00050004,
717         0x91b4, 0xffffffff, 0x00070006,
718         0x91b8, 0xffffffff, 0x0008000b,
719         0x91bc, 0xffffffff, 0x000a0009,
720         0x91c0, 0xffffffff, 0x000d000c,
721         0x9200, 0xffffffff, 0x00090008,
722         0x9204, 0xffffffff, 0x000b000a,
723         0x9208, 0xffffffff, 0x000c000f,
724         0x920c, 0xffffffff, 0x000e000d,
725         0x9210, 0xffffffff, 0x00110010,
726         0x9214, 0xffffffff, 0x000a0009,
727         0x9218, 0xffffffff, 0x000c000b,
728         0x921c, 0xffffffff, 0x0000000f,
729         0x9220, 0xffffffff, 0x000e000d,
730         0x9224, 0xffffffff, 0x00110010,
731         0x9228, 0xffffffff, 0x000b000a,
732         0x922c, 0xffffffff, 0x000d000c,
733         0x9230, 0xffffffff, 0x00000010,
734         0x9234, 0xffffffff, 0x000f000e,
735         0x9238, 0xffffffff, 0x00120011,
736         0x923c, 0xffffffff, 0x000c000b,
737         0x9240, 0xffffffff, 0x000e000d,
738         0x9244, 0xffffffff, 0x00000011,
739         0x9248, 0xffffffff, 0x0010000f,
740         0x924c, 0xffffffff, 0x00130012,
741         0x9250, 0xffffffff, 0x000d000c,
742         0x9254, 0xffffffff, 0x000f000e,
743         0x9258, 0xffffffff, 0x00100013,
744         0x925c, 0xffffffff, 0x00120011,
745         0x9260, 0xffffffff, 0x00150014,
746         0x9150, 0xffffffff, 0x96940200,
747         0x8708, 0xffffffff, 0x00900100,
748         0xc478, 0xffffffff, 0x00000080,
749         0xc404, 0xffffffff, 0x0020003f,
750         0x30, 0xffffffff, 0x0000001c,
751         0x34, 0x000f0000, 0x000f0000,
752         0x160c, 0xffffffff, 0x00000100,
753         0x1024, 0xffffffff, 0x00000100,
754         0x102c, 0x00000101, 0x00000000,
755         0x20a8, 0xffffffff, 0x00000104,
756         0x55e4, 0xff000fff, 0x00000100,
757         0x55e8, 0x00000001, 0x00000001,
758         0x2f50, 0x00000001, 0x00000001,
759         0x30cc, 0xc0000fff, 0x00000104,
760         0xc1e4, 0x00000001, 0x00000001,
761         0xd0c0, 0xfffffff0, 0x00000100,
762         0xd8c0, 0xfffffff0, 0x00000100
763 };
764
765 static const u32 verde_mgcg_cgcg_init[] =
766 {
767         0xc400, 0xffffffff, 0xfffffffc,
768         0x802c, 0xffffffff, 0xe0000000,
769         0x9a60, 0xffffffff, 0x00000100,
770         0x92a4, 0xffffffff, 0x00000100,
771         0xc164, 0xffffffff, 0x00000100,
772         0x9774, 0xffffffff, 0x00000100,
773         0x8984, 0xffffffff, 0x06000100,
774         0x8a18, 0xffffffff, 0x00000100,
775         0x92a0, 0xffffffff, 0x00000100,
776         0xc380, 0xffffffff, 0x00000100,
777         0x8b28, 0xffffffff, 0x00000100,
778         0x9144, 0xffffffff, 0x00000100,
779         0x8d88, 0xffffffff, 0x00000100,
780         0x8d8c, 0xffffffff, 0x00000100,
781         0x9030, 0xffffffff, 0x00000100,
782         0x9034, 0xffffffff, 0x00000100,
783         0x9038, 0xffffffff, 0x00000100,
784         0x903c, 0xffffffff, 0x00000100,
785         0xad80, 0xffffffff, 0x00000100,
786         0xac54, 0xffffffff, 0x00000100,
787         0x897c, 0xffffffff, 0x06000100,
788         0x9868, 0xffffffff, 0x00000100,
789         0x9510, 0xffffffff, 0x00000100,
790         0xaf04, 0xffffffff, 0x00000100,
791         0xae04, 0xffffffff, 0x00000100,
792         0x949c, 0xffffffff, 0x00000100,
793         0x802c, 0xffffffff, 0xe0000000,
794         0x9160, 0xffffffff, 0x00010000,
795         0x9164, 0xffffffff, 0x00030002,
796         0x9168, 0xffffffff, 0x00040007,
797         0x916c, 0xffffffff, 0x00060005,
798         0x9170, 0xffffffff, 0x00090008,
799         0x9174, 0xffffffff, 0x00020001,
800         0x9178, 0xffffffff, 0x00040003,
801         0x917c, 0xffffffff, 0x00000007,
802         0x9180, 0xffffffff, 0x00060005,
803         0x9184, 0xffffffff, 0x00090008,
804         0x9188, 0xffffffff, 0x00030002,
805         0x918c, 0xffffffff, 0x00050004,
806         0x9190, 0xffffffff, 0x00000008,
807         0x9194, 0xffffffff, 0x00070006,
808         0x9198, 0xffffffff, 0x000a0009,
809         0x919c, 0xffffffff, 0x00040003,
810         0x91a0, 0xffffffff, 0x00060005,
811         0x91a4, 0xffffffff, 0x00000009,
812         0x91a8, 0xffffffff, 0x00080007,
813         0x91ac, 0xffffffff, 0x000b000a,
814         0x91b0, 0xffffffff, 0x00050004,
815         0x91b4, 0xffffffff, 0x00070006,
816         0x91b8, 0xffffffff, 0x0008000b,
817         0x91bc, 0xffffffff, 0x000a0009,
818         0x91c0, 0xffffffff, 0x000d000c,
819         0x9200, 0xffffffff, 0x00090008,
820         0x9204, 0xffffffff, 0x000b000a,
821         0x9208, 0xffffffff, 0x000c000f,
822         0x920c, 0xffffffff, 0x000e000d,
823         0x9210, 0xffffffff, 0x00110010,
824         0x9214, 0xffffffff, 0x000a0009,
825         0x9218, 0xffffffff, 0x000c000b,
826         0x921c, 0xffffffff, 0x0000000f,
827         0x9220, 0xffffffff, 0x000e000d,
828         0x9224, 0xffffffff, 0x00110010,
829         0x9228, 0xffffffff, 0x000b000a,
830         0x922c, 0xffffffff, 0x000d000c,
831         0x9230, 0xffffffff, 0x00000010,
832         0x9234, 0xffffffff, 0x000f000e,
833         0x9238, 0xffffffff, 0x00120011,
834         0x923c, 0xffffffff, 0x000c000b,
835         0x9240, 0xffffffff, 0x000e000d,
836         0x9244, 0xffffffff, 0x00000011,
837         0x9248, 0xffffffff, 0x0010000f,
838         0x924c, 0xffffffff, 0x00130012,
839         0x9250, 0xffffffff, 0x000d000c,
840         0x9254, 0xffffffff, 0x000f000e,
841         0x9258, 0xffffffff, 0x00100013,
842         0x925c, 0xffffffff, 0x00120011,
843         0x9260, 0xffffffff, 0x00150014,
844         0x9150, 0xffffffff, 0x96940200,
845         0x8708, 0xffffffff, 0x00900100,
846         0xc478, 0xffffffff, 0x00000080,
847         0xc404, 0xffffffff, 0x0020003f,
848         0x30, 0xffffffff, 0x0000001c,
849         0x34, 0x000f0000, 0x000f0000,
850         0x160c, 0xffffffff, 0x00000100,
851         0x1024, 0xffffffff, 0x00000100,
852         0x102c, 0x00000101, 0x00000000,
853         0x20a8, 0xffffffff, 0x00000104,
854         0x264c, 0x000c0000, 0x000c0000,
855         0x2648, 0x000c0000, 0x000c0000,
856         0x55e4, 0xff000fff, 0x00000100,
857         0x55e8, 0x00000001, 0x00000001,
858         0x2f50, 0x00000001, 0x00000001,
859         0x30cc, 0xc0000fff, 0x00000104,
860         0xc1e4, 0x00000001, 0x00000001,
861         0xd0c0, 0xfffffff0, 0x00000100,
862         0xd8c0, 0xfffffff0, 0x00000100
863 };
864
/* Oland clock-gating (MGCG/CGCG) golden settings, applied once at init by
 * si_init_golden_registers() via radeon_program_register_sequence().
 * NOTE(review): layout appears to be flat {reg offset, and_mask, or_value}
 * triplets (three u32s per line) — confirm against
 * radeon_program_register_sequence().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
944
/* Hainan clock-gating (MGCG/CGCG) golden settings, applied once at init by
 * si_init_golden_registers() via radeon_program_register_sequence().
 * Mostly identical to the Oland table but omits the 0x102c, 0x55e4 and
 * 0x55e8 entries.
 * NOTE(review): layout appears to be flat {reg offset, and_mask, or_value}
 * triplets (three u32s per line) — confirm against
 * radeon_program_register_sequence().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1021
1022 static u32 verde_pg_init[] =
1023 {
1024         0x353c, 0xffffffff, 0x40000,
1025         0x3538, 0xffffffff, 0x200010ff,
1026         0x353c, 0xffffffff, 0x0,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x7007,
1032         0x3538, 0xffffffff, 0x300010ff,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x400000,
1039         0x3538, 0xffffffff, 0x100010ff,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x120200,
1046         0x3538, 0xffffffff, 0x500010ff,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x1e1e16,
1053         0x3538, 0xffffffff, 0x600010ff,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x171f1e,
1060         0x3538, 0xffffffff, 0x700010ff,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x0,
1066         0x353c, 0xffffffff, 0x0,
1067         0x3538, 0xffffffff, 0x9ff,
1068         0x3500, 0xffffffff, 0x0,
1069         0x3504, 0xffffffff, 0x10000800,
1070         0x3504, 0xffffffff, 0xf,
1071         0x3504, 0xffffffff, 0xf,
1072         0x3500, 0xffffffff, 0x4,
1073         0x3504, 0xffffffff, 0x1000051e,
1074         0x3504, 0xffffffff, 0xffff,
1075         0x3504, 0xffffffff, 0xffff,
1076         0x3500, 0xffffffff, 0x8,
1077         0x3504, 0xffffffff, 0x80500,
1078         0x3500, 0xffffffff, 0x12,
1079         0x3504, 0xffffffff, 0x9050c,
1080         0x3500, 0xffffffff, 0x1d,
1081         0x3504, 0xffffffff, 0xb052c,
1082         0x3500, 0xffffffff, 0x2a,
1083         0x3504, 0xffffffff, 0x1053e,
1084         0x3500, 0xffffffff, 0x2d,
1085         0x3504, 0xffffffff, 0x10546,
1086         0x3500, 0xffffffff, 0x30,
1087         0x3504, 0xffffffff, 0xa054e,
1088         0x3500, 0xffffffff, 0x3c,
1089         0x3504, 0xffffffff, 0x1055f,
1090         0x3500, 0xffffffff, 0x3f,
1091         0x3504, 0xffffffff, 0x10567,
1092         0x3500, 0xffffffff, 0x42,
1093         0x3504, 0xffffffff, 0x1056f,
1094         0x3500, 0xffffffff, 0x45,
1095         0x3504, 0xffffffff, 0x10572,
1096         0x3500, 0xffffffff, 0x48,
1097         0x3504, 0xffffffff, 0x20575,
1098         0x3500, 0xffffffff, 0x4c,
1099         0x3504, 0xffffffff, 0x190801,
1100         0x3500, 0xffffffff, 0x67,
1101         0x3504, 0xffffffff, 0x1082a,
1102         0x3500, 0xffffffff, 0x6a,
1103         0x3504, 0xffffffff, 0x1b082d,
1104         0x3500, 0xffffffff, 0x87,
1105         0x3504, 0xffffffff, 0x310851,
1106         0x3500, 0xffffffff, 0xba,
1107         0x3504, 0xffffffff, 0x891,
1108         0x3500, 0xffffffff, 0xbc,
1109         0x3504, 0xffffffff, 0x893,
1110         0x3500, 0xffffffff, 0xbe,
1111         0x3504, 0xffffffff, 0x20895,
1112         0x3500, 0xffffffff, 0xc2,
1113         0x3504, 0xffffffff, 0x20899,
1114         0x3500, 0xffffffff, 0xc6,
1115         0x3504, 0xffffffff, 0x2089d,
1116         0x3500, 0xffffffff, 0xca,
1117         0x3504, 0xffffffff, 0x8a1,
1118         0x3500, 0xffffffff, 0xcc,
1119         0x3504, 0xffffffff, 0x8a3,
1120         0x3500, 0xffffffff, 0xce,
1121         0x3504, 0xffffffff, 0x308a5,
1122         0x3500, 0xffffffff, 0xd3,
1123         0x3504, 0xffffffff, 0x6d08cd,
1124         0x3500, 0xffffffff, 0x142,
1125         0x3504, 0xffffffff, 0x2000095a,
1126         0x3504, 0xffffffff, 0x1,
1127         0x3500, 0xffffffff, 0x144,
1128         0x3504, 0xffffffff, 0x301f095b,
1129         0x3500, 0xffffffff, 0x165,
1130         0x3504, 0xffffffff, 0xc094d,
1131         0x3500, 0xffffffff, 0x173,
1132         0x3504, 0xffffffff, 0xf096d,
1133         0x3500, 0xffffffff, 0x184,
1134         0x3504, 0xffffffff, 0x15097f,
1135         0x3500, 0xffffffff, 0x19b,
1136         0x3504, 0xffffffff, 0xc0998,
1137         0x3500, 0xffffffff, 0x1a9,
1138         0x3504, 0xffffffff, 0x409a7,
1139         0x3500, 0xffffffff, 0x1af,
1140         0x3504, 0xffffffff, 0xcdc,
1141         0x3500, 0xffffffff, 0x1b1,
1142         0x3504, 0xffffffff, 0x800,
1143         0x3508, 0xffffffff, 0x6c9b2000,
1144         0x3510, 0xfc00, 0x2000,
1145         0x3544, 0xffffffff, 0xfc0,
1146         0x28d4, 0x00000100, 0x100
1147 };
1148
1149 static void si_init_golden_registers(struct radeon_device *rdev)
1150 {
1151         switch (rdev->family) {
1152         case CHIP_TAHITI:
1153                 radeon_program_register_sequence(rdev,
1154                                                  tahiti_golden_registers,
1155                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1156                 radeon_program_register_sequence(rdev,
1157                                                  tahiti_golden_rlc_registers,
1158                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1159                 radeon_program_register_sequence(rdev,
1160                                                  tahiti_mgcg_cgcg_init,
1161                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1162                 radeon_program_register_sequence(rdev,
1163                                                  tahiti_golden_registers2,
1164                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1165                 break;
1166         case CHIP_PITCAIRN:
1167                 radeon_program_register_sequence(rdev,
1168                                                  pitcairn_golden_registers,
1169                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1170                 radeon_program_register_sequence(rdev,
1171                                                  pitcairn_golden_rlc_registers,
1172                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1173                 radeon_program_register_sequence(rdev,
1174                                                  pitcairn_mgcg_cgcg_init,
1175                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1176                 break;
1177         case CHIP_VERDE:
1178                 radeon_program_register_sequence(rdev,
1179                                                  verde_golden_registers,
1180                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1181                 radeon_program_register_sequence(rdev,
1182                                                  verde_golden_rlc_registers,
1183                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1184                 radeon_program_register_sequence(rdev,
1185                                                  verde_mgcg_cgcg_init,
1186                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1187                 radeon_program_register_sequence(rdev,
1188                                                  verde_pg_init,
1189                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1190                 break;
1191         case CHIP_OLAND:
1192                 radeon_program_register_sequence(rdev,
1193                                                  oland_golden_registers,
1194                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1195                 radeon_program_register_sequence(rdev,
1196                                                  oland_golden_rlc_registers,
1197                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1198                 radeon_program_register_sequence(rdev,
1199                                                  oland_mgcg_cgcg_init,
1200                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1201                 break;
1202         case CHIP_HAINAN:
1203                 radeon_program_register_sequence(rdev,
1204                                                  hainan_golden_registers,
1205                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1206                 radeon_program_register_sequence(rdev,
1207                                                  hainan_golden_registers2,
1208                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1209                 radeon_program_register_sequence(rdev,
1210                                                  hainan_mgcg_cgcg_init,
1211                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1212                 break;
1213         default:
1214                 break;
1215         }
1216 }
1217
1218 /**
1219  * si_get_allowed_info_register - fetch the register for the info ioctl
1220  *
1221  * @rdev: radeon_device pointer
1222  * @reg: register offset in bytes
1223  * @val: register value
1224  *
1225  * Returns 0 for success or -EINVAL for an invalid register
1226  *
1227  */
1228 int si_get_allowed_info_register(struct radeon_device *rdev,
1229                                  u32 reg, u32 *val)
1230 {
1231         switch (reg) {
1232         case GRBM_STATUS:
1233         case GRBM_STATUS2:
1234         case GRBM_STATUS_SE0:
1235         case GRBM_STATUS_SE1:
1236         case SRBM_STATUS:
1237         case SRBM_STATUS2:
1238         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1239         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1240         case UVD_STATUS:
1241                 *val = RREG32(reg);
1242                 return 0;
1243         default:
1244                 return -EINVAL;
1245         }
1246 }
1247
1248 #define PCIE_BUS_CLK                10000
1249 #define TCLK                        (PCIE_BUS_CLK / 10)
1250
1251 /**
1252  * si_get_xclk - get the xclk
1253  *
1254  * @rdev: radeon_device pointer
1255  *
1256  * Returns the reference clock used by the gfx engine
1257  * (SI).
1258  */
1259 u32 si_get_xclk(struct radeon_device *rdev)
1260 {
1261         u32 reference_clock = rdev->clock.spll.reference_freq;
1262         u32 tmp;
1263
1264         tmp = RREG32(CG_CLKPIN_CNTL_2);
1265         if (tmp & MUX_TCLK_TO_XCLK)
1266                 return TCLK;
1267
1268         tmp = RREG32(CG_CLKPIN_CNTL);
1269         if (tmp & XTALIN_DIVIDE)
1270                 return reference_clock / 4;
1271
1272         return reference_clock;
1273 }
1274
1275 /* get temperature in millidegrees */
1276 int si_get_temp(struct radeon_device *rdev)
1277 {
1278         u32 temp;
1279         int actual_temp = 0;
1280
1281         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1282                 CTF_TEMP_SHIFT;
1283
1284         if (temp & 0x200)
1285                 actual_temp = 255;
1286         else
1287                 actual_temp = temp & 0x1ff;
1288
1289         actual_temp = (actual_temp * 1000);
1290
1291         return actual_temp;
1292 }
1293
/* Number of {MC_SEQ_IO_DEBUG index, data} pairs in each *_io_mc_regs table
 * below.  All five SI families share the same table length (and, in fact,
 * the same contents except for the final 0x9f entry).  The pairs are
 * written to MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * si_mc_load_microcode() before streaming the legacy (non new_fw) MC
 * ucode image.
 */
#define TAHITI_IO_MC_REGS_SIZE 36

/* MC SEQ_IO_DEBUG init pairs for Tahiti. */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

/* MC SEQ_IO_DEBUG init pairs for Pitcairn (differs from Tahiti only in
 * the last, 0x9f, entry).
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

/* MC SEQ_IO_DEBUG init pairs for Verde (differs from Tahiti only in
 * the last, 0x9f, entry).
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

/* MC SEQ_IO_DEBUG init pairs for Oland (differs from Tahiti only in
 * the last, 0x9f, entry).
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

/* MC SEQ_IO_DEBUG init pairs for Hainan (differs from Tahiti only in
 * the last, 0x9f, entry).
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1490
1491 /* ucode loading */
1492 int si_mc_load_microcode(struct radeon_device *rdev)
1493 {
1494         const __be32 *fw_data = NULL;
1495         const __le32 *new_fw_data = NULL;
1496         u32 running;
1497         u32 *io_mc_regs = NULL;
1498         const __le32 *new_io_mc_regs = NULL;
1499         int i, regs_size, ucode_size;
1500
1501         if (!rdev->mc_fw)
1502                 return -EINVAL;
1503
1504         if (rdev->new_fw) {
1505                 const struct mc_firmware_header_v1_0 *hdr =
1506                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1507
1508                 radeon_ucode_print_mc_hdr(&hdr->header);
1509                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1510                 new_io_mc_regs = (const __le32 *)
1511                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1512                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1513                 new_fw_data = (const __le32 *)
1514                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1515         } else {
1516                 ucode_size = rdev->mc_fw->size / 4;
1517
1518                 switch (rdev->family) {
1519                 case CHIP_TAHITI:
1520                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1521                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1522                         break;
1523                 case CHIP_PITCAIRN:
1524                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1525                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1526                         break;
1527                 case CHIP_VERDE:
1528                 default:
1529                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1530                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1531                         break;
1532                 case CHIP_OLAND:
1533                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1534                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1535                         break;
1536                 case CHIP_HAINAN:
1537                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1538                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1539                         break;
1540                 }
1541                 fw_data = (const __be32 *)rdev->mc_fw->data;
1542         }
1543
1544         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1545
1546         if (running == 0) {
1547                 /* reset the engine and set to writable */
1548                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1549                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1550
1551                 /* load mc io regs */
1552                 for (i = 0; i < regs_size; i++) {
1553                         if (rdev->new_fw) {
1554                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1555                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1556                         } else {
1557                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1558                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1559                         }
1560                 }
1561                 /* load the MC ucode */
1562                 for (i = 0; i < ucode_size; i++) {
1563                         if (rdev->new_fw)
1564                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1565                         else
1566                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1567                 }
1568
1569                 /* put the engine back into the active state */
1570                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1571                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1572                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1573
1574                 /* wait for training to complete */
1575                 for (i = 0; i < rdev->usec_timeout; i++) {
1576                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1577                                 break;
1578                         udelay(1);
1579                 }
1580                 for (i = 0; i < rdev->usec_timeout; i++) {
1581                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1582                                 break;
1583                         udelay(1);
1584                 }
1585         }
1586
1587         return 0;
1588 }
1589
1590 static int si_init_microcode(struct radeon_device *rdev)
1591 {
1592         const char *chip_name;
1593         const char *new_chip_name;
1594         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1595         size_t smc_req_size, mc2_req_size;
1596         char fw_name[30];
1597         int err;
1598         int new_fw = 0;
1599         bool new_smc = false;
1600         bool si58_fw = false;
1601         bool banks2_fw = false;
1602
1603         DRM_DEBUG("\n");
1604
1605         switch (rdev->family) {
1606         case CHIP_TAHITI:
1607                 chip_name = "TAHITI";
1608                 new_chip_name = "tahiti";
1609                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1610                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1611                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1612                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1613                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1614                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1615                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1616                 break;
1617         case CHIP_PITCAIRN:
1618                 chip_name = "PITCAIRN";
1619                 if ((rdev->pdev->revision == 0x81) &&
1620                     ((rdev->pdev->device == 0x6810) ||
1621                      (rdev->pdev->device == 0x6811)))
1622                         new_smc = true;
1623                 new_chip_name = "pitcairn";
1624                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1625                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1626                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1627                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1628                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1629                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1630                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1631                 break;
1632         case CHIP_VERDE:
1633                 chip_name = "VERDE";
1634                 if (((rdev->pdev->device == 0x6820) &&
1635                      ((rdev->pdev->revision == 0x81) ||
1636                       (rdev->pdev->revision == 0x83))) ||
1637                     ((rdev->pdev->device == 0x6821) &&
1638                      ((rdev->pdev->revision == 0x83) ||
1639                       (rdev->pdev->revision == 0x87))) ||
1640                     ((rdev->pdev->revision == 0x87) &&
1641                      ((rdev->pdev->device == 0x6823) ||
1642                       (rdev->pdev->device == 0x682b))))
1643                         new_smc = true;
1644                 new_chip_name = "verde";
1645                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1646                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1647                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1648                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1649                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1650                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1651                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1652                 break;
1653         case CHIP_OLAND:
1654                 chip_name = "OLAND";
1655                 if (((rdev->pdev->revision == 0x81) &&
1656                      ((rdev->pdev->device == 0x6600) ||
1657                       (rdev->pdev->device == 0x6604) ||
1658                       (rdev->pdev->device == 0x6605) ||
1659                       (rdev->pdev->device == 0x6610))) ||
1660                     ((rdev->pdev->revision == 0x83) &&
1661                      (rdev->pdev->device == 0x6610)))
1662                         new_smc = true;
1663                 new_chip_name = "oland";
1664                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1665                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1666                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1667                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1668                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1669                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1670                 break;
1671         case CHIP_HAINAN:
1672                 chip_name = "HAINAN";
1673                 if (((rdev->pdev->revision == 0x81) &&
1674                      (rdev->pdev->device == 0x6660)) ||
1675                     ((rdev->pdev->revision == 0x83) &&
1676                      ((rdev->pdev->device == 0x6660) ||
1677                       (rdev->pdev->device == 0x6663) ||
1678                       (rdev->pdev->device == 0x6665) ||
1679                       (rdev->pdev->device == 0x6667))))
1680                         new_smc = true;
1681                 else if ((rdev->pdev->revision == 0xc3) &&
1682                          (rdev->pdev->device == 0x6665))
1683                         banks2_fw = true;
1684                 new_chip_name = "hainan";
1685                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1686                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1687                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1688                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1689                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1690                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1691                 break;
1692         default: BUG();
1693         }
1694
1695         /* this memory configuration requires special firmware */
1696         if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1697                 si58_fw = true;
1698
1699         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1700
1701         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1702         err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1703         if (err) {
1704                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1705                 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1706                 if (err)
1707                         goto out;
1708                 if (rdev->pfp_fw->size != pfp_req_size) {
1709                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1710                                rdev->pfp_fw->size, fw_name);
1711                         err = -EINVAL;
1712                         goto out;
1713                 }
1714         } else {
1715                 err = radeon_ucode_validate(rdev->pfp_fw);
1716                 if (err) {
1717                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1718                                fw_name);
1719                         goto out;
1720                 } else {
1721                         new_fw++;
1722                 }
1723         }
1724
1725         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1726         err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
1727         if (err) {
1728                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1729                 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
1730                 if (err)
1731                         goto out;
1732                 if (rdev->me_fw->size != me_req_size) {
1733                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1734                                rdev->me_fw->size, fw_name);
1735                         err = -EINVAL;
1736                 }
1737         } else {
1738                 err = radeon_ucode_validate(rdev->me_fw);
1739                 if (err) {
1740                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1741                                fw_name);
1742                         goto out;
1743                 } else {
1744                         new_fw++;
1745                 }
1746         }
1747
1748         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1749         err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1750         if (err) {
1751                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1752                 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1753                 if (err)
1754                         goto out;
1755                 if (rdev->ce_fw->size != ce_req_size) {
1756                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1757                                rdev->ce_fw->size, fw_name);
1758                         err = -EINVAL;
1759                 }
1760         } else {
1761                 err = radeon_ucode_validate(rdev->ce_fw);
1762                 if (err) {
1763                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1764                                fw_name);
1765                         goto out;
1766                 } else {
1767                         new_fw++;
1768                 }
1769         }
1770
1771         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1772         err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1773         if (err) {
1774                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1775                 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1776                 if (err)
1777                         goto out;
1778                 if (rdev->rlc_fw->size != rlc_req_size) {
1779                         pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1780                                rdev->rlc_fw->size, fw_name);
1781                         err = -EINVAL;
1782                 }
1783         } else {
1784                 err = radeon_ucode_validate(rdev->rlc_fw);
1785                 if (err) {
1786                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1787                                fw_name);
1788                         goto out;
1789                 } else {
1790                         new_fw++;
1791                 }
1792         }
1793
1794         if (si58_fw)
1795                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/");
1796         else
1797                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1798         err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1799         if (err) {
1800                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1801                 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1802                 if (err) {
1803                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1804                         err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1805                         if (err)
1806                                 goto out;
1807                 }
1808                 if ((rdev->mc_fw->size != mc_req_size) &&
1809                     (rdev->mc_fw->size != mc2_req_size)) {
1810                         pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1811                                rdev->mc_fw->size, fw_name);
1812                         err = -EINVAL;
1813                 }
1814                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1815         } else {
1816                 err = radeon_ucode_validate(rdev->mc_fw);
1817                 if (err) {
1818                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1819                                fw_name);
1820                         goto out;
1821                 } else {
1822                         new_fw++;
1823                 }
1824         }
1825
1826         if (banks2_fw)
1827                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/");
1828         else if (new_smc)
1829                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1830         else
1831                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1832         err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1833         if (err) {
1834                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1835                 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1836                 if (err) {
1837                         pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1838                         release_firmware(rdev->smc_fw);
1839                         rdev->smc_fw = NULL;
1840                         err = 0;
1841                 } else if (rdev->smc_fw->size != smc_req_size) {
1842                         pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1843                                rdev->smc_fw->size, fw_name);
1844                         err = -EINVAL;
1845                 }
1846         } else {
1847                 err = radeon_ucode_validate(rdev->smc_fw);
1848                 if (err) {
1849                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1850                                fw_name);
1851                         goto out;
1852                 } else {
1853                         new_fw++;
1854                 }
1855         }
1856
1857         if (new_fw == 0) {
1858                 rdev->new_fw = false;
1859         } else if (new_fw < 6) {
1860                 pr_err("si_fw: mixing new and old firmware!\n");
1861                 err = -EINVAL;
1862         } else {
1863                 rdev->new_fw = true;
1864         }
1865 out:
1866         if (err) {
1867                 if (err != -EINVAL)
1868                         pr_err("si_cp: Failed to load firmware \"%s\"\n",
1869                                fw_name);
1870                 release_firmware(rdev->pfp_fw);
1871                 rdev->pfp_fw = NULL;
1872                 release_firmware(rdev->me_fw);
1873                 rdev->me_fw = NULL;
1874                 release_firmware(rdev->ce_fw);
1875                 rdev->ce_fw = NULL;
1876                 release_firmware(rdev->rlc_fw);
1877                 rdev->rlc_fw = NULL;
1878                 release_firmware(rdev->mc_fw);
1879                 rdev->mc_fw = NULL;
1880                 release_firmware(rdev->smc_fw);
1881                 rdev->smc_fw = NULL;
1882         }
1883         return err;
1884 }
1885
1886 /* watermark setup */
1887 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1888                                    struct radeon_crtc *radeon_crtc,
1889                                    struct drm_display_mode *mode,
1890                                    struct drm_display_mode *other_mode)
1891 {
1892         u32 tmp, buffer_alloc, i;
1893         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1894         /*
1895          * Line Buffer Setup
1896          * There are 3 line buffers, each one shared by 2 display controllers.
1897          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1898          * the display controllers.  The paritioning is done via one of four
1899          * preset allocations specified in bits 21:20:
1900          *  0 - half lb
1901          *  2 - whole lb, other crtc must be disabled
1902          */
1903         /* this can get tricky if we have two large displays on a paired group
1904          * of crtcs.  Ideally for multiple large displays we'd assign them to
1905          * non-linked crtcs for maximum line buffer allocation.
1906          */
1907         if (radeon_crtc->base.enabled && mode) {
1908                 if (other_mode) {
1909                         tmp = 0; /* 1/2 */
1910                         buffer_alloc = 1;
1911                 } else {
1912                         tmp = 2; /* whole */
1913                         buffer_alloc = 2;
1914                 }
1915         } else {
1916                 tmp = 0;
1917                 buffer_alloc = 0;
1918         }
1919
1920         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1921                DC_LB_MEMORY_CONFIG(tmp));
1922
1923         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1924                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1925         for (i = 0; i < rdev->usec_timeout; i++) {
1926                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1927                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1928                         break;
1929                 udelay(1);
1930         }
1931
1932         if (radeon_crtc->base.enabled && mode) {
1933                 switch (tmp) {
1934                 case 0:
1935                 default:
1936                         return 4096 * 2;
1937                 case 2:
1938                         return 8192 * 2;
1939                 }
1940         }
1941
1942         /* controller not enabled, so no lb used */
1943         return 0;
1944 }
1945
1946 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1947 {
1948         u32 tmp = RREG32(MC_SHARED_CHMAP);
1949
1950         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1951         case 0:
1952         default:
1953                 return 1;
1954         case 1:
1955                 return 2;
1956         case 2:
1957                 return 4;
1958         case 3:
1959                 return 8;
1960         case 4:
1961                 return 3;
1962         case 5:
1963                 return 6;
1964         case 6:
1965                 return 10;
1966         case 7:
1967                 return 12;
1968         case 8:
1969                 return 16;
1970         }
1971 }
1972
/* Inputs for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1988
1989 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1990 {
1991         /* Calculate raw DRAM Bandwidth */
1992         fixed20_12 dram_efficiency; /* 0.7 */
1993         fixed20_12 yclk, dram_channels, bandwidth;
1994         fixed20_12 a;
1995
1996         a.full = dfixed_const(1000);
1997         yclk.full = dfixed_const(wm->yclk);
1998         yclk.full = dfixed_div(yclk, a);
1999         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2000         a.full = dfixed_const(10);
2001         dram_efficiency.full = dfixed_const(7);
2002         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2003         bandwidth.full = dfixed_mul(dram_channels, yclk);
2004         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2005
2006         return dfixed_trunc(bandwidth);
2007 }
2008
2009 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2010 {
2011         /* Calculate DRAM Bandwidth and the part allocated to display. */
2012         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2013         fixed20_12 yclk, dram_channels, bandwidth;
2014         fixed20_12 a;
2015
2016         a.full = dfixed_const(1000);
2017         yclk.full = dfixed_const(wm->yclk);
2018         yclk.full = dfixed_div(yclk, a);
2019         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2020         a.full = dfixed_const(10);
2021         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2022         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2023         bandwidth.full = dfixed_mul(dram_channels, yclk);
2024         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2025
2026         return dfixed_trunc(bandwidth);
2027 }
2028
2029 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2030 {
2031         /* Calculate the display Data return Bandwidth */
2032         fixed20_12 return_efficiency; /* 0.8 */
2033         fixed20_12 sclk, bandwidth;
2034         fixed20_12 a;
2035
2036         a.full = dfixed_const(1000);
2037         sclk.full = dfixed_const(wm->sclk);
2038         sclk.full = dfixed_div(sclk, a);
2039         a.full = dfixed_const(10);
2040         return_efficiency.full = dfixed_const(8);
2041         return_efficiency.full = dfixed_div(return_efficiency, a);
2042         a.full = dfixed_const(32);
2043         bandwidth.full = dfixed_mul(a, sclk);
2044         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2045
2046         return dfixed_trunc(bandwidth);
2047 }
2048
2049 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2050 {
2051         return 32;
2052 }
2053
2054 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2055 {
2056         /* Calculate the DMIF Request Bandwidth */
2057         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2058         fixed20_12 disp_clk, sclk, bandwidth;
2059         fixed20_12 a, b1, b2;
2060         u32 min_bandwidth;
2061
2062         a.full = dfixed_const(1000);
2063         disp_clk.full = dfixed_const(wm->disp_clk);
2064         disp_clk.full = dfixed_div(disp_clk, a);
2065         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2066         b1.full = dfixed_mul(a, disp_clk);
2067
2068         a.full = dfixed_const(1000);
2069         sclk.full = dfixed_const(wm->sclk);
2070         sclk.full = dfixed_div(sclk, a);
2071         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2072         b2.full = dfixed_mul(a, sclk);
2073
2074         a.full = dfixed_const(10);
2075         disp_clk_request_efficiency.full = dfixed_const(8);
2076         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2077
2078         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2079
2080         a.full = dfixed_const(min_bandwidth);
2081         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2082
2083         return dfixed_trunc(bandwidth);
2084 }
2085
2086 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2087 {
2088         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2089         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2090         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2091         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2092
2093         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2094 }
2095
2096 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2097 {
2098         /* Calculate the display mode Average Bandwidth
2099          * DisplayMode should contain the source and destination dimensions,
2100          * timing, etc.
2101          */
2102         fixed20_12 bpp;
2103         fixed20_12 line_time;
2104         fixed20_12 src_width;
2105         fixed20_12 bandwidth;
2106         fixed20_12 a;
2107
2108         a.full = dfixed_const(1000);
2109         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2110         line_time.full = dfixed_div(line_time, a);
2111         bpp.full = dfixed_const(wm->bytes_per_pixel);
2112         src_width.full = dfixed_const(wm->src_width);
2113         bandwidth.full = dfixed_mul(src_width, bpp);
2114         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2115         bandwidth.full = dfixed_div(bandwidth, line_time);
2116
2117         return dfixed_trunc(bandwidth);
2118 }
2119
/*
 * Compute the latency watermark (in ns) for one display head: memory
 * latency plus the time the other heads keep the data return path busy,
 * extended by any shortfall of the line buffer fill rate versus the
 * mode's active time.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* worst-case return time for a 512B x 8 chunk, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* return time for a 128B x 4 cursor line pair, in ns */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time the other active heads can occupy the return path */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling ratio, tap count and interlacing determine how many
	 * source lines feed one destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* per-head share of the available bandwidth, capped by what the
	 * DMIF can stream at this display clock */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	/* line buffer fill rate, also capped by the pixel demand */
	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line buffer fills too slowly, pad the watermark */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2169
2170 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2171 {
2172         if (dce6_average_bandwidth(wm) <=
2173             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2174                 return true;
2175         else
2176                 return false;
2177 };
2178
2179 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2180 {
2181         if (dce6_average_bandwidth(wm) <=
2182             (dce6_available_bandwidth(wm) / wm->num_heads))
2183                 return true;
2184         else
2185                 return false;
2186 };
2187
2188 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2189 {
2190         u32 lb_partitions = wm->lb_size / wm->src_width;
2191         u32 line_time = wm->active_time + wm->blank_time;
2192         u32 latency_tolerant_lines;
2193         u32 latency_hiding;
2194         fixed20_12 a;
2195
2196         a.full = dfixed_const(1);
2197         if (wm->vsc.full > a.full)
2198                 latency_tolerant_lines = 1;
2199         else {
2200                 if (lb_partitions <= (wm->vtaps + 1))
2201                         latency_tolerant_lines = 1;
2202                 else
2203                         latency_tolerant_lines = 2;
2204         }
2205
2206         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2207
2208         if (dce6_latency_watermark(wm) <= latency_hiding)
2209                 return true;
2210         else
2211                 return false;
2212 }
2213
2214 static void dce6_program_watermarks(struct radeon_device *rdev,
2215                                          struct radeon_crtc *radeon_crtc,
2216                                          u32 lb_size, u32 num_heads)
2217 {
2218         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2219         struct dce6_wm_params wm_low, wm_high;
2220         u32 dram_channels;
2221         u32 active_time;
2222         u32 line_time = 0;
2223         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2224         u32 priority_a_mark = 0, priority_b_mark = 0;
2225         u32 priority_a_cnt = PRIORITY_OFF;
2226         u32 priority_b_cnt = PRIORITY_OFF;
2227         u32 tmp, arb_control3;
2228         fixed20_12 a, b, c;
2229
2230         if (radeon_crtc->base.enabled && num_heads && mode) {
2231                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2232                                             (u32)mode->clock);
2233                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2234                                           (u32)mode->clock);
2235                 line_time = min(line_time, (u32)65535);
2236                 priority_a_cnt = 0;
2237                 priority_b_cnt = 0;
2238
2239                 if (rdev->family == CHIP_ARUBA)
2240                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2241                 else
2242                         dram_channels = si_get_number_of_dram_channels(rdev);
2243
2244                 /* watermark for high clocks */
2245                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2246                         wm_high.yclk =
2247                                 radeon_dpm_get_mclk(rdev, false) * 10;
2248                         wm_high.sclk =
2249                                 radeon_dpm_get_sclk(rdev, false) * 10;
2250                 } else {
2251                         wm_high.yclk = rdev->pm.current_mclk * 10;
2252                         wm_high.sclk = rdev->pm.current_sclk * 10;
2253                 }
2254
2255                 wm_high.disp_clk = mode->clock;
2256                 wm_high.src_width = mode->crtc_hdisplay;
2257                 wm_high.active_time = active_time;
2258                 wm_high.blank_time = line_time - wm_high.active_time;
2259                 wm_high.interlaced = false;
2260                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2261                         wm_high.interlaced = true;
2262                 wm_high.vsc = radeon_crtc->vsc;
2263                 wm_high.vtaps = 1;
2264                 if (radeon_crtc->rmx_type != RMX_OFF)
2265                         wm_high.vtaps = 2;
2266                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2267                 wm_high.lb_size = lb_size;
2268                 wm_high.dram_channels = dram_channels;
2269                 wm_high.num_heads = num_heads;
2270
2271                 /* watermark for low clocks */
2272                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2273                         wm_low.yclk =
2274                                 radeon_dpm_get_mclk(rdev, true) * 10;
2275                         wm_low.sclk =
2276                                 radeon_dpm_get_sclk(rdev, true) * 10;
2277                 } else {
2278                         wm_low.yclk = rdev->pm.current_mclk * 10;
2279                         wm_low.sclk = rdev->pm.current_sclk * 10;
2280                 }
2281
2282                 wm_low.disp_clk = mode->clock;
2283                 wm_low.src_width = mode->crtc_hdisplay;
2284                 wm_low.active_time = active_time;
2285                 wm_low.blank_time = line_time - wm_low.active_time;
2286                 wm_low.interlaced = false;
2287                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2288                         wm_low.interlaced = true;
2289                 wm_low.vsc = radeon_crtc->vsc;
2290                 wm_low.vtaps = 1;
2291                 if (radeon_crtc->rmx_type != RMX_OFF)
2292                         wm_low.vtaps = 2;
2293                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2294                 wm_low.lb_size = lb_size;
2295                 wm_low.dram_channels = dram_channels;
2296                 wm_low.num_heads = num_heads;
2297
2298                 /* set for high clocks */
2299                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2300                 /* set for low clocks */
2301                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2302
2303                 /* possibly force display priority to high */
2304                 /* should really do this at mode validation time... */
2305                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2306                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2307                     !dce6_check_latency_hiding(&wm_high) ||
2308                     (rdev->disp_priority == 2)) {
2309                         DRM_DEBUG_KMS("force priority to high\n");
2310                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2311                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2312                 }
2313                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2314                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2315                     !dce6_check_latency_hiding(&wm_low) ||
2316                     (rdev->disp_priority == 2)) {
2317                         DRM_DEBUG_KMS("force priority to high\n");
2318                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2319                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2320                 }
2321
2322                 a.full = dfixed_const(1000);
2323                 b.full = dfixed_const(mode->clock);
2324                 b.full = dfixed_div(b, a);
2325                 c.full = dfixed_const(latency_watermark_a);
2326                 c.full = dfixed_mul(c, b);
2327                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2328                 c.full = dfixed_div(c, a);
2329                 a.full = dfixed_const(16);
2330                 c.full = dfixed_div(c, a);
2331                 priority_a_mark = dfixed_trunc(c);
2332                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2333
2334                 a.full = dfixed_const(1000);
2335                 b.full = dfixed_const(mode->clock);
2336                 b.full = dfixed_div(b, a);
2337                 c.full = dfixed_const(latency_watermark_b);
2338                 c.full = dfixed_mul(c, b);
2339                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2340                 c.full = dfixed_div(c, a);
2341                 a.full = dfixed_const(16);
2342                 c.full = dfixed_div(c, a);
2343                 priority_b_mark = dfixed_trunc(c);
2344                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2345
2346                 /* Save number of lines the linebuffer leads before the scanout */
2347                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2348         }
2349
2350         /* select wm A */
2351         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2352         tmp = arb_control3;
2353         tmp &= ~LATENCY_WATERMARK_MASK(3);
2354         tmp |= LATENCY_WATERMARK_MASK(1);
2355         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2356         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2357                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2358                 LATENCY_HIGH_WATERMARK(line_time)));
2359         /* select wm B */
2360         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2361         tmp &= ~LATENCY_WATERMARK_MASK(3);
2362         tmp |= LATENCY_WATERMARK_MASK(2);
2363         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2364         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2365                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2366                 LATENCY_HIGH_WATERMARK(line_time)));
2367         /* restore original selection */
2368         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2369
2370         /* write the priority marks */
2371         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2372         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2373
2374         /* save values for DPM */
2375         radeon_crtc->line_time = line_time;
2376         radeon_crtc->wm_high = latency_watermark_a;
2377         radeon_crtc->wm_low = latency_watermark_b;
2378 }
2379
2380 void dce6_bandwidth_update(struct radeon_device *rdev)
2381 {
2382         struct drm_display_mode *mode0 = NULL;
2383         struct drm_display_mode *mode1 = NULL;
2384         u32 num_heads = 0, lb_size;
2385         int i;
2386
2387         if (!rdev->mode_info.mode_config_initialized)
2388                 return;
2389
2390         radeon_update_display_priority(rdev);
2391
2392         for (i = 0; i < rdev->num_crtc; i++) {
2393                 if (rdev->mode_info.crtcs[i]->base.enabled)
2394                         num_heads++;
2395         }
2396         for (i = 0; i < rdev->num_crtc; i += 2) {
2397                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2398                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2399                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2400                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2401                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2402                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2403         }
2404 }
2405
2406 /*
2407  * Core functions
2408  */
2409 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2410 {
2411         u32 *tile = rdev->config.si.tile_mode_array;
2412         const u32 num_tile_mode_states =
2413                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2414         u32 reg_offset, split_equal_to_row_size;
2415
2416         switch (rdev->config.si.mem_row_size_in_kb) {
2417         case 1:
2418                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2419                 break;
2420         case 2:
2421         default:
2422                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2423                 break;
2424         case 4:
2425                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2426                 break;
2427         }
2428
2429         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2430                 tile[reg_offset] = 0;
2431
2432         switch(rdev->family) {
2433         case CHIP_TAHITI:
2434         case CHIP_PITCAIRN:
2435                 /* non-AA compressed depth or any compressed stencil */
2436                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2438                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2439                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2440                            NUM_BANKS(ADDR_SURF_16_BANK) |
2441                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2444                 /* 2xAA/4xAA compressed depth only */
2445                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2447                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2448                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2449                            NUM_BANKS(ADDR_SURF_16_BANK) |
2450                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2453                 /* 8xAA compressed depth only */
2454                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2456                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2457                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2458                            NUM_BANKS(ADDR_SURF_16_BANK) |
2459                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2462                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2463                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2465                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2467                            NUM_BANKS(ADDR_SURF_16_BANK) |
2468                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2472                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2473                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2474                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2475                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2476                            NUM_BANKS(ADDR_SURF_16_BANK) |
2477                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2480                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2481                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2482                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2483                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2484                            TILE_SPLIT(split_equal_to_row_size) |
2485                            NUM_BANKS(ADDR_SURF_16_BANK) |
2486                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2488                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2489                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2490                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2491                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2492                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2493                            TILE_SPLIT(split_equal_to_row_size) |
2494                            NUM_BANKS(ADDR_SURF_16_BANK) |
2495                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2498                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2499                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2501                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2502                            TILE_SPLIT(split_equal_to_row_size) |
2503                            NUM_BANKS(ADDR_SURF_16_BANK) |
2504                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2506                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2507                 /* 1D and 1D Array Surfaces */
2508                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2509                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2510                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2511                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2512                            NUM_BANKS(ADDR_SURF_16_BANK) |
2513                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2515                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2516                 /* Displayable maps. */
2517                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2518                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2519                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2520                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2521                            NUM_BANKS(ADDR_SURF_16_BANK) |
2522                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2524                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2525                 /* Display 8bpp. */
2526                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2529                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2530                            NUM_BANKS(ADDR_SURF_16_BANK) |
2531                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2533                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2534                 /* Display 16bpp. */
2535                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2537                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2538                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2539                            NUM_BANKS(ADDR_SURF_16_BANK) |
2540                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2542                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2543                 /* Display 32bpp. */
2544                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2547                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2548                            NUM_BANKS(ADDR_SURF_16_BANK) |
2549                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2551                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2552                 /* Thin. */
2553                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2554                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2555                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2557                            NUM_BANKS(ADDR_SURF_16_BANK) |
2558                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2560                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2561                 /* Thin 8 bpp. */
2562                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2564                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2565                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2566                            NUM_BANKS(ADDR_SURF_16_BANK) |
2567                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2569                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2570                 /* Thin 16 bpp. */
2571                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2573                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2574                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2575                            NUM_BANKS(ADDR_SURF_16_BANK) |
2576                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2578                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2579                 /* Thin 32 bpp. */
2580                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2582                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2583                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2584                            NUM_BANKS(ADDR_SURF_16_BANK) |
2585                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2588                 /* Thin 64 bpp. */
2589                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2591                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2592                            TILE_SPLIT(split_equal_to_row_size) |
2593                            NUM_BANKS(ADDR_SURF_16_BANK) |
2594                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2596                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2597                 /* 8 bpp PRT. */
2598                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2599                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2600                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2602                            NUM_BANKS(ADDR_SURF_16_BANK) |
2603                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2604                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2606                 /* 16 bpp PRT */
2607                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2609                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2610                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2611                            NUM_BANKS(ADDR_SURF_16_BANK) |
2612                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2614                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2615                 /* 32 bpp PRT */
2616                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2618                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2619                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2620                            NUM_BANKS(ADDR_SURF_16_BANK) |
2621                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2623                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2624                 /* 64 bpp PRT */
2625                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2627                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2628                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2629                            NUM_BANKS(ADDR_SURF_16_BANK) |
2630                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2632                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633                 /* 128 bpp PRT */
2634                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2636                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2637                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2638                            NUM_BANKS(ADDR_SURF_8_BANK) |
2639                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2642
2643                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2644                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2645                 break;
2646
2647         case CHIP_VERDE:
2648         case CHIP_OLAND:
2649         case CHIP_HAINAN:
2650                 /* non-AA compressed depth or any compressed stencil */
2651                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2653                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2655                            NUM_BANKS(ADDR_SURF_16_BANK) |
2656                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2658                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2659                 /* 2xAA/4xAA compressed depth only */
2660                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2662                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2664                            NUM_BANKS(ADDR_SURF_16_BANK) |
2665                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2667                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2668                 /* 8xAA compressed depth only */
2669                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2671                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2672                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2673                            NUM_BANKS(ADDR_SURF_16_BANK) |
2674                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2676                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2677                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2678                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2680                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2681                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2682                            NUM_BANKS(ADDR_SURF_16_BANK) |
2683                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2685                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2686                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2687                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2688                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2689                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2690                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2691                            NUM_BANKS(ADDR_SURF_16_BANK) |
2692                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2694                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2695                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2696                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2698                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699                            TILE_SPLIT(split_equal_to_row_size) |
2700                            NUM_BANKS(ADDR_SURF_16_BANK) |
2701                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2703                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2704                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2705                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2707                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708                            TILE_SPLIT(split_equal_to_row_size) |
2709                            NUM_BANKS(ADDR_SURF_16_BANK) |
2710                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2713                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2714                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2716                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717                            TILE_SPLIT(split_equal_to_row_size) |
2718                            NUM_BANKS(ADDR_SURF_16_BANK) |
2719                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2721                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2722                 /* 1D and 1D Array Surfaces */
2723                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2724                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2725                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2727                            NUM_BANKS(ADDR_SURF_16_BANK) |
2728                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2730                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2731                 /* Displayable maps. */
2732                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2733                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2736                            NUM_BANKS(ADDR_SURF_16_BANK) |
2737                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2739                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2740                 /* Display 8bpp. */
2741                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2745                            NUM_BANKS(ADDR_SURF_16_BANK) |
2746                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2749                 /* Display 16bpp. */
2750                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2751                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2752                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2754                            NUM_BANKS(ADDR_SURF_16_BANK) |
2755                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2758                 /* Display 32bpp. */
2759                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2760                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2761                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2762                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2763                            NUM_BANKS(ADDR_SURF_16_BANK) |
2764                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2765                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2766                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2767                 /* Thin. */
2768                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2769                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2772                            NUM_BANKS(ADDR_SURF_16_BANK) |
2773                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2775                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2776                 /* Thin 8 bpp. */
2777                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2778                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2779                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2781                            NUM_BANKS(ADDR_SURF_16_BANK) |
2782                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2784                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2785                 /* Thin 16 bpp. */
2786                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2788                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2790                            NUM_BANKS(ADDR_SURF_16_BANK) |
2791                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2793                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2794                 /* Thin 32 bpp. */
2795                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2797                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2799                            NUM_BANKS(ADDR_SURF_16_BANK) |
2800                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2802                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2803                 /* Thin 64 bpp. */
2804                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2806                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2807                            TILE_SPLIT(split_equal_to_row_size) |
2808                            NUM_BANKS(ADDR_SURF_16_BANK) |
2809                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2812                 /* 8 bpp PRT. */
2813                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2815                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2816                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2817                            NUM_BANKS(ADDR_SURF_16_BANK) |
2818                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2819                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2821                 /* 16 bpp PRT */
2822                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2823                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2824                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2825                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2826                            NUM_BANKS(ADDR_SURF_16_BANK) |
2827                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2830                 /* 32 bpp PRT */
2831                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2833                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2834                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2835                            NUM_BANKS(ADDR_SURF_16_BANK) |
2836                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2839                 /* 64 bpp PRT */
2840                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2841                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2842                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2843                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2844                            NUM_BANKS(ADDR_SURF_16_BANK) |
2845                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2847                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2848                 /* 128 bpp PRT */
2849                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2851                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2852                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2853                            NUM_BANKS(ADDR_SURF_8_BANK) |
2854                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2855                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2856                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2857
2858                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2859                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2860                 break;
2861
2862         default:
2863                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2864         }
2865 }
2866
2867 static void si_select_se_sh(struct radeon_device *rdev,
2868                             u32 se_num, u32 sh_num)
2869 {
2870         u32 data = INSTANCE_BROADCAST_WRITES;
2871
2872         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2873                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2874         else if (se_num == 0xffffffff)
2875                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2876         else if (sh_num == 0xffffffff)
2877                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2878         else
2879                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2880         WREG32(GRBM_GFX_INDEX, data);
2881 }
2882
2883 static u32 si_create_bitmask(u32 bit_width)
2884 {
2885         u32 i, mask = 0;
2886
2887         for (i = 0; i < bit_width; i++) {
2888                 mask <<= 1;
2889                 mask |= 1;
2890         }
2891         return mask;
2892 }
2893
2894 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2895 {
2896         u32 data, mask;
2897
2898         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2899         if (data & 1)
2900                 data &= INACTIVE_CUS_MASK;
2901         else
2902                 data = 0;
2903         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2904
2905         data >>= INACTIVE_CUS_SHIFT;
2906
2907         mask = si_create_bitmask(cu_per_sh);
2908
2909         return ~data & mask;
2910 }
2911
/*
 * si_setup_spi - adjust SPI static thread management per shader array
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @cu_per_sh: maximum number of CUs per shader array
 *
 * For each SE/SH pair, clears one CU bit in SPI_STATIC_THREAD_MGMT_3
 * corresponding to an active CU (a cleared bit here enables the CU for
 * the SPI), then restores broadcast indexing.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			/* steer register access to this SE/SH only */
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				/* NOTE(review): 'mask <<= k' probes bits
				 * 0, 1, 3, 6, 10, 15, ... rather than every
				 * bit position; presumably 'mask = 1 << k'
				 * was intended — confirm against the
				 * equivalent cik.c/amdgpu code before
				 * changing, as this matches upstream. */
				mask <<= k;
				if (active_cu & mask) {
					/* clear (enable) the first active CU
					 * found, then stop for this SH */
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	/* restore broadcast to all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2938
2939 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2940                               u32 max_rb_num_per_se,
2941                               u32 sh_per_se)
2942 {
2943         u32 data, mask;
2944
2945         data = RREG32(CC_RB_BACKEND_DISABLE);
2946         if (data & 1)
2947                 data &= BACKEND_DISABLE_MASK;
2948         else
2949                 data = 0;
2950         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2951
2952         data >>= BACKEND_DISABLE_SHIFT;
2953
2954         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2955
2956         return data & mask;
2957 }
2958
/*
 * si_setup_rb - configure the render backend (RB) raster mapping
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num_per_se: maximum render backends per shader engine
 *
 * Collects the harvested-RB bits of every SE/SH into one bitmap, caches
 * the inverse (enabled RBs) for userspace queries, and programs
 * PA_SC_RASTER_CONFIG per SE based on which RBs in each pair are usable.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather per-SH disabled-RB bits into one packed bitmap */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit per RB that is present and usable */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* cached for the userspace backend-enable query */
	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* program the raster config per SE, consuming 2 RB bits per SH */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3008
/*
 * si_gpu_init - bring up and configure the SI graphics block
 * @rdev: radeon device
 *
 * Fills in the per-ASIC shader/backend topology limits, initializes the
 * HDP registers, derives the tiling configuration from the memory
 * controller settings, writes GB_ADDR_CONFIG to all consumers, sets up
 * the RB and SPI mappings, counts active CUs, and programs the 3D
 * engine's hardware defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader engine / pipe / cache topology limits */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * candidate for removal (later kernels dropped it). */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the MC column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that consumes it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count the total number of active compute units */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the configuration settle before first use */
	udelay(50);
}
3274
3275 /*
3276  * GPU scratch registers helpers function.
3277  */
3278 static void si_scratch_init(struct radeon_device *rdev)
3279 {
3280         int i;
3281
3282         rdev->scratch.num_reg = 7;
3283         rdev->scratch.reg_base = SCRATCH_REG0;
3284         for (i = 0; i < rdev->scratch.num_reg; i++) {
3285                 rdev->scratch.free[i] = true;
3286                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3287         }
3288 }
3289
/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon device
 * @fence: fence to emit
 *
 * Emits a SURFACE_SYNC to flush the GPU read caches over GART, then an
 * EVENT_WRITE_EOP packet that writes the fence sequence number to the
 * fence driver's GPU address and raises an interrupt.  The exact packet
 * order and dword count are fixed by the CP packet format — do not
 * reorder or insert writes.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3316
3317 /*
3318  * IB stuff
3319  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on the gfx ring
 * @rdev: radeon device
 * @ib: indirect buffer to schedule
 *
 * Emits the packets that make the CP fetch and execute @ib.  Const IBs
 * get a SWITCH_BUFFER preamble and use INDIRECT_BUFFER_CONST; normal IBs
 * first record the post-IB read pointer (either via the rptr save
 * register or a WRITE_DATA to the writeback slot) and afterwards flush
 * the read caches for the IB's VM id.  The next_rptr offsets (3+4+8 and
 * 5+4+8) count the dwords emitted by this function after the respective
 * branch — keep them in sync with any packet changes.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* the IB packet itself: address (low dword must be 4-byte aligned),
	 * upper address bits, then size plus the VM id in bits 31:24 */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3376
3377 /*
3378  * CP.
3379  */
3380 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3381 {
3382         if (enable)
3383                 WREG32(CP_ME_CNTL, 0);
3384         else {
3385                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3386                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3387                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3388                 WREG32(SCRATCH_UMSK, 0);
3389                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3390                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3391                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3392         }
3393         udelay(50);
3394 }
3395
3396 static int si_cp_load_microcode(struct radeon_device *rdev)
3397 {
3398         int i;
3399
3400         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3401                 return -EINVAL;
3402
3403         si_cp_enable(rdev, false);
3404
3405         if (rdev->new_fw) {
3406                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3407                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3408                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3409                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3410                 const struct gfx_firmware_header_v1_0 *me_hdr =
3411                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3412                 const __le32 *fw_data;
3413                 u32 fw_size;
3414
3415                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3416                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3417                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3418
3419                 /* PFP */
3420                 fw_data = (const __le32 *)
3421                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3422                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3423                 WREG32(CP_PFP_UCODE_ADDR, 0);
3424                 for (i = 0; i < fw_size; i++)
3425                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3426                 WREG32(CP_PFP_UCODE_ADDR, 0);
3427
3428                 /* CE */
3429                 fw_data = (const __le32 *)
3430                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3431                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3432                 WREG32(CP_CE_UCODE_ADDR, 0);
3433                 for (i = 0; i < fw_size; i++)
3434                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3435                 WREG32(CP_CE_UCODE_ADDR, 0);
3436
3437                 /* ME */
3438                 fw_data = (const __be32 *)
3439                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3440                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3441                 WREG32(CP_ME_RAM_WADDR, 0);
3442                 for (i = 0; i < fw_size; i++)
3443                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3444                 WREG32(CP_ME_RAM_WADDR, 0);
3445         } else {
3446                 const __be32 *fw_data;
3447
3448                 /* PFP */
3449                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3450                 WREG32(CP_PFP_UCODE_ADDR, 0);
3451                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3452                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3453                 WREG32(CP_PFP_UCODE_ADDR, 0);
3454
3455                 /* CE */
3456                 fw_data = (const __be32 *)rdev->ce_fw->data;
3457                 WREG32(CP_CE_UCODE_ADDR, 0);
3458                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3459                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3460                 WREG32(CP_CE_UCODE_ADDR, 0);
3461
3462                 /* ME */
3463                 fw_data = (const __be32 *)rdev->me_fw->data;
3464                 WREG32(CP_ME_RAM_WADDR, 0);
3465                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3466                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3467                 WREG32(CP_ME_RAM_WADDR, 0);
3468         }
3469
3470         WREG32(CP_PFP_UCODE_ADDR, 0);
3471         WREG32(CP_CE_UCODE_ADDR, 0);
3472         WREG32(CP_ME_RAM_WADDR, 0);
3473         WREG32(CP_ME_RAM_RADDR, 0);
3474         return 0;
3475 }
3476
3477 static int si_cp_start(struct radeon_device *rdev)
3478 {
3479         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3480         int r, i;
3481
3482         r = radeon_ring_lock(rdev, ring, 7 + 4);
3483         if (r) {
3484                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3485                 return r;
3486         }
3487         /* init the CP */
3488         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3489         radeon_ring_write(ring, 0x1);
3490         radeon_ring_write(ring, 0x0);
3491         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3492         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3493         radeon_ring_write(ring, 0);
3494         radeon_ring_write(ring, 0);
3495
3496         /* init the CE partitions */
3497         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3498         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3499         radeon_ring_write(ring, 0xc000);
3500         radeon_ring_write(ring, 0xe000);
3501         radeon_ring_unlock_commit(rdev, ring, false);
3502
3503         si_cp_enable(rdev, true);
3504
3505         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3506         if (r) {
3507                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3508                 return r;
3509         }
3510
3511         /* setup clear context state */
3512         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3513         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3514
3515         for (i = 0; i < si_default_size; i++)
3516                 radeon_ring_write(ring, si_default_state[i]);
3517
3518         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3519         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3520
3521         /* set clear context state */
3522         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3523         radeon_ring_write(ring, 0);
3524
3525         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3526         radeon_ring_write(ring, 0x00000316);
3527         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3528         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3529
3530         radeon_ring_unlock_commit(rdev, ring, false);
3531
3532         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3533                 ring = &rdev->ring[i];
3534                 r = radeon_ring_lock(rdev, ring, 2);
3535
3536                 /* clear the compute context state */
3537                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3538                 radeon_ring_write(ring, 0);
3539
3540                 radeon_ring_unlock_commit(rdev, ring, false);
3541         }
3542
3543         return 0;
3544 }
3545
3546 static void si_cp_fini(struct radeon_device *rdev)
3547 {
3548         struct radeon_ring *ring;
3549         si_cp_enable(rdev, false);
3550
3551         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3552         radeon_ring_fini(rdev, ring);
3553         radeon_scratch_free(rdev, ring->rptr_save_reg);
3554
3555         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3556         radeon_ring_fini(rdev, ring);
3557         radeon_scratch_free(rdev, ring->rptr_save_reg);
3558
3559         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3560         radeon_ring_fini(rdev, ring);
3561         radeon_scratch_free(rdev, ring->rptr_save_reg);
3562 }
3563
/**
 * si_cp_resume - program and start the three CP ring buffers
 * @rdev: radeon_device pointer
 *
 * Programs ring buffer size, read/write pointers, writeback addresses
 * and base address for the gfx ring (RB0) and both compute rings
 * (RB1/RB2), starts the CP via si_cp_start() and ring-tests each ring.
 * Returns 0 on success, or the gfx ring test error (failures of the
 * compute ring tests only clear that ring's ready flag).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback buffer: disable rptr updates to memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	/* clear RB_RPTR_WR_ENA again by rewriting the final CNTL value */
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: mark all rings not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal: just clear that ring */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3689
/**
 * si_gpu_check_soft_reset - determine which engines are hung
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA, and VM L2 status registers and translates
 * any busy/pending bits into a mask of RADEON_RESET_* flags describing
 * which blocks need a soft reset.  An MC-busy indication is cleared
 * from the mask before returning (treated as busy, not hung).
 * Returns the reset mask (0 if nothing appears hung).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3770
/**
 * si_gpu_soft_reset - soft reset the blocks named in @reset_mask
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (from si_gpu_check_soft_reset)
 *
 * Quiesces the GPU (PG/CG off, RLC stopped, CP halted, DMA ring
 * buffers disabled, MC stopped), translates the reset mask into
 * GRBM/SRBM soft-reset bits, pulses those reset registers, and then
 * resumes the MC.  The ordering of these steps is what the hardware
 * requires; do not reorder.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump fault state for debugging before the reset clears it */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* build the GRBM/SRBM soft-reset bit masks from the reset mask */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* pulse the GRBM reset bits: set, wait, then clear.
		 * The read-back after each write posts the write. */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same set/wait/clear pulse for the SRBM reset bits */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3902
/**
 * si_set_clk_bypass_mode - route sclk/mclk around the PLLs
 * @rdev: radeon_device pointer
 *
 * Enables SPLL bypass, requests the control change and polls
 * SPLL_STATUS (bounded by rdev->usec_timeout) for it to take effect,
 * then deasserts the request and switches MCLK off the MPLL.  Used
 * before a PCI config reset so the chip runs on bypass clocks.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* request the control-register change */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the SPLL to acknowledge the change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* take MCLK off the MPLL as well */
	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3929
/**
 * si_spll_powerdown - put the SPLL into reset/sleep
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts its reset and sleep
 * bits, then releases software control.  Called after switching to
 * bypass clocks (si_set_clk_bypass_mode) and before a PCI config
 * reset.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	/* take direct software control of the SPLL */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* release software control again */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
3950
/**
 * si_gpu_pci_config_reset - full asic reset via PCI config space
 * @rdev: radeon_device pointer
 *
 * Heavier-weight reset used when the soft reset is insufficient:
 * quiesces the CP, DMA engines, RLC and MC, switches the clocks to
 * bypass and powers down the SPLL, disables bus mastering, then
 * triggers the reset through PCI config space and waits (bounded by
 * rdev->usec_timeout) for the asic to come back.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads as all 1s while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4002
/**
 * si_asic_reset - reset the asic
 * @rdev: radeon_device pointer
 * @hard: true to skip straight to a PCI config reset
 *
 * Escalating reset: first try a soft reset of the hung blocks; if the
 * GPU still reports hung blocks afterwards and the radeon_hard_reset
 * module option allows it, fall back to a PCI config reset.  The BIOS
 * scratch "engine hung" flag is set while a hang is detected and
 * cleared once the GPU comes back clean.
 * Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		si_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4033
4034 /**
4035  * si_gfx_is_lockup - Check if the GFX engine is locked up
4036  *
4037  * @rdev: radeon_device pointer
4038  * @ring: radeon_ring structure holding ring information
4039  *
4040  * Check if the GFX engine is locked up.
4041  * Returns true if the engine appears to be locked up, false if not.
4042  */
4043 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4044 {
4045         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4046
4047         if (!(reset_mask & (RADEON_RESET_GFX |
4048                             RADEON_RESET_COMPUTE |
4049                             RADEON_RESET_CP))) {
4050                 radeon_ring_lockup_update(rdev, ring);
4051                 return false;
4052         }
4053         return radeon_ring_test_lockup(rdev, ring);
4054 }
4055
4056 /* MC */
/**
 * si_mc_program - program the memory controller aperture registers
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tile registers, stops the MC, locks out VGA access,
 * programs the system/FB aperture and AGP registers from rdev->mc,
 * then resumes the MC and disables the VGA renderer.  MC idle waits
 * bracket the register update; a timeout only warns.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top half = end, bottom half = start, in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture disabled: top < bottom */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4107
/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 * @rdev: radeon_device pointer
 * @mc: memory controller structure to fill in
 *
 * Caps VRAM so at least 1024M of address space remains for the GTT,
 * then lets the common helpers pick the VRAM and GTT base addresses.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	/* NOTE(review): passes &rdev->mc here but the @mc parameter below;
	 * presumably callers always pass &rdev->mc — confirm before reusing
	 * this with any other radeon_mc instance. */
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
4121
4122 static int si_mc_init(struct radeon_device *rdev)
4123 {
4124         u32 tmp;
4125         int chansize, numchan;
4126
4127         /* Get VRAM informations */
4128         rdev->mc.vram_is_ddr = true;
4129         tmp = RREG32(MC_ARB_RAMCFG);
4130         if (tmp & CHANSIZE_OVERRIDE) {
4131                 chansize = 16;
4132         } else if (tmp & CHANSIZE_MASK) {
4133                 chansize = 64;
4134         } else {
4135                 chansize = 32;
4136         }
4137         tmp = RREG32(MC_SHARED_CHMAP);
4138         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4139         case 0:
4140         default:
4141                 numchan = 1;
4142                 break;
4143         case 1:
4144                 numchan = 2;
4145                 break;
4146         case 2:
4147                 numchan = 4;
4148                 break;
4149         case 3:
4150                 numchan = 8;
4151                 break;
4152         case 4:
4153                 numchan = 3;
4154                 break;
4155         case 5:
4156                 numchan = 6;
4157                 break;
4158         case 6:
4159                 numchan = 10;
4160                 break;
4161         case 7:
4162                 numchan = 12;
4163                 break;
4164         case 8:
4165                 numchan = 16;
4166                 break;
4167         }
4168         rdev->mc.vram_width = numchan * chansize;
4169         /* Could aper size report 0 ? */
4170         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4171         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4172         /* size in MB on si */
4173         tmp = RREG32(CONFIG_MEMSIZE);
4174         /* some boards may have garbage in the upper 16 bits */
4175         if (tmp & 0xffff0000) {
4176                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4177                 if (tmp & 0xffff)
4178                         tmp &= 0xffff;
4179         }
4180         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4181         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4182         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4183         si_vram_gtt_location(rdev, &rdev->mc);
4184         radeon_update_bandwidth_info(rdev);
4185
4186         return 0;
4187 }
4188
4189 /*
4190  * GART
4191  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and all VM TLBs
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache first so in-flight writes land, then
 * invalidates the context 0 TLB.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4200
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, configures the L1 TLB and L2
 * cache, programs VM context 0 as the system GART context, restores
 * the saved page table base addresses for contexts 1-15 (used for
 * per-process VMs) with full protection-fault reporting, and flushes
 * the TLBs.  Returns 0 on success or a negative error code.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* unmapped accesses in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4288
/**
 * si_pcie_gart_disable - disable the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Saves the per-VM page table base addresses for contexts 1-15 (so
 * si_pcie_gart_enable() can restore them on resume), disables both VM
 * contexts, puts the TLB/L2 into pass-through mode, and unpins the
 * GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* remember the page table bases before tearing the contexts down */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4318
/* Full GART teardown: disable the hardware, then free the page table
 * buffer object and the GART manager state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4325
4326 /* vm parser */
4327 static bool si_vm_reg_valid(u32 reg)
4328 {
4329         /* context regs are fine */
4330         if (reg >= 0x28000)
4331                 return true;
4332
4333         /* shader regs are also fine */
4334         if (reg >= 0xB000 && reg < 0xC000)
4335                 return true;
4336
4337         /* check config regs */
4338         switch (reg) {
4339         case GRBM_GFX_INDEX:
4340         case CP_STRMOUT_CNTL:
4341         case VGT_VTX_VECT_EJECT_REG:
4342         case VGT_CACHE_INVALIDATION:
4343         case VGT_ESGS_RING_SIZE:
4344         case VGT_GSVS_RING_SIZE:
4345         case VGT_GS_VERTEX_REUSE:
4346         case VGT_PRIMITIVE_TYPE:
4347         case VGT_INDEX_TYPE:
4348         case VGT_NUM_INDICES:
4349         case VGT_NUM_INSTANCES:
4350         case VGT_TF_RING_SIZE:
4351         case VGT_HS_OFFCHIP_PARAM:
4352         case VGT_TF_MEMORY_BASE:
4353         case PA_CL_ENHANCE:
4354         case PA_SU_LINE_STIPPLE_VALUE:
4355         case PA_SC_LINE_STIPPLE_STATE:
4356         case PA_SC_ENHANCE:
4357         case SQC_CACHES:
4358         case SPI_STATIC_THREAD_MGMT_1:
4359         case SPI_STATIC_THREAD_MGMT_2:
4360         case SPI_STATIC_THREAD_MGMT_3:
4361         case SPI_PS_MAX_WAVE_ID:
4362         case SPI_CONFIG_CNTL:
4363         case SPI_CONFIG_CNTL_1:
4364         case TA_CNTL_AUX:
4365         case TA_CS_BC_BASE_ADDR:
4366                 return true;
4367         default:
4368                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4369                 return false;
4370         }
4371 }
4372
4373 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4374                                   u32 *ib, struct radeon_cs_packet *pkt)
4375 {
4376         switch (pkt->opcode) {
4377         case PACKET3_NOP:
4378         case PACKET3_SET_BASE:
4379         case PACKET3_SET_CE_DE_COUNTERS:
4380         case PACKET3_LOAD_CONST_RAM:
4381         case PACKET3_WRITE_CONST_RAM:
4382         case PACKET3_WRITE_CONST_RAM_OFFSET:
4383         case PACKET3_DUMP_CONST_RAM:
4384         case PACKET3_INCREMENT_CE_COUNTER:
4385         case PACKET3_WAIT_ON_DE_COUNTER:
4386         case PACKET3_CE_WRITE:
4387                 break;
4388         default:
4389                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4390                 return -EINVAL;
4391         }
4392         return 0;
4393 }
4394
4395 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4396 {
4397         u32 start_reg, reg, i;
4398         u32 command = ib[idx + 4];
4399         u32 info = ib[idx + 1];
4400         u32 idx_value = ib[idx];
4401         if (command & PACKET3_CP_DMA_CMD_SAS) {
4402                 /* src address space is register */
4403                 if (((info & 0x60000000) >> 29) == 0) {
4404                         start_reg = idx_value << 2;
4405                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4406                                 reg = start_reg;
4407                                 if (!si_vm_reg_valid(reg)) {
4408                                         DRM_ERROR("CP DMA Bad SRC register\n");
4409                                         return -EINVAL;
4410                                 }
4411                         } else {
4412                                 for (i = 0; i < (command & 0x1fffff); i++) {
4413                                         reg = start_reg + (4 * i);
4414                                         if (!si_vm_reg_valid(reg)) {
4415                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4416                                                 return -EINVAL;
4417                                         }
4418                                 }
4419                         }
4420                 }
4421         }
4422         if (command & PACKET3_CP_DMA_CMD_DAS) {
4423                 /* dst address space is register */
4424                 if (((info & 0x00300000) >> 20) == 0) {
4425                         start_reg = ib[idx + 2];
4426                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4427                                 reg = start_reg;
4428                                 if (!si_vm_reg_valid(reg)) {
4429                                         DRM_ERROR("CP DMA Bad DST register\n");
4430                                         return -EINVAL;
4431                                 }
4432                         } else {
4433                                 for (i = 0; i < (command & 0x1fffff); i++) {
4434                                         reg = start_reg + (4 * i);
4435                                 if (!si_vm_reg_valid(reg)) {
4436                                                 DRM_ERROR("CP DMA Bad DST register\n");
4437                                                 return -EINVAL;
4438                                         }
4439                                 }
4440                         }
4441                 }
4442         }
4443         return 0;
4444 }
4445
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the gfx ring of a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: ib buffer dwords
 * @pkt: packet to check
 *
 * Opcodes that cannot target arbitrary registers pass through unchecked;
 * opcodes that can write registers have every target register vetted with
 * si_vm_reg_valid().  Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1; /* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes carry no register destinations - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* field == 0: the destination is a register - validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* field == 0: destination is register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register write mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential writes: check every register in range */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit set: the write target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit set: the copy destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the config register window, then each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4563
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring of a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: ib buffer dwords
 * @pkt: packet to check
 *
 * Same scheme as si_vm_packet3_gfx_check() but with the (smaller) set of
 * opcodes legal on the compute rings: harmless opcodes are allowed through,
 * register-writing opcodes have every target vetted with si_vm_reg_valid().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1; /* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes carry no register destinations - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* field == 0: the destination is a register - validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* field == 0: destination is register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register write mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential writes: check every register in range */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit set: the write target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit set: the copy destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4651
/**
 * si_ib_parse - validate an indirect buffer submitted to a VM
 *
 * @rdev: radeon_device pointer
 * @ib: ib to parse
 *
 * Walks the IB packet by packet and dispatches each type-3 packet to the
 * checker for the engine/ring it was submitted to (CE, gfx, or compute).
 * Type-0 packets and unknown packet types are rejected outright.  On any
 * failure the whole IB is dumped with the offending packet marked.
 * Returns 0 if the IB is valid, a negative error code otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* direct register writes are never allowed from a VM IB */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are one-dword padding - just skip */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* advance past header + body (count is dwords - 1) */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the entire IB, marking the packet that failed */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4710
4711 /*
4712  * vm
4713  */
4714 int si_vm_init(struct radeon_device *rdev)
4715 {
4716         /* number of VMs */
4717         rdev->vm_manager.nvm = 16;
4718         /* base offset of vram pages */
4719         rdev->vm_manager.vram_base_offset = 0;
4720
4721         return 0;
4722 }
4723
/* Nothing to tear down - si_vm_init() allocates no resources. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4727
4728 /**
4729  * si_vm_decode_fault - print human readable fault info
4730  *
4731  * @rdev: radeon_device pointer
4732  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4733  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4734  *
4735  * Print human readable fault information (SI).
4736  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* decode the fault status register fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* Tahiti uses a different memory-client-id -> block mapping than
	 * the other SI parts, hence the two parallel switch tables.
	 */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* mapping for all non-Tahiti SI parts */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4991
/**
 * si_vm_flush - update a VM's page directory and flush its TLB via the ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush commands on
 * @vm_id: VM context id (0-15) to flush
 * @pd_addr: new page directory base address
 *
 * Emits PM4 commands that write the new page directory base for @vm_id,
 * flush the HDP cache, request a TLB invalidate for the VM, wait for
 * the invalidate to finish, and finally resynchronize the PFP with the ME.
 * The command ordering below is required; do not reorder the writes.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5040
5041 /*
5042  *  Power and clock gating
5043  */
5044 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5045 {
5046         int i;
5047
5048         for (i = 0; i < rdev->usec_timeout; i++) {
5049                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5050                         break;
5051                 udelay(1);
5052         }
5053
5054         for (i = 0; i < rdev->usec_timeout; i++) {
5055                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5056                         break;
5057                 udelay(1);
5058         }
5059 }
5060
/* Enable or disable the context busy/empty (gui idle) interrupts on the
 * gfx ring.  When disabling, also wait until the RLC reports the gfx
 * block clocked and powered with RLC/LS idle, or until timeout.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		/* wait for RLC_STAT to show only clock+power status bits set */
		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5086
5087 static void si_set_uvd_dcm(struct radeon_device *rdev,
5088                            bool sw_mode)
5089 {
5090         u32 tmp, tmp2;
5091
5092         tmp = RREG32(UVD_CGC_CTRL);
5093         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5094         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5095
5096         if (sw_mode) {
5097                 tmp &= ~0x7ffff800;
5098                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5099         } else {
5100                 tmp |= 0x7ffff800;
5101                 tmp2 = 0;
5102         }
5103
5104         WREG32(UVD_CGC_CTRL, tmp);
5105         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5106 }
5107
/* Initialize UVD internal clock gating.  hw_mode is hard-wired true;
 * the else branch (clear the DCM bit directly) is dead code kept for
 * reference/debugging.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
5120
5121 static u32 si_halt_rlc(struct radeon_device *rdev)
5122 {
5123         u32 data, orig;
5124
5125         orig = data = RREG32(RLC_CNTL);
5126
5127         if (data & RLC_ENABLE) {
5128                 data &= ~RLC_ENABLE;
5129                 WREG32(RLC_CNTL, data);
5130
5131                 si_wait_for_rlc_serdes(rdev);
5132         }
5133
5134         return orig;
5135 }
5136
5137 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5138 {
5139         u32 tmp;
5140
5141         tmp = RREG32(RLC_CNTL);
5142         if (tmp != rlc)
5143                 WREG32(RLC_CNTL, rlc);
5144 }
5145
5146 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5147 {
5148         u32 data, orig;
5149
5150         orig = data = RREG32(DMA_PG);
5151         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5152                 data |= PG_CNTL_ENABLE;
5153         else
5154                 data &= ~PG_CNTL_ENABLE;
5155         if (orig != data)
5156                 WREG32(DMA_PG, data);
5157 }
5158
5159 static void si_init_dma_pg(struct radeon_device *rdev)
5160 {
5161         u32 tmp;
5162
5163         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5164         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5165
5166         for (tmp = 0; tmp < 5; tmp++)
5167                 WREG32(DMA_PGFSM_WRITE, 0);
5168 }
5169
/* Enable or disable gfx coarse-grain powergating (automatic PG driven
 * by the RLC).  Only actually enabled when RADEON_PG_SUPPORT_GFX_PG is
 * set in rdev->pg_flags.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* program the RLC power-up/down/ttp/ms delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy gfx register read; presumably flushes/settles the
		 * disable before the caller proceeds -- TODO confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5194
5195 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5196 {
5197         u32 tmp;
5198
5199         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5200
5201         tmp = RREG32(RLC_PG_CNTL);
5202         tmp |= GFX_PG_SRC;
5203         WREG32(RLC_PG_CNTL, tmp);
5204
5205         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5206
5207         tmp = RREG32(RLC_AUTO_PG_CTRL);
5208
5209         tmp &= ~GRBM_REG_SGIT_MASK;
5210         tmp |= GRBM_REG_SGIT(0x700);
5211         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5212         WREG32(RLC_AUTO_PG_CTRL, tmp);
5213 }
5214
5215 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5216 {
5217         u32 mask = 0, tmp, tmp1;
5218         int i;
5219
5220         si_select_se_sh(rdev, se, sh);
5221         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5222         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5223         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5224
5225         tmp &= 0xffff0000;
5226
5227         tmp |= tmp1;
5228         tmp >>= 16;
5229
5230         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5231                 mask <<= 1;
5232                 mask |= 1;
5233         }
5234
5235         return (~tmp) & mask;
5236 }
5237
5238 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5239 {
5240         u32 i, j, k, active_cu_number = 0;
5241         u32 mask, counter, cu_bitmap;
5242         u32 tmp = 0;
5243
5244         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5245                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5246                         mask = 1;
5247                         cu_bitmap = 0;
5248                         counter  = 0;
5249                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5250                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5251                                         if (counter < 2)
5252                                                 cu_bitmap |= mask;
5253                                         counter++;
5254                                 }
5255                                 mask <<= 1;
5256                         }
5257
5258                         active_cu_number += counter;
5259                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5260                 }
5261         }
5262
5263         WREG32(RLC_PG_AO_CU_MASK, tmp);
5264
5265         tmp = RREG32(RLC_MAX_PG_CU);
5266         tmp &= ~MAX_PU_CU_MASK;
5267         tmp |= MAX_PU_CU(active_cu_number);
5268         WREG32(RLC_MAX_PG_CU, tmp);
5269 }
5270
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the gfx block.
 *
 * The enable path halts the RLC, broadcasts a serdes write to all
 * SE/SH instances and waits for it to complete before restoring the
 * RLC; the order of these register writes matters.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to all SE/SH instances */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the pre-halt RLC_CNTL value */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads; presumably flush pending CB clock gating
		 * state before disabling -- TODO confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5310
/* Enable or disable medium-grain clock gating (MGCG) for the gfx block,
 * including optional CP memory light sleep (CP_LS).  Both paths halt
 * the RLC, issue a broadcast serdes write, and restore the RLC after.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits so MGCG takes effect */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to all SE/SH instances */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5366
5367 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5368                                bool enable)
5369 {
5370         u32 orig, data, tmp;
5371
5372         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5373                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5374                 tmp |= 0x3fff;
5375                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5376
5377                 orig = data = RREG32(UVD_CGC_CTRL);
5378                 data |= DCM;
5379                 if (orig != data)
5380                         WREG32(UVD_CGC_CTRL, data);
5381
5382                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5383                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5384         } else {
5385                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5386                 tmp &= ~0x3fff;
5387                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5388
5389                 orig = data = RREG32(UVD_CGC_CTRL);
5390                 data &= ~DCM;
5391                 if (orig != data)
5392                         WREG32(UVD_CGC_CTRL, data);
5393
5394                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5395                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5396         }
5397 }
5398
/* MC/ATC/VM clock-gating control registers shared by the MGCG and
 * light-sleep enable helpers below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5411
5412 static void si_enable_mc_ls(struct radeon_device *rdev,
5413                             bool enable)
5414 {
5415         int i;
5416         u32 orig, data;
5417
5418         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5419                 orig = data = RREG32(mc_cg_registers[i]);
5420                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5421                         data |= MC_LS_ENABLE;
5422                 else
5423                         data &= ~MC_LS_ENABLE;
5424                 if (data != orig)
5425                         WREG32(mc_cg_registers[i], data);
5426         }
5427 }
5428
5429 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5430                                bool enable)
5431 {
5432         int i;
5433         u32 orig, data;
5434
5435         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5436                 orig = data = RREG32(mc_cg_registers[i]);
5437                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5438                         data |= MC_CG_ENABLE;
5439                 else
5440                         data &= ~MC_CG_ENABLE;
5441                 if (data != orig)
5442                         WREG32(mc_cg_registers[i], data);
5443         }
5444 }
5445
5446 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5447                                bool enable)
5448 {
5449         u32 orig, data, offset;
5450         int i;
5451
5452         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5453                 for (i = 0; i < 2; i++) {
5454                         if (i == 0)
5455                                 offset = DMA0_REGISTER_OFFSET;
5456                         else
5457                                 offset = DMA1_REGISTER_OFFSET;
5458                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5459                         data &= ~MEM_POWER_OVERRIDE;
5460                         if (data != orig)
5461                                 WREG32(DMA_POWER_CNTL + offset, data);
5462                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5463                 }
5464         } else {
5465                 for (i = 0; i < 2; i++) {
5466                         if (i == 0)
5467                                 offset = DMA0_REGISTER_OFFSET;
5468                         else
5469                                 offset = DMA1_REGISTER_OFFSET;
5470                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5471                         data |= MEM_POWER_OVERRIDE;
5472                         if (data != orig)
5473                                 WREG32(DMA_POWER_CNTL + offset, data);
5474
5475                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5476                         data = 0xff000000;
5477                         if (data != orig)
5478                                 WREG32(DMA_CLK_CTRL + offset, data);
5479                 }
5480         }
5481 }
5482
5483 static void si_enable_bif_mgls(struct radeon_device *rdev,
5484                                bool enable)
5485 {
5486         u32 orig, data;
5487
5488         orig = data = RREG32_PCIE(PCIE_CNTL2);
5489
5490         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5491                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5492                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5493         else
5494                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5495                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5496
5497         if (orig != data)
5498                 WREG32_PCIE(PCIE_CNTL2, data);
5499 }
5500
5501 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5502                                bool enable)
5503 {
5504         u32 orig, data;
5505
5506         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5507
5508         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5509                 data &= ~CLOCK_GATING_DIS;
5510         else
5511                 data |= CLOCK_GATING_DIS;
5512
5513         if (orig != data)
5514                 WREG32(HDP_HOST_PATH_CNTL, data);
5515 }
5516
5517 static void si_enable_hdp_ls(struct radeon_device *rdev,
5518                              bool enable)
5519 {
5520         u32 orig, data;
5521
5522         orig = data = RREG32(HDP_MEM_POWER_LS);
5523
5524         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5525                 data |= HDP_LS_ENABLE;
5526         else
5527                 data &= ~HDP_LS_ENABLE;
5528
5529         if (orig != data)
5530                 WREG32(HDP_MEM_POWER_LS, data);
5531 }
5532
5533 static void si_update_cg(struct radeon_device *rdev,
5534                          u32 block, bool enable)
5535 {
5536         if (block & RADEON_CG_BLOCK_GFX) {
5537                 si_enable_gui_idle_interrupt(rdev, false);
5538                 /* order matters! */
5539                 if (enable) {
5540                         si_enable_mgcg(rdev, true);
5541                         si_enable_cgcg(rdev, true);
5542                 } else {
5543                         si_enable_cgcg(rdev, false);
5544                         si_enable_mgcg(rdev, false);
5545                 }
5546                 si_enable_gui_idle_interrupt(rdev, true);
5547         }
5548
5549         if (block & RADEON_CG_BLOCK_MC) {
5550                 si_enable_mc_mgcg(rdev, enable);
5551                 si_enable_mc_ls(rdev, enable);
5552         }
5553
5554         if (block & RADEON_CG_BLOCK_SDMA) {
5555                 si_enable_dma_mgcg(rdev, enable);
5556         }
5557
5558         if (block & RADEON_CG_BLOCK_BIF) {
5559                 si_enable_bif_mgls(rdev, enable);
5560         }
5561
5562         if (block & RADEON_CG_BLOCK_UVD) {
5563                 if (rdev->has_uvd) {
5564                         si_enable_uvd_mgcg(rdev, enable);
5565                 }
5566         }
5567
5568         if (block & RADEON_CG_BLOCK_HDP) {
5569                 si_enable_hdp_mgcg(rdev, enable);
5570                 si_enable_hdp_ls(rdev, enable);
5571         }
5572 }
5573
5574 static void si_init_cg(struct radeon_device *rdev)
5575 {
5576         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5577                             RADEON_CG_BLOCK_MC |
5578                             RADEON_CG_BLOCK_SDMA |
5579                             RADEON_CG_BLOCK_BIF |
5580                             RADEON_CG_BLOCK_HDP), true);
5581         if (rdev->has_uvd) {
5582                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5583                 si_init_uvd_internal_cg(rdev);
5584         }
5585 }
5586
5587 static void si_fini_cg(struct radeon_device *rdev)
5588 {
5589         if (rdev->has_uvd) {
5590                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5591         }
5592         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5593                             RADEON_CG_BLOCK_MC |
5594                             RADEON_CG_BLOCK_SDMA |
5595                             RADEON_CG_BLOCK_BIF |
5596                             RADEON_CG_BLOCK_HDP), false);
5597 }
5598
5599 u32 si_get_csb_size(struct radeon_device *rdev)
5600 {
5601         u32 count = 0;
5602         const struct cs_section_def *sect = NULL;
5603         const struct cs_extent_def *ext = NULL;
5604
5605         if (rdev->rlc.cs_data == NULL)
5606                 return 0;
5607
5608         /* begin clear state */
5609         count += 2;
5610         /* context control state */
5611         count += 3;
5612
5613         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5614                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5615                         if (sect->id == SECT_CONTEXT)
5616                                 count += 2 + ext->reg_count;
5617                         else
5618                                 return 0;
5619                 }
5620         }
5621         /* pa_sc_raster_config */
5622         count += 3;
5623         /* end clear state */
5624         count += 2;
5625         /* clear state */
5626         count += 2;
5627
5628         return count;
5629 }
5630
/* Build the clear-state indirect buffer consumed by the RLC.
 * The emitted layout must match the dword count computed by
 * si_get_csb_size(): preamble begin, context control, one
 * SET_CONTEXT_REG run per SECT_CONTEXT extent, pa_sc_raster_config,
 * preamble end, clear state.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context register offsets are relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	/* per-family raster config value */
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5690
/* One-time powergating init.  When PG is supported, set up the DMA PG
 * state machine and the always-on CU mask, point the RLC at its
 * save/restore and clear-state buffers (either through the full gfx
 * CGPG init or directly), and enable DMA/gfx powergating.  The enable
 * helpers themselves re-check the individual pg_flags bits.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		/* no PG support: still program the RLC buffer addresses */
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5711
5712 static void si_fini_pg(struct radeon_device *rdev)
5713 {
5714         if (rdev->pg_flags) {
5715                 si_enable_dma_pg(rdev, false);
5716                 si_enable_gfx_cgpg(rdev, false);
5717         }
5718 }
5719
5720 /*
5721  * RLC
5722  */
5723 void si_rlc_reset(struct radeon_device *rdev)
5724 {
5725         u32 tmp = RREG32(GRBM_SOFT_RESET);
5726
5727         tmp |= SOFT_RESET_RLC;
5728         WREG32(GRBM_SOFT_RESET, tmp);
5729         udelay(50);
5730         tmp &= ~SOFT_RESET_RLC;
5731         WREG32(GRBM_SOFT_RESET, tmp);
5732         udelay(50);
5733 }
5734
/* Halt the RLC: clear RLC_CNTL, mask the gui-idle interrupts, then
 * wait for the RLC serdes to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5743
/* Start the RLC and unmask the gui-idle interrupts; the delay gives
 * the RLC time to come up before the caller proceeds.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5752
5753 static bool si_lbpw_supported(struct radeon_device *rdev)
5754 {
5755         u32 tmp;
5756
5757         /* Enable LBPW only for DDR3 */
5758         tmp = RREG32(MC_SEQ_MISC0);
5759         if ((tmp & 0xF0000000) == 0xB0000000)
5760                 return true;
5761         return false;
5762 }
5763
5764 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5765 {
5766         u32 tmp;
5767
5768         tmp = RREG32(RLC_LB_CNTL);
5769         if (enable)
5770                 tmp |= LOAD_BALANCE_ENABLE;
5771         else
5772                 tmp &= ~LOAD_BALANCE_ENABLE;
5773         WREG32(RLC_LB_CNTL, tmp);
5774
5775         if (!enable) {
5776                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5777                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5778         }
5779 }
5780
/* Load the RLC microcode and (re)start the RLC.
 *
 * Stops and resets the RLC, runs powergating/clockgating init, clears
 * the RLC ring and load-balancing state, uploads the ucode from either
 * the new-style firmware header or the legacy big-endian blob, then
 * starts the RLC with LBPW enabled when supported.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: header plus little-endian payload */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: raw big-endian dwords, fixed size */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5835
5836 static void si_enable_interrupts(struct radeon_device *rdev)
5837 {
5838         u32 ih_cntl = RREG32(IH_CNTL);
5839         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5840
5841         ih_cntl |= ENABLE_INTR;
5842         ih_rb_cntl |= IH_RB_ENABLE;
5843         WREG32(IH_CNTL, ih_cntl);
5844         WREG32(IH_RB_CNTL, ih_rb_cntl);
5845         rdev->ih.enabled = true;
5846 }
5847
5848 static void si_disable_interrupts(struct radeon_device *rdev)
5849 {
5850         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5851         u32 ih_cntl = RREG32(IH_CNTL);
5852
5853         ih_rb_cntl &= ~IH_RB_ENABLE;
5854         ih_cntl &= ~ENABLE_INTR;
5855         WREG32(IH_RB_CNTL, ih_rb_cntl);
5856         WREG32(IH_CNTL, ih_cntl);
5857         /* set rptr, wptr to 0 */
5858         WREG32(IH_RB_RPTR, 0);
5859         WREG32(IH_RB_WPTR, 0);
5860         rdev->ih.enabled = false;
5861         rdev->ih.rptr = 0;
5862 }
5863
/* Force every interrupt source this driver manages into the disabled
 * state: CP rings (preserving the gui-idle bits on ring 0), both DMA
 * engine traps, GRBM/SRBM, per-crtc vblank/pageflip sources, and (on
 * asics with display) the DAC autodetect and HPD interrupts.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	int i;
	u32 tmp;

	/* keep only the gui-idle enable bits on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(SRBM_INT_CNTL, 0);
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(INT_MASK + crtc_offsets[i], 0);
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear all HPD control bits except the polarity */
		for (i = 0; i < 6; i++)
			WREG32_AND(DC_HPDx_INT_CONTROL(i),
				   DC_HPDx_INT_POLARITY);
	}
}
5893
/* Bring up the interrupt handler (IH): allocate the IH ring, load the
 * RLC, program the IH ring buffer and control registers, force all
 * interrupt sources disabled, then enable the controller.
 *
 * Returns 0 on success or a negative error code from ring allocation
 * or RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5964
/* The order we write back each register here is important */
/**
 * si_irq_set - program the hw interrupt enable registers
 * @rdev: radeon_device pointer
 *
 * Derives the hw interrupt enable state from the sw state tracked in
 * rdev->irq (per-ring fence interrupts, crtc vblank/pflip, hpd, dpm
 * thermal) and writes it back to the chip.  If the IH ring itself is
 * disabled everything is forced off instead.
 *
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
        int i;
        u32 cp_int_cntl;
        u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
        u32 grbm_int_cntl = 0;
        u32 dma_cntl, dma_cntl1;
        u32 thermal_int = 0;

        if (!rdev->irq.installed) {
                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
                return -EINVAL;
        }
        /* don't enable anything if the ih is disabled */
        if (!rdev->ih.enabled) {
                si_disable_interrupts(rdev);
                /* force the active interrupt state to all disabled */
                si_disable_interrupt_state(rdev);
                return 0;
        }

        /* read-modify-write: preserve the CNTX busy/empty enables, start
         * from everything else disabled and OR in what sw state requires */
        cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

        dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

        thermal_int = RREG32(CG_THERMAL_INT) &
                ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

        /* enable CP interrupts on all rings */
        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int gfx\n");
                cp_int_cntl |= TIME_STAMP_INT_ENABLE;
        }
        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int cp1\n");
                cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
        }
        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int cp2\n");
                cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
        }
        if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int dma\n");
                dma_cntl |= TRAP_ENABLE;
        }

        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int dma1\n");
                dma_cntl1 |= TRAP_ENABLE;
        }

        WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
        WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
        WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

        WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
        WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

        WREG32(GRBM_INT_CNTL, grbm_int_cntl);

        if (rdev->irq.dpm_thermal) {
                DRM_DEBUG("dpm thermal\n");
                thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
        }

        /* vblank enable also covers pageflip: either consumer keeps it on */
        for (i = 0; i < rdev->num_crtc; i++) {
                radeon_irq_kms_set_irq_n_enabled(
                    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
                    rdev->irq.crtc_vblank_int[i] ||
                    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
        }

        for (i = 0; i < rdev->num_crtc; i++)
                WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);

        if (!ASIC_IS_NODCE(rdev)) {
                for (i = 0; i < 6; i++) {
                        radeon_irq_kms_set_irq_n_enabled(
                            rdev, DC_HPDx_INT_CONTROL(i),
                            DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
                            rdev->irq.hpd[i], "HPD", i);
                }
        }

        WREG32(CG_THERMAL_INT, thermal_int);

        /* posting read */
        RREG32(SRBM_STATUS);

        return 0;
}
6059
/* The order we write back each register here is important */
/**
 * si_irq_ack - latch and acknowledge pending display interrupts
 * @rdev: radeon_device pointer
 *
 * Snapshots the display and grph interrupt status registers into
 * rdev->irq.stat_regs (consumed later by si_irq_process()) and writes
 * the ack bits back to the hw for every source found asserted
 * (pageflip, vblank, vline, hpd, hpd-rx).  No-op on ASICs without DCE.
 */
static inline void si_irq_ack(struct radeon_device *rdev)
{
        int i, j;
        u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
        u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;

        if (ASIC_IS_NODCE(rdev))
                return;

        /* latch current status; grph regs only exist per actual crtc */
        for (i = 0; i < 6; i++) {
                disp_int[i] = RREG32(si_disp_int_status[i]);
                if (i < rdev->num_crtc)
                        grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
        }

        /* We write back each interrupt register in pairs of two */
        for (i = 0; i < rdev->num_crtc; i += 2) {
                for (j = i; j < (i + 2); j++) {
                        if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
                                WREG32(GRPH_INT_STATUS + crtc_offsets[j],
                                       GRPH_PFLIP_INT_CLEAR);
                }

                for (j = i; j < (i + 2); j++) {
                        if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
                                WREG32(VBLANK_STATUS + crtc_offsets[j],
                                       VBLANK_ACK);
                        if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
                                WREG32(VLINE_STATUS + crtc_offsets[j],
                                       VLINE_ACK);
                }
        }

        /* hotplug detect acks (all 6 connectors regardless of crtc count) */
        for (i = 0; i < 6; i++) {
                if (disp_int[i] & DC_HPD1_INTERRUPT)
                        WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
        }

        for (i = 0; i < 6; i++) {
                if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
                        WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
        }
}
6104
/**
 * si_irq_disable - fully quiesce the interrupt hardware
 * @rdev: radeon_device pointer
 *
 * Disables the IH ring, waits briefly for in-flight interrupts to land,
 * acknowledges anything still pending, then forces all interrupt
 * sources off.  The sequence order matters: ack only after disable.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
        si_disable_interrupts(rdev);
        /* Wait and acknowledge irq */
        mdelay(1);
        si_irq_ack(rdev);
        si_disable_interrupt_state(rdev);
}
6113
/**
 * si_irq_suspend - disable interrupts and stop the RLC for suspend
 * @rdev: radeon_device pointer
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
        si_irq_disable(rdev);
        si_rlc_stop(rdev);
}
6119
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon_device pointer
 *
 * Suspends irq processing, then frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
        si_irq_suspend(rdev);
        r600_ih_ring_fini(rdev);
}
6125
6126 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6127 {
6128         u32 wptr, tmp;
6129
6130         if (rdev->wb.enabled)
6131                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6132         else
6133                 wptr = RREG32(IH_RB_WPTR);
6134
6135         if (wptr & RB_OVERFLOW) {
6136                 wptr &= ~RB_OVERFLOW;
6137                 /* When a ring buffer overflow happen start parsing interrupt
6138                  * from the last not overwritten vector (wptr + 16). Hopefully
6139                  * this should allow us to catchup.
6140                  */
6141                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6142                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6143                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6144                 tmp = RREG32(IH_RB_CNTL);
6145                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6146                 WREG32(IH_RB_CNTL, tmp);
6147         }
6148         return (wptr & rdev->ih.ptr_mask);
6149 }
6150
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/**
 * si_irq_process - drain and dispatch the IH (interrupt handler) ring
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from the sw read pointer to the hw write pointer,
 * decoding each 16-byte vector and dispatching on its source id:
 * crtc vblank/vline, pageflip, hotplug, VM faults, ring fences (CP and
 * DMA), UVD, thermal and GUI-idle events.  Deferred work (DP, hotplug,
 * thermal) is scheduled after the loop.  A single processor at a time
 * is enforced via rdev->ih.lock; if the wptr moved while we were
 * processing, we restart to pick up the new entries.
 *
 * Returns IRQ_HANDLED if anything was processed, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
        u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
        u32 crtc_idx, hpd_idx;
        u32 mask;
        u32 wptr;
        u32 rptr;
        u32 src_id, src_data, ring_id;
        u32 ring_index;
        bool queue_hotplug = false;
        bool queue_dp = false;
        bool queue_thermal = false;
        u32 status, addr;
        const char *event_name;

        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;

        wptr = si_get_ih_wptr(rdev);

restart_ih:
        /* is somebody else already processing irqs? */
        if (atomic_xchg(&rdev->ih.lock, 1))
                return IRQ_NONE;

        rptr = rdev->ih.rptr;
        DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

        /* Order reading of wptr vs. reading of IH ring data */
        rmb();

        /* display interrupts */
        si_irq_ack(rdev);

        while (rptr != wptr) {
                /* wptr/rptr are in bytes! */
                ring_index = rptr / 4;
                src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
                src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
                ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

                switch (src_id) {
                case 1: /* D1 vblank/vline */
                case 2: /* D2 vblank/vline */
                case 3: /* D3 vblank/vline */
                case 4: /* D4 vblank/vline */
                case 5: /* D5 vblank/vline */
                case 6: /* D6 vblank/vline */
                        crtc_idx = src_id - 1;

                        if (src_data == 0) { /* vblank */
                                mask = LB_D1_VBLANK_INTERRUPT;
                                event_name = "vblank";

                                if (rdev->irq.crtc_vblank_int[crtc_idx]) {
                                        drm_handle_vblank(rdev->ddev, crtc_idx);
                                        rdev->pm.vblank_sync = true;
                                        wake_up(&rdev->irq.vblank_queue);
                                }
                                if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
                                        radeon_crtc_handle_vblank(rdev,
                                                                  crtc_idx);
                                }

                        } else if (src_data == 1) { /* vline */
                                mask = LB_D1_VLINE_INTERRUPT;
                                event_name = "vline";
                        } else {
                                DRM_DEBUG("Unhandled interrupt: %d %d\n",
                                          src_id, src_data);
                                break;
                        }

                        /* latched status should agree with the IH event */
                        if (!(disp_int[crtc_idx] & mask)) {
                                DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
                                          crtc_idx + 1, event_name);
                        }

                        disp_int[crtc_idx] &= ~mask;
                        DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);

                        break;
                case 8: /* D1 page flip */
                case 10: /* D2 page flip */
                case 12: /* D3 page flip */
                case 14: /* D4 page flip */
                case 16: /* D5 page flip */
                case 18: /* D6 page flip */
                        DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
                        if (radeon_use_pflipirq > 0)
                                radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
                        break;
                case 42: /* HPD hotplug */
                        /* src_data 0-5: hpd pins, 6-11: hpd rx (DP) */
                        if (src_data <= 5) {
                                hpd_idx = src_data;
                                mask = DC_HPD1_INTERRUPT;
                                queue_hotplug = true;
                                event_name = "HPD";

                        } else if (src_data <= 11) {
                                hpd_idx = src_data - 6;
                                mask = DC_HPD1_RX_INTERRUPT;
                                queue_dp = true;
                                event_name = "HPD_RX";

                        } else {
                                DRM_DEBUG("Unhandled interrupt: %d %d\n",
                                          src_id, src_data);
                                break;
                        }

                        if (!(disp_int[hpd_idx] & mask))
                                DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

                        disp_int[hpd_idx] &= ~mask;
                        DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
                        break;
                case 96: /* SRBM read error */
                        DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
                        WREG32(SRBM_INT_ACK, 0x1);
                        break;
                case 124: /* UVD */
                        DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
                        radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
                        break;
                case 146: /* VM protection fault */
                case 147:
                        addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
                        status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
                        /* reset addr and status */
                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                        if (addr == 0x0 && status == 0x0)
                                break;
                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                                addr);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                                status);
                        si_vm_decode_fault(rdev, status, addr);
                        break;
                case 176: /* RINGID0 CP_INT */
                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                        break;
                case 177: /* RINGID1 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                        break;
                case 178: /* RINGID2 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                        break;
                case 181: /* CP EOP event */
                        DRM_DEBUG("IH: CP EOP\n");
                        switch (ring_id) {
                        case 0:
                                radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                                break;
                        case 1:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                                break;
                        case 2:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                                break;
                        }
                        break;
                case 224: /* DMA trap event */
                        DRM_DEBUG("IH: DMA trap\n");
                        radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
                        break;
                case 230: /* thermal low to high */
                        DRM_DEBUG("IH: thermal low to high\n");
                        rdev->pm.dpm.thermal.high_to_low = false;
                        queue_thermal = true;
                        break;
                case 231: /* thermal high to low */
                        DRM_DEBUG("IH: thermal high to low\n");
                        rdev->pm.dpm.thermal.high_to_low = true;
                        queue_thermal = true;
                        break;
                case 233: /* GUI IDLE */
                        DRM_DEBUG("IH: GUI idle\n");
                        break;
                case 244: /* DMA trap event */
                        DRM_DEBUG("IH: DMA1 trap\n");
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
                        break;
                default:
                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                        break;
                }

                /* wptr/rptr are in bytes! */
                rptr += 16;
                rptr &= rdev->ih.ptr_mask;
                WREG32(IH_RB_RPTR, rptr);
        }
        /* kick off deferred handling outside the IH processing loop */
        if (queue_dp)
                schedule_work(&rdev->dp_work);
        if (queue_hotplug)
                schedule_delayed_work(&rdev->hotplug_work, 0);
        if (queue_thermal && rdev->pm.dpm_enabled)
                schedule_work(&rdev->pm.dpm.thermal.work);
        rdev->ih.rptr = rptr;
        atomic_set(&rdev->ih.lock, 0);

        /* make sure wptr hasn't changed while processing */
        wptr = si_get_ih_wptr(rdev);
        if (wptr != rptr)
                goto restart_ih;

        return IRQ_HANDLED;
}
6371
6372 /*
6373  * startup/shutdown callbacks
6374  */
6375 static void si_uvd_init(struct radeon_device *rdev)
6376 {
6377         int r;
6378
6379         if (!rdev->has_uvd)
6380                 return;
6381
6382         r = radeon_uvd_init(rdev);
6383         if (r) {
6384                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6385                 /*
6386                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6387                  * to early fails uvd_v2_2_resume() and thus nothing happens
6388                  * there. So it is pointless to try to go through that code
6389                  * hence why we disable uvd here.
6390                  */
6391                 rdev->has_uvd = 0;
6392                 return;
6393         }
6394         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6395         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6396 }
6397
6398 static void si_uvd_start(struct radeon_device *rdev)
6399 {
6400         int r;
6401
6402         if (!rdev->has_uvd)
6403                 return;
6404
6405         r = uvd_v2_2_resume(rdev);
6406         if (r) {
6407                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6408                 goto error;
6409         }
6410         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6411         if (r) {
6412                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6413                 goto error;
6414         }
6415         return;
6416
6417 error:
6418         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6419 }
6420
6421 static void si_uvd_resume(struct radeon_device *rdev)
6422 {
6423         struct radeon_ring *ring;
6424         int r;
6425
6426         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6427                 return;
6428
6429         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6430         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6431         if (r) {
6432                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6433                 return;
6434         }
6435         r = uvd_v1_0_init(rdev);
6436         if (r) {
6437                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6438                 return;
6439         }
6440 }
6441
6442 static void si_vce_init(struct radeon_device *rdev)
6443 {
6444         int r;
6445
6446         if (!rdev->has_vce)
6447                 return;
6448
6449         r = radeon_vce_init(rdev);
6450         if (r) {
6451                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6452                 /*
6453                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
6454                  * to early fails si_vce_start() and thus nothing happens
6455                  * there. So it is pointless to try to go through that code
6456                  * hence why we disable vce here.
6457                  */
6458                 rdev->has_vce = 0;
6459                 return;
6460         }
6461         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6462         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6463         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6464         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6465 }
6466
6467 static void si_vce_start(struct radeon_device *rdev)
6468 {
6469         int r;
6470
6471         if (!rdev->has_vce)
6472                 return;
6473
6474         r = radeon_vce_resume(rdev);
6475         if (r) {
6476                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6477                 goto error;
6478         }
6479         r = vce_v1_0_resume(rdev);
6480         if (r) {
6481                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6482                 goto error;
6483         }
6484         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6485         if (r) {
6486                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6487                 goto error;
6488         }
6489         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6490         if (r) {
6491                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6492                 goto error;
6493         }
6494         return;
6495
6496 error:
6497         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6498         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6499 }
6500
6501 static void si_vce_resume(struct radeon_device *rdev)
6502 {
6503         struct radeon_ring *ring;
6504         int r;
6505
6506         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6507                 return;
6508
6509         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6510         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6511         if (r) {
6512                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6513                 return;
6514         }
6515         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6516         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6517         if (r) {
6518                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6519                 return;
6520         }
6521         r = vce_v1_0_init(rdev);
6522         if (r) {
6523                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6524                 return;
6525         }
6526 }
6527
/**
 * si_startup - program the hw and bring it to a running state
 * @rdev: radeon_device pointer
 *
 * Common bring-up path used by both si_init() and si_resume():
 * PCIe link/ASPM setup, MC programming and microcode, GART, GPU core
 * init, RLC/writeback buffers, fence drivers for all rings, UVD/VCE,
 * interrupts, ring buffers, CP/DMA resume, IB pool, VM manager and
 * audio.  The order of these steps matters.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        int r;

        /* enable pcie gen2/3 link */
        si_pcie_gen3_enable(rdev);
        /* enable aspm */
        si_program_aspm(rdev);

        /* scratch needs to be initialized before MC */
        r = r600_vram_scratch_init(rdev);
        if (r)
                return r;

        si_mc_program(rdev);

        /* with dpm enabled, the MC ucode was already loaded by the dpm code */
        if (!rdev->pm.dpm_enabled) {
                r = si_mc_load_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load MC firmware!\n");
                        return r;
                }
        }

        r = si_pcie_gart_enable(rdev);
        if (r)
                return r;
        si_gpu_init(rdev);

        /* allocate rlc buffers */
        if (rdev->family == CHIP_VERDE) {
                rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
                rdev->rlc.reg_list_size =
                        (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
        }
        rdev->rlc.cs_data = si_cs_data;
        r = sumo_rlc_init(rdev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        /* allocate wb buffer */
        r = radeon_wb_init(rdev);
        if (r)
                return r;

        /* start fence drivers for all five rings (3x CP, 2x DMA) */
        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        /* UVD/VCE failures are non-fatal; they disable themselves */
        si_uvd_start(rdev);
        si_vce_start(rdev);

        /* Enable IRQ */
        if (!rdev->irq.installed) {
                r = radeon_irq_kms_init(rdev);
                if (r)
                        return r;
        }

        r = si_irq_init(rdev);
        if (r) {
                DRM_ERROR("radeon: IH init failed (%d).\n", r);
                radeon_irq_kms_fini(rdev);
                return r;
        }
        si_irq_set(rdev);

        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;

        r = si_cp_load_microcode(rdev);
        if (r)
                return r;
        r = si_cp_resume(rdev);
        if (r)
                return r;

        r = cayman_dma_resume(rdev);
        if (r)
                return r;

        si_uvd_resume(rdev);
        si_vce_resume(rdev);

        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
                return r;
        }

        r = radeon_vm_manager_init(rdev);
        if (r) {
                dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
                return r;
        }

        r = radeon_audio_init(rdev);
        if (r)
                return r;

        return 0;
}
6686
6687 int si_resume(struct radeon_device *rdev)
6688 {
6689         int r;
6690
6691         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6692          * posting will perform necessary task to bring back GPU into good
6693          * shape.
6694          */
6695         /* post card */
6696         atom_asic_init(rdev->mode_info.atom_context);
6697
6698         /* init golden registers */
6699         si_init_golden_registers(rdev);
6700
6701         if (rdev->pm.pm_method == PM_METHOD_DPM)
6702                 radeon_pm_resume(rdev);
6703
6704         rdev->accel_working = true;
6705         r = si_startup(rdev);
6706         if (r) {
6707                 DRM_ERROR("si startup failed on resume\n");
6708                 rdev->accel_working = false;
6709                 return r;
6710         }
6711
6712         return r;
6713
6714 }
6715
/**
 * si_suspend - quiesce the asic for suspend
 * @rdev: radeon_device pointer
 *
 * Tears the hw down in reverse bring-up order: pm, audio, vm manager,
 * CP/DMA engines, UVD/VCE, powergating/clockgating, interrupts,
 * writeback, and finally GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
        radeon_pm_suspend(rdev);
        radeon_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        si_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
        if (rdev->has_uvd) {
                uvd_v1_0_fini(rdev);
                radeon_uvd_suspend(rdev);
        }
        if (rdev->has_vce)
                radeon_vce_suspend(rdev);
        si_fini_pg(rdev);
        si_fini_cg(rdev);
        si_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        si_pcie_gart_disable(rdev);
        return 0;
}
6736
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init pretty much
 * does nothing more than calling asic-specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
6743 int si_init(struct radeon_device *rdev)
6744 {
6745         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6746         int r;
6747
6748         /* Read BIOS */
6749         if (!radeon_get_bios(rdev)) {
6750                 if (ASIC_IS_AVIVO(rdev))
6751                         return -EINVAL;
6752         }
6753         /* Must be an ATOMBIOS */
6754         if (!rdev->is_atom_bios) {
6755                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6756                 return -EINVAL;
6757         }
6758         r = radeon_atombios_init(rdev);
6759         if (r)
6760                 return r;
6761
6762         /* Post card if necessary */
6763         if (!radeon_card_posted(rdev)) {
6764                 if (!rdev->bios) {
6765                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6766                         return -EINVAL;
6767                 }
6768                 DRM_INFO("GPU not posted. posting now...\n");
6769                 atom_asic_init(rdev->mode_info.atom_context);
6770         }
6771         /* init golden registers */
6772         si_init_golden_registers(rdev);
6773         /* Initialize scratch registers */
6774         si_scratch_init(rdev);
6775         /* Initialize surface registers */
6776         radeon_surface_init(rdev);
6777         /* Initialize clocks */
6778         radeon_get_clock_info(rdev->ddev);
6779
6780         /* Fence driver */
6781         r = radeon_fence_driver_init(rdev);
6782         if (r)
6783                 return r;
6784
6785         /* initialize memory controller */
6786         r = si_mc_init(rdev);
6787         if (r)
6788                 return r;
6789         /* Memory manager */
6790         r = radeon_bo_init(rdev);
6791         if (r)
6792                 return r;
6793
6794         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6795             !rdev->rlc_fw || !rdev->mc_fw) {
6796                 r = si_init_microcode(rdev);
6797                 if (r) {
6798                         DRM_ERROR("Failed to load firmware!\n");
6799                         return r;
6800                 }
6801         }
6802
6803         /* Initialize power management */
6804         radeon_pm_init(rdev);
6805
6806         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6807         ring->ring_obj = NULL;
6808         r600_ring_init(rdev, ring, 1024 * 1024);
6809
6810         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6811         ring->ring_obj = NULL;
6812         r600_ring_init(rdev, ring, 1024 * 1024);
6813
6814         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6815         ring->ring_obj = NULL;
6816         r600_ring_init(rdev, ring, 1024 * 1024);
6817
6818         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6819         ring->ring_obj = NULL;
6820         r600_ring_init(rdev, ring, 64 * 1024);
6821
6822         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6823         ring->ring_obj = NULL;
6824         r600_ring_init(rdev, ring, 64 * 1024);
6825
6826         si_uvd_init(rdev);
6827         si_vce_init(rdev);
6828
6829         rdev->ih.ring_obj = NULL;
6830         r600_ih_ring_init(rdev, 64 * 1024);
6831
6832         r = r600_pcie_gart_init(rdev);
6833         if (r)
6834                 return r;
6835
6836         rdev->accel_working = true;
6837         r = si_startup(rdev);
6838         if (r) {
6839                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6840                 si_cp_fini(rdev);
6841                 cayman_dma_fini(rdev);
6842                 si_irq_fini(rdev);
6843                 sumo_rlc_fini(rdev);
6844                 radeon_wb_fini(rdev);
6845                 radeon_ib_pool_fini(rdev);
6846                 radeon_vm_manager_fini(rdev);
6847                 radeon_irq_kms_fini(rdev);
6848                 si_pcie_gart_fini(rdev);
6849                 rdev->accel_working = false;
6850         }
6851
6852         /* Don't start up if the MC ucode is missing.
6853          * The default clocks and voltages before the MC ucode
6854          * is loaded are not suffient for advanced operations.
6855          */
6856         if (!rdev->mc_fw) {
6857                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6858                 return -EINVAL;
6859         }
6860
6861         return 0;
6862 }
6863
/**
 * si_fini - tear down all SI driver state
 *
 * @rdev: radeon_device pointer
 *
 * Reverse of the init path: stops power management first, then the
 * CP/DMA engines, PG/CG, interrupts, RLC, writeback, VM and IB pools,
 * the UVD/VCE blocks (if present), GART, and finally the memory
 * manager / atombios state.  The BIOS copy is freed last.  The call
 * order mirrors the dependencies between these sub-systems and must
 * not be rearranged.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS copy taken during init and clear the pointer so a
	 * stale reference cannot be used after teardown */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6892
6893 /**
6894  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6895  *
6896  * @rdev: radeon_device pointer
6897  *
6898  * Fetches a GPU clock counter snapshot (SI).
6899  * Returns the 64 bit clock counter snapshot.
6900  */
6901 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6902 {
6903         uint64_t clock;
6904
6905         mutex_lock(&rdev->gpu_clock_mutex);
6906         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6907         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6908                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6909         mutex_unlock(&rdev->gpu_clock_mutex);
6910         return clock;
6911 }
6912
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 = leave the PLL in bypass)
 * @dclk: requested UVD decode clock (0 = leave the PLL in bypass)
 *
 * Routes VCLK/DCLK to the bypass clock, recomputes the UPLL dividers,
 * takes the PLL through its reset/settle sequence, and finally switches
 * VCLK/DCLK back to the PLL output.  The register ordering and the
 * mdelay()s below follow the hardware programming sequence and must not
 * be rearranged.
 *
 * Returns 0 on success, or a negative error code from the divider
 * calculation or the PLL control-request handshake.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): the 307200 fb_div threshold for ISPARE9 comes from
	 * the hardware programming sequence; its meaning is not documented
	 * here */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7001
/**
 * si_pcie_gen3_enable - raise the PCIE link speed to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * If both the GPU and the upstream bridge advertise 5.0 or 8.0 GT/s,
 * reprograms the link-speed controls and retrains the link at the
 * highest common speed.  For gen3 the equalization sequence is redone
 * up to 10 times while hardware autonomous width disable (HAWD) is
 * temporarily forced on both ends.  No-op on root-bus devices, IGPs,
 * non-PCIE parts, or when radeon.pcie_gen2=0.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 (per the checks
	 * below) — skip the retrain if we are already at the target rate */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current LNKCTL of both ends, then force
			 * HAWD on for the duration of the retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the full detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 — NOTE(review): the raw mask preserves
				 * bits 4 and 9-11 of LNKCTL2 (compliance/margin
				 * related fields); verify against the PCIe spec
				 * before changing */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* low nibble of LNKCTL2 is the target link speed field */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the speed-change request */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7161
/**
 * si_program_aspm - configure PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, the PHY PLL power-down
 * behaviour in L1, and — when the upstream bridge advertises CLKPM —
 * the clock-request based clock switching.  The disable_* locals are
 * compile-time policy knobs (all ASPM features currently enabled).
 * No-op when radeon.aspm=0 or on non-PCIE parts.  Register write
 * ordering follows the hardware sequence and must not be rearranged.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* pattern used throughout: read-modify-write, skipping the write
	 * when the value is unchanged */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in L1/TXS2 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything except
			 * Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* clock power management is only usable if the
			 * upstream bridge advertises CLKPM in its link caps */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only LC_CNTL value */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if both sides transmit the maximum N_FTS on a reversed
		 * link, back out the L0s inactivity timer */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7366
7367 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7368 {
7369         unsigned i;
7370
7371         /* make sure VCEPLL_CTLREQ is deasserted */
7372         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7373
7374         mdelay(10);
7375
7376         /* assert UPLL_CTLREQ */
7377         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7378
7379         /* wait for CTLACK and CTLACK2 to get asserted */
7380         for (i = 0; i < 100; ++i) {
7381                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7382                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7383                         break;
7384                 mdelay(10);
7385         }
7386
7387         /* deassert UPLL_CTLREQ */
7388         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7389
7390         if (i == 100) {
7391                 DRM_ERROR("Timeout setting UVD clocks!\n");
7392                 return -ETIMEDOUT;
7393         }
7394
7395         return 0;
7396 }
7397
/**
 * si_set_vce_clocks - program the VCE PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested VCE encode video clock (0 = bypass and sleep the PLL)
 * @ecclk: requested VCE encode core clock (0 = bypass and sleep the PLL)
 *
 * Mirrors si_set_uvd_clocks() for the VCE PLL: routes EVCLK/ECCLK to
 * the bypass clock, reprograms the VCEPLL dividers through the SMC,
 * takes the PLL through its reset/settle sequence, and switches the
 * clocks back to the PLL output.  The register ordering and mdelay()s
 * follow the hardware programming sequence and must not be rearranged.
 *
 * Returns 0 on success, or a negative error code from the divider
 * calculation or the PLL control-request handshake.
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch EVCLK and ECCLK selection back to the PLL */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}