GNU Linux-libre 4.9.309-gnu1
[releases.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 /*(DEBLOBBED)*/
39
40 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
41 extern void r600_ih_ring_fini(struct radeon_device *rdev);
42 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
43 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
44 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
45 extern void sumo_rlc_fini(struct radeon_device *rdev);
46 extern int sumo_rlc_init(struct radeon_device *rdev);
47 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
48 extern void si_rlc_reset(struct radeon_device *rdev);
49 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
50 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
51 extern int cik_sdma_resume(struct radeon_device *rdev);
52 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
53 extern void cik_sdma_fini(struct radeon_device *rdev);
54 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
55 static void cik_rlc_stop(struct radeon_device *rdev);
56 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
57 static void cik_program_aspm(struct radeon_device *rdev);
58 static void cik_init_pg(struct radeon_device *rdev);
59 static void cik_init_cg(struct radeon_device *rdev);
60 static void cik_fini_pg(struct radeon_device *rdev);
61 static void cik_fini_cg(struct radeon_device *rdev);
62 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
63                                           bool enable);
64
65 /**
66  * cik_get_allowed_info_register - fetch the register for the info ioctl
67  *
68  * @rdev: radeon_device pointer
69  * @reg: register offset in bytes
70  * @val: register value
71  *
72  * Returns 0 for success or -EINVAL for an invalid register
73  *
74  */
75 int cik_get_allowed_info_register(struct radeon_device *rdev,
76                                   u32 reg, u32 *val)
77 {
78         switch (reg) {
79         case GRBM_STATUS:
80         case GRBM_STATUS2:
81         case GRBM_STATUS_SE0:
82         case GRBM_STATUS_SE1:
83         case GRBM_STATUS_SE2:
84         case GRBM_STATUS_SE3:
85         case SRBM_STATUS:
86         case SRBM_STATUS2:
87         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
88         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
89         case UVD_STATUS:
90         /* TODO VCE */
91                 *val = RREG32(reg);
92                 return 0;
93         default:
94                 return -EINVAL;
95         }
96 }
97
98 /*
99  * Indirect registers accessor
100  */
/**
 * cik_didt_rreg - read a register in the DIDT indirect space
 *
 * @rdev: radeon_device pointer
 * @reg: DIDT register index
 *
 * The DIDT block is reached through a shared index/data register pair,
 * so the index write and the data read must happen back to back;
 * didt_idx_lock serializes the sequence (IRQ-safe variant).
 *
 * Returns the 32-bit value read from the data register.
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	/* select the target register, then fetch its value */
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}
112
/**
 * cik_didt_wreg - write a register in the DIDT indirect space
 *
 * @rdev: radeon_device pointer
 * @reg: DIDT register index
 * @v: value to write
 *
 * Counterpart of cik_didt_rreg(): programs the shared index register
 * and then writes the data register, with didt_idx_lock held so the
 * two accesses cannot be interleaved with another indirect access.
 */
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	/* select the target register, then store the new value */
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
122
123 /* get temperature in millidegrees */
124 int ci_get_temp(struct radeon_device *rdev)
125 {
126         u32 temp;
127         int actual_temp = 0;
128
129         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
130                 CTF_TEMP_SHIFT;
131
132         if (temp & 0x200)
133                 actual_temp = 255;
134         else
135                 actual_temp = temp & 0x1ff;
136
137         actual_temp = actual_temp * 1000;
138
139         return actual_temp;
140 }
141
142 /* get temperature in millidegrees */
143 int kv_get_temp(struct radeon_device *rdev)
144 {
145         u32 temp;
146         int actual_temp = 0;
147
148         temp = RREG32_SMC(0xC0300E0C);
149
150         if (temp)
151                 actual_temp = (temp / 8) - 49;
152         else
153                 actual_temp = 0;
154
155         actual_temp = actual_temp * 1000;
156
157         return actual_temp;
158 }
159
160 /*
161  * Indirect registers accessor
162  */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 *
 * @rdev: radeon_device pointer
 * @reg: PCIE port register index
 *
 * Uses the shared PCIE_INDEX/PCIE_DATA pair under pciep_idx_lock.
 * The dummy read of PCIE_INDEX after writing it is presumably there
 * to flush the posted index write before the data access — standard
 * idiom for this kind of indirect accessor; confirm against the
 * hardware programming guide before changing it.
 *
 * Returns the 32-bit value read from PCIE_DATA.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
175
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 *
 * @rdev: radeon_device pointer
 * @reg: PCIE port register index
 * @v: value to write
 *
 * Counterpart of cik_pciep_rreg().  Both the index and the data writes
 * are followed by a dummy read-back, presumably to flush the posted
 * writes so the access completes before the lock is dropped — keep the
 * exact sequence unless the programming guide says otherwise.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
187
/*
 * RLC save/restore register list, "spectre" variant.
 * NOTE(review): format inferred from the data shape — most entries are
 * an encoded descriptor ((select << 16) | (byte offset >> 2)) followed
 * by a 0x00000000 placeholder; the bare 0x3 and 0x5 words look like
 * section markers after which the entry layout changes (the final
 * section carries no placeholders).  Confirm against the RLC
 * save/restore list consumer before relying on this description.
 * Hardware table — do not edit values by hand.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): bare marker word — section boundary? */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): bare marker word — final section has no placeholders */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
634
/*
 * RLC save/restore register list, "kalindi" variant (smaller than the
 * spectre list — fewer broadcast selects and fewer registers).
 * NOTE(review): same apparent layout as the spectre table: encoded
 * descriptors ((select << 16) | (byte offset >> 2)) each followed by a
 * 0x00000000 placeholder, with bare 0x3 / 0x5 marker words delimiting
 * sections.  Confirm against the RLC save/restore list consumer.
 * Hardware table — do not edit values by hand.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): bare marker word — section boundary? */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): bare marker word — final section has no placeholders */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
959
/* Bonaire SPM "golden" override, applied by cik_init_golden_registers().
 * Entries look like {offset, and-mask, value} triples — confirm format
 * against radeon_program_register_sequence(). */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
964
/* Bonaire common "golden" overrides ({offset, and-mask, value} triples,
 * presumably — see radeon_program_register_sequence()). */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
972
/* Bonaire per-ASIC "golden" register fixups, applied at init by
 * cik_init_golden_registers().  Apparent format: {offset, and-mask, value}
 * triples — confirm against radeon_program_register_sequence(). */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1017
/* Bonaire clock-gating init values (name suggests MGCG/CGCG setup — the
 * exact register semantics are defined by the hardware, not visible here).
 * Same apparent {offset, and-mask, value} triple format as the golden
 * register tables; applied by cik_init_golden_registers(). */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1103
/* Spectre (Kaveri) SPM "golden" override — same triple format as the
 * other golden tables; applied by cik_init_golden_registers(). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1108
/* Spectre (Kaveri) common "golden" overrides ({offset, and-mask, value}
 * triples, presumably — see radeon_program_register_sequence()). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1116
/* Spectre (Kaveri) per-ASIC "golden" register fixups, applied at init by
 * cik_init_golden_registers(). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1145
/* Spectre (Kaveri) clock-gating init values (name suggests MGCG/CGCG).
 * Same apparent {offset, and-mask, value} triple format; applied by
 * cik_init_golden_registers(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1236
/* Kalindi (Kabini/Mullins) SPM "golden" override — applied by
 * cik_init_golden_registers(). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1241
/* Kalindi (Kabini/Mullins) common "golden" overrides ({offset, and-mask,
 * value} triples, presumably — see radeon_program_register_sequence()). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1249
/* Kalindi (Kabini) per-ASIC "golden" register fixups, applied at init by
 * cik_init_golden_registers(). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1283
/* Kalindi (Kabini/Mullins) clock-gating init values (name suggests
 * MGCG/CGCG).  Same apparent {offset, and-mask, value} triple format;
 * applied by cik_init_golden_registers(). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1342
/* Hawaii SPM "golden" override — applied by cik_init_golden_registers(). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1347
/* Hawaii common "golden" overrides ({offset, and-mask, value} triples,
 * presumably — see radeon_program_register_sequence()). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1356
/* Hawaii per-ASIC "golden" register fixups, applied at init by
 * cik_init_golden_registers(). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1396
/* Hawaii clock-gating init values (name suggests MGCG/CGCG).  Same apparent
 * {offset, and-mask, value} triple format; applied by
 * cik_init_golden_registers(). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1507
/* Godavari (Mullins) per-ASIC "golden" register fixups, applied at init by
 * cik_init_golden_registers().  Apparent format: {offset, and-mask, value}
 * triples — confirm against radeon_program_register_sequence().
 *
 * Fix: the fourth entry used offset 0x98302, which is not a dword-aligned
 * MMIO offset (every other offset in these tables ends in 0/4/8/c) and so
 * could never address the intended register.  The kalindi table for the
 * same APU family carries the identical mask/value pair on register 0x9834,
 * so 0x98302 was almost certainly a typo for 0x9834. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400, /* was 0x98302: misaligned offset typo */
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1543
1544
1545 static void cik_init_golden_registers(struct radeon_device *rdev)
1546 {
1547         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1548         mutex_lock(&rdev->grbm_idx_mutex);
1549         switch (rdev->family) {
1550         case CHIP_BONAIRE:
1551                 radeon_program_register_sequence(rdev,
1552                                                  bonaire_mgcg_cgcg_init,
1553                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1554                 radeon_program_register_sequence(rdev,
1555                                                  bonaire_golden_registers,
1556                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1557                 radeon_program_register_sequence(rdev,
1558                                                  bonaire_golden_common_registers,
1559                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1560                 radeon_program_register_sequence(rdev,
1561                                                  bonaire_golden_spm_registers,
1562                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1563                 break;
1564         case CHIP_KABINI:
1565                 radeon_program_register_sequence(rdev,
1566                                                  kalindi_mgcg_cgcg_init,
1567                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1568                 radeon_program_register_sequence(rdev,
1569                                                  kalindi_golden_registers,
1570                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1571                 radeon_program_register_sequence(rdev,
1572                                                  kalindi_golden_common_registers,
1573                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1574                 radeon_program_register_sequence(rdev,
1575                                                  kalindi_golden_spm_registers,
1576                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1577                 break;
1578         case CHIP_MULLINS:
1579                 radeon_program_register_sequence(rdev,
1580                                                  kalindi_mgcg_cgcg_init,
1581                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1582                 radeon_program_register_sequence(rdev,
1583                                                  godavari_golden_registers,
1584                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_golden_common_registers,
1587                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_spm_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1591                 break;
1592         case CHIP_KAVERI:
1593                 radeon_program_register_sequence(rdev,
1594                                                  spectre_mgcg_cgcg_init,
1595                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1596                 radeon_program_register_sequence(rdev,
1597                                                  spectre_golden_registers,
1598                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1599                 radeon_program_register_sequence(rdev,
1600                                                  spectre_golden_common_registers,
1601                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1602                 radeon_program_register_sequence(rdev,
1603                                                  spectre_golden_spm_registers,
1604                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1605                 break;
1606         case CHIP_HAWAII:
1607                 radeon_program_register_sequence(rdev,
1608                                                  hawaii_mgcg_cgcg_init,
1609                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1610                 radeon_program_register_sequence(rdev,
1611                                                  hawaii_golden_registers,
1612                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1613                 radeon_program_register_sequence(rdev,
1614                                                  hawaii_golden_common_registers,
1615                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1616                 radeon_program_register_sequence(rdev,
1617                                                  hawaii_golden_spm_registers,
1618                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1619                 break;
1620         default:
1621                 break;
1622         }
1623         mutex_unlock(&rdev->grbm_idx_mutex);
1624 }
1625
1626 /**
1627  * cik_get_xclk - get the xclk
1628  *
1629  * @rdev: radeon_device pointer
1630  *
1631  * Returns the reference clock used by the gfx engine
1632  * (CIK).
1633  */
1634 u32 cik_get_xclk(struct radeon_device *rdev)
1635 {
1636         u32 reference_clock = rdev->clock.spll.reference_freq;
1637
1638         if (rdev->flags & RADEON_IS_IGP) {
1639                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1640                         return reference_clock / 2;
1641         } else {
1642                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1643                         return reference_clock / 4;
1644         }
1645         return reference_clock;
1646 }
1647
1648 /**
1649  * cik_mm_rdoorbell - read a doorbell dword
1650  *
1651  * @rdev: radeon_device pointer
1652  * @index: doorbell index
1653  *
1654  * Returns the value in the doorbell aperture at the
1655  * requested doorbell index (CIK).
1656  */
1657 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1658 {
1659         if (index < rdev->doorbell.num_doorbells) {
1660                 return readl(rdev->doorbell.ptr + index);
1661         } else {
1662                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1663                 return 0;
1664         }
1665 }
1666
1667 /**
1668  * cik_mm_wdoorbell - write a doorbell dword
1669  *
1670  * @rdev: radeon_device pointer
1671  * @index: doorbell index
1672  * @v: value to write
1673  *
1674  * Writes @v to the doorbell aperture at the
1675  * requested doorbell index (CIK).
1676  */
1677 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1678 {
1679         if (index < rdev->doorbell.num_doorbells) {
1680                 writel(v, rdev->doorbell.ptr + index);
1681         } else {
1682                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1683         }
1684 }
1685
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * MC I/O debug register init table for Bonaire.
 * Each entry is a {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair;
 * ci_mc_load_microcode() programs these before uploading the MC ucode
 * when no new-format firmware (with its own embedded table) is in use.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1727
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * MC I/O debug register init table for Hawaii.
 * Each entry is a {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair;
 * ci_mc_load_microcode() programs these before uploading the MC ucode
 * when no new-format firmware (with its own embedded table) is in use.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1755
1756
1757 /**
1758  * cik_srbm_select - select specific register instances
1759  *
1760  * @rdev: radeon_device pointer
1761  * @me: selected ME (micro engine)
1762  * @pipe: pipe
1763  * @queue: queue
1764  * @vmid: VMID
1765  *
1766  * Switches the currently active registers instances.  Some
1767  * registers are instanced per VMID, others are instanced per
1768  * me/pipe/queue combination.
1769  */
1770 static void cik_srbm_select(struct radeon_device *rdev,
1771                             u32 me, u32 pipe, u32 queue, u32 vmid)
1772 {
1773         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1774                              MEID(me & 0x3) |
1775                              VMID(vmid & 0xf) |
1776                              QUEUEID(queue & 0x7));
1777         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1778 }
1779
1780 /* ucode loading */
1781 /**
1782  * ci_mc_load_microcode - load MC ucode into the hw
1783  *
1784  * @rdev: radeon_device pointer
1785  *
1786  * Load the GDDR MC ucode into the hw (CIK).
1787  * Returns 0 on success, error on failure.
1788  */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;		/* legacy image: big-endian dwords */
	const __le32 *new_fw_data = NULL;	/* new-format image: little-endian dwords */
	u32 running, tmp;
	u32 *io_mc_regs = NULL;			/* legacy: built-in index/data table */
	const __le32 *new_io_mc_regs = NULL;	/* new-format: table embedded in the fw blob */
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* New-format firmware carries a header locating both the
		 * IO debug table and the ucode payload inside the blob. */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug entries are {index, data} dword pairs, hence /(4*2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* Legacy firmware is raw ucode; the IO debug table comes
		 * from the chip-specific tables built into the driver. */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* Only (re)load the ucode if the MC sequencer is not already running. */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				/* flat u32 view of the [n][2] table: even = index, odd = data */
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* NOTE(review): looks like a board-specific MC tuning quirk for
		 * PCI device 0x6649 with a particular MC_SEQ_MISC0 signature —
		 * confirm against the hw spec before touching. */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		/* NOTE(review): training timeouts are not reported to the
		 * caller — presumably best-effort by design; verify. */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1885
1886 /**
1887  * cik_init_microcode - load ucode images from disk
1888  *
1889  * @rdev: radeon_device pointer
1890  *
1891  * Use the firmware interface to load the ucode images into
1892  * the driver (not loaded into hw).
1893  * Returns 0 on success, error on failure.
1894  */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;		/* legacy firmware name stem */
	const char *new_chip_name;	/* new-format firmware name stem */
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;			/* how many new-format images were found */
	int err;
	int num_fw;			/* number of images expected for this chip */
	bool new_smc = false;		/* some board revisions need an alternate SMC image */

	DRM_DEBUG("\n");

	/* Pick firmware name stems and the expected legacy image sizes
	 * (in bytes) per ASIC.  APUs (Kaveri/Kabini/Mullins) have no
	 * MC/SMC firmware, hence fewer expected images. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		if ((rdev->pdev->revision == 0x80) ||
		    (rdev->pdev->revision == 0x81) ||
		    (rdev->pdev->device == 0x665f))
			new_smc = true;
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		if (rdev->pdev->revision == 0x80)
			new_smc = true;
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* PFP: try new-format name first, fall back to legacy + size check.
	 * (Firmware name format strings are deblobbed in this tree.) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ME */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
			/* NOTE(review): no goto out here (unlike the PFP
			 * branch); err can be overwritten by the next
			 * request — confirm whether that is intended. */
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* CE */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC (compute) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC2: new-format only, and only on Kaveri. */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* RLC */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			printk(KERN_ERR
			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* SDMA */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			printk(KERN_ERR
			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		/* MC: legacy fallback tries two legacy names in turn
		 * (the legacy image may be either mc or mc2 sized). */
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
				err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				printk(KERN_ERR
				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				printk(KERN_ERR
				       "cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		/* SMC: failure to load is non-fatal (DPM is just disabled),
		 * so the error is logged and cleared. */
		if (new_smc)
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				printk(KERN_ERR
				       "smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				printk(KERN_ERR
				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				printk(KERN_ERR
				       "cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* Either all images are legacy or all are new-format; a mix is
	 * rejected because the two formats are parsed differently. */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	/* On any failure, drop every firmware reference obtained so far. */
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
2252
2253 /*
2254  * Core functions
2255  */
2256 /**
2257  * cik_tiling_mode_table_init - init the hw tiling table
2258  *
2259  * @rdev: radeon_device pointer
2260  *
2261  * Starting with SI, the tiling setup is done globally in a
2262  * set of 32 tiling modes.  Rather than selecting each set of
2263  * parameters per surface as on older asics, we just select
2264  * which index in the tiling table we want to use, and the
2265  * surface uses those parameters (CIK).
2266  */
2267 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2268 {
2269         u32 *tile = rdev->config.cik.tile_mode_array;
2270         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2271         const u32 num_tile_mode_states =
2272                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2273         const u32 num_secondary_tile_mode_states =
2274                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2275         u32 reg_offset, split_equal_to_row_size;
2276         u32 num_pipe_configs;
2277         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2278                 rdev->config.cik.max_shader_engines;
2279
2280         switch (rdev->config.cik.mem_row_size_in_kb) {
2281         case 1:
2282                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2283                 break;
2284         case 2:
2285         default:
2286                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2287                 break;
2288         case 4:
2289                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2290                 break;
2291         }
2292
2293         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2294         if (num_pipe_configs > 8)
2295                 num_pipe_configs = 16;
2296
2297         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2298                 tile[reg_offset] = 0;
2299         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2300                 macrotile[reg_offset] = 0;
2301
2302         switch(num_pipe_configs) {
2303         case 16:
2304                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2305                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2306                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2308                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2312                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2316                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2320                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                            TILE_SPLIT(split_equal_to_row_size));
2324                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2328                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2332                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2333                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                            TILE_SPLIT(split_equal_to_row_size));
2335                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2336                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2337                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2338                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2340                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2346                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2347                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2349                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2350                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2355                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2357                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2360                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2361                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2362                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2364                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2365                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2368                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2370                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2372                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2375                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2376                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2377                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2379                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2380                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382
2383                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386                            NUM_BANKS(ADDR_SURF_16_BANK));
2387                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390                            NUM_BANKS(ADDR_SURF_16_BANK));
2391                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2393                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2394                            NUM_BANKS(ADDR_SURF_16_BANK));
2395                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2397                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2398                            NUM_BANKS(ADDR_SURF_16_BANK));
2399                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2401                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2402                            NUM_BANKS(ADDR_SURF_8_BANK));
2403                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2406                            NUM_BANKS(ADDR_SURF_4_BANK));
2407                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2410                            NUM_BANKS(ADDR_SURF_2_BANK));
2411                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2413                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2414                            NUM_BANKS(ADDR_SURF_16_BANK));
2415                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2417                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2418                            NUM_BANKS(ADDR_SURF_16_BANK));
2419                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2421                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2422                             NUM_BANKS(ADDR_SURF_16_BANK));
2423                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2425                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2426                             NUM_BANKS(ADDR_SURF_8_BANK));
2427                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2429                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2430                             NUM_BANKS(ADDR_SURF_4_BANK));
2431                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2434                             NUM_BANKS(ADDR_SURF_2_BANK));
2435                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2438                             NUM_BANKS(ADDR_SURF_2_BANK));
2439
2440                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2441                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2442                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2443                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2444                 break;
2445
2446         case 8:
2447                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2449                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2450                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2451                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2453                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2455                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2456                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2459                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2461                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2462                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2463                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2465                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466                            TILE_SPLIT(split_equal_to_row_size));
2467                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2468                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2471                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2472                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2474                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2475                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2476                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                            TILE_SPLIT(split_equal_to_row_size));
2478                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2479                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2480                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2483                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2485                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2488                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2489                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2490                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2492                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2493                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2498                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2500                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2504                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2507                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2508                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2513                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2515                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2519                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2520                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2522                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2523                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525
2526                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529                                 NUM_BANKS(ADDR_SURF_16_BANK));
2530                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2533                                 NUM_BANKS(ADDR_SURF_16_BANK));
2534                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2537                                 NUM_BANKS(ADDR_SURF_16_BANK));
2538                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541                                 NUM_BANKS(ADDR_SURF_16_BANK));
2542                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2544                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545                                 NUM_BANKS(ADDR_SURF_8_BANK));
2546                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2548                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2549                                 NUM_BANKS(ADDR_SURF_4_BANK));
2550                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553                                 NUM_BANKS(ADDR_SURF_2_BANK));
2554                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2556                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2557                                 NUM_BANKS(ADDR_SURF_16_BANK));
2558                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2561                                 NUM_BANKS(ADDR_SURF_16_BANK));
2562                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2565                                 NUM_BANKS(ADDR_SURF_16_BANK));
2566                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2569                                 NUM_BANKS(ADDR_SURF_16_BANK));
2570                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2573                                 NUM_BANKS(ADDR_SURF_8_BANK));
2574                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2577                                 NUM_BANKS(ADDR_SURF_4_BANK));
2578                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2580                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2581                                 NUM_BANKS(ADDR_SURF_2_BANK));
2582
2583                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2584                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2585                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2586                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2587                 break;
2588
2589         case 4:
2590                 if (num_rbs == 4) {
2591                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2593                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2595                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2597                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2599                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2607                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2610                            TILE_SPLIT(split_equal_to_row_size));
2611                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2613                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2614                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2616                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2618                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2619                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2620                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2621                            TILE_SPLIT(split_equal_to_row_size));
2622                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2623                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2624                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2625                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2627                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2629                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2636                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2639                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2642                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2644                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2651                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2655                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2657                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2658                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2659                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2662                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2663                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2667                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669
2670                 } else if (num_rbs < 4) {
2671                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2675                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2677                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2679                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2680                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2687                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2689                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2690                            TILE_SPLIT(split_equal_to_row_size));
2691                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2692                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2693                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2694                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2695                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2696                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2698                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2699                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2700                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2701                            TILE_SPLIT(split_equal_to_row_size));
2702                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2703                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2704                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2707                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2709                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2713                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2717                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2722                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2724                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2728                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2732                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2735                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2737                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2738                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2739                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2743                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2745                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2746                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2747                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2749                 }
2750
2751                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2753                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754                                 NUM_BANKS(ADDR_SURF_16_BANK));
2755                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758                                 NUM_BANKS(ADDR_SURF_16_BANK));
2759                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2762                                 NUM_BANKS(ADDR_SURF_16_BANK));
2763                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2766                                 NUM_BANKS(ADDR_SURF_16_BANK));
2767                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2770                                 NUM_BANKS(ADDR_SURF_16_BANK));
2771                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2773                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2774                                 NUM_BANKS(ADDR_SURF_8_BANK));
2775                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2778                                 NUM_BANKS(ADDR_SURF_4_BANK));
2779                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2780                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2781                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2782                                 NUM_BANKS(ADDR_SURF_16_BANK));
2783                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2784                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786                                 NUM_BANKS(ADDR_SURF_16_BANK));
2787                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2789                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2790                                 NUM_BANKS(ADDR_SURF_16_BANK));
2791                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2793                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2794                                 NUM_BANKS(ADDR_SURF_16_BANK));
2795                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798                                 NUM_BANKS(ADDR_SURF_16_BANK));
2799                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2801                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2802                                 NUM_BANKS(ADDR_SURF_8_BANK));
2803                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2805                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2806                                 NUM_BANKS(ADDR_SURF_4_BANK));
2807
2808                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2809                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2810                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2811                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2812                 break;
2813
2814         case 2:
2815                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2817                            PIPE_CONFIG(ADDR_SURF_P2) |
2818                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2819                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821                            PIPE_CONFIG(ADDR_SURF_P2) |
2822                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2823                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825                            PIPE_CONFIG(ADDR_SURF_P2) |
2826                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2827                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829                            PIPE_CONFIG(ADDR_SURF_P2) |
2830                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2831                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833                            PIPE_CONFIG(ADDR_SURF_P2) |
2834                            TILE_SPLIT(split_equal_to_row_size));
2835                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836                            PIPE_CONFIG(ADDR_SURF_P2) |
2837                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2839                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2840                            PIPE_CONFIG(ADDR_SURF_P2) |
2841                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2842                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2843                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2844                            PIPE_CONFIG(ADDR_SURF_P2) |
2845                            TILE_SPLIT(split_equal_to_row_size));
2846                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2847                            PIPE_CONFIG(ADDR_SURF_P2);
2848                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2849                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2850                            PIPE_CONFIG(ADDR_SURF_P2));
2851                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2853                             PIPE_CONFIG(ADDR_SURF_P2) |
2854                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2857                             PIPE_CONFIG(ADDR_SURF_P2) |
2858                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2860                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2861                             PIPE_CONFIG(ADDR_SURF_P2) |
2862                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2864                             PIPE_CONFIG(ADDR_SURF_P2) |
2865                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2866                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868                             PIPE_CONFIG(ADDR_SURF_P2) |
2869                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2872                             PIPE_CONFIG(ADDR_SURF_P2) |
2873                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2875                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2876                             PIPE_CONFIG(ADDR_SURF_P2) |
2877                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2878                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2879                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2880                             PIPE_CONFIG(ADDR_SURF_P2));
2881                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883                             PIPE_CONFIG(ADDR_SURF_P2) |
2884                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2886                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2887                             PIPE_CONFIG(ADDR_SURF_P2) |
2888                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2889                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2891                             PIPE_CONFIG(ADDR_SURF_P2) |
2892                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893
2894                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2895                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2896                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2897                                 NUM_BANKS(ADDR_SURF_16_BANK));
2898                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2899                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2900                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2901                                 NUM_BANKS(ADDR_SURF_16_BANK));
2902                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2903                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2904                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2905                                 NUM_BANKS(ADDR_SURF_16_BANK));
2906                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2908                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2909                                 NUM_BANKS(ADDR_SURF_16_BANK));
2910                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2913                                 NUM_BANKS(ADDR_SURF_16_BANK));
2914                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2916                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                 NUM_BANKS(ADDR_SURF_16_BANK));
2918                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2920                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2921                                 NUM_BANKS(ADDR_SURF_8_BANK));
2922                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2923                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2924                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2925                                 NUM_BANKS(ADDR_SURF_16_BANK));
2926                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2927                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2928                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                 NUM_BANKS(ADDR_SURF_16_BANK));
2930                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2931                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933                                 NUM_BANKS(ADDR_SURF_16_BANK));
2934                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2935                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2936                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2937                                 NUM_BANKS(ADDR_SURF_16_BANK));
2938                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2940                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941                                 NUM_BANKS(ADDR_SURF_16_BANK));
2942                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2944                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945                                 NUM_BANKS(ADDR_SURF_16_BANK));
2946                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2949                                 NUM_BANKS(ADDR_SURF_8_BANK));
2950
2951                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2952                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2953                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2954                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2955                 break;
2956
2957         default:
2958                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2959         }
2960 }
2961
2962 /**
2963  * cik_select_se_sh - select which SE, SH to address
2964  *
2965  * @rdev: radeon_device pointer
2966  * @se_num: shader engine to address
2967  * @sh_num: sh block to address
2968  *
2969  * Select which SE, SH combinations to address. Certain
2970  * registers are instanced per SE or SH.  0xffffffff means
2971  * broadcast to all SEs or SHs (CIK).
2972  */
2973 static void cik_select_se_sh(struct radeon_device *rdev,
2974                              u32 se_num, u32 sh_num)
2975 {
2976         u32 data = INSTANCE_BROADCAST_WRITES;
2977
2978         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2979                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2980         else if (se_num == 0xffffffff)
2981                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2982         else if (sh_num == 0xffffffff)
2983                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2984         else
2985                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2986         WREG32(GRBM_GFX_INDEX, data);
2987 }
2988
2989 /**
2990  * cik_create_bitmask - create a bitmask
2991  *
2992  * @bit_width: length of the mask
2993  *
2994  * create a variable length bit mask (CIK).
2995  * Returns the bitmask.
2996  */
2997 static u32 cik_create_bitmask(u32 bit_width)
2998 {
2999         u32 i, mask = 0;
3000
3001         for (i = 0; i < bit_width; i++) {
3002                 mask <<= 1;
3003                 mask |= 1;
3004         }
3005         return mask;
3006 }
3007
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine)
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
3019 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3020                               u32 max_rb_num_per_se,
3021                               u32 sh_per_se)
3022 {
3023         u32 data, mask;
3024
3025         data = RREG32(CC_RB_BACKEND_DISABLE);
3026         if (data & 1)
3027                 data &= BACKEND_DISABLE_MASK;
3028         else
3029                 data = 0;
3030         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3031
3032         data >>= BACKEND_DISABLE_SHIFT;
3033
3034         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3035
3036         return data & mask;
3037 }
3038
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num_per_se)
{
        int i, j;
        u32 data, mask;
        u32 disabled_rbs = 0;
        u32 enabled_rbs = 0;

        /* Pass 1: visit every SE/SH pair and collect its disabled-RB bits
         * into one packed bitmap.  The per-SH field width differs on
         * Hawaii (4 RBs per SH) vs the other CIK parts. */
        mutex_lock(&rdev->grbm_idx_mutex);
        for (i = 0; i < se_num; i++) {
                for (j = 0; j < sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
                        if (rdev->family == CHIP_HAWAII)
                                disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
                        else
                                disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
                }
        }
        /* Restore broadcast addressing before releasing the index mutex. */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* Invert the disabled bitmap into an enabled bitmap over all RBs. */
        mask = 1;
        for (i = 0; i < max_rb_num_per_se * se_num; i++) {
                if (!(disabled_rbs & mask))
                        enabled_rbs |= mask;
                mask <<= 1;
        }

        /* Exported so userspace/other code can see which backends are live. */
        rdev->config.cik.backend_enable_mask = enabled_rbs;

        /* Pass 2: program PA_SC_RASTER_CONFIG per SE, consuming two
         * enabled-RB bits per SH from the low end of enabled_rbs. */
        mutex_lock(&rdev->grbm_idx_mutex);
        for (i = 0; i < se_num; i++) {
                cik_select_se_sh(rdev, i, 0xffffffff);
                data = 0;
                for (j = 0; j < sh_per_se; j++) {
                        switch (enabled_rbs & 3) {
                        case 0:
                                /* no RBs enabled in this SH: pick a packer map */
                                if (j == 0)
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
                                else
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
                                break;
                        case 1:
                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
                                break;
                        case 2:
                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
                                break;
                        case 3:
                        default:
                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
                                break;
                        }
                        /* advance to the next SH's two bits */
                        enabled_rbs >>= 2;
                }
                WREG32(PA_SC_RASTER_CONFIG, data);
        }
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);
}
3111
3112 /**
3113  * cik_gpu_init - setup the 3D engine
3114  *
3115  * @rdev: radeon_device pointer
3116  *
3117  * Configures the 3D engine and tiling configuration
3118  * registers so that the 3D engine is usable.
3119  */
static void cik_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        /* Per-ASIC shader/pipe topology limits and FIFO sizes; the golden
         * GB_ADDR_CONFIG value overrides whatever was read above. */
        switch (rdev->family) {
        case CHIP_BONAIRE:
                rdev->config.cik.max_shader_engines = 2;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 7;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_HAWAII:
                rdev->config.cik.max_shader_engines = 4;
                rdev->config.cik.max_tile_pipes = 16;
                rdev->config.cik.max_cu_per_sh = 11;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 4;
                rdev->config.cik.max_texture_channel_caches = 16;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KAVERI:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 8;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KABINI:
        case CHIP_MULLINS:
        default:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 2;
                rdev->config.cik.max_cu_per_sh = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 2;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
        /* enable and ack SRBM interrupts */
        WREG32(SRBM_INT_CNTL, 0x1);
        WREG32(SRBM_INT_ACK, 0x1);

        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        /* NOTE(review): mc_shared_chmap is read here but not referenced
         * again in this function — presumably kept for parity with other
         * ASIC init paths; confirm before removing. */
        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
        rdev->config.cik.mem_max_burst_length_bytes = 256;
        /* derive DRAM row size (in KB) from the column-count field,
         * capped at 4KB */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.cik.mem_row_size_in_kb > 4)
                rdev->config.cik.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.cik.shader_engine_tile_size = 32;
        rdev->config.cik.num_gpus = 1;
        rdev->config.cik.multi_gpu_tile_size = 64;

        /* fix up row size */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.cik.tile_config = 0;
        switch (rdev->config.cik.num_tile_pipes) {
        case 1:
                rdev->config.cik.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.cik.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.cik.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.cik.tile_config |= (3 << 0);
                break;
        }
        rdev->config.cik.tile_config |=
                ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* mirror the address config into every block that needs to agree
         * on the memory layout (HDP, display, SDMA engines, UVD) */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

        cik_tiling_mode_table_init(rdev);

        cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);

        /* count the active compute units across all SE/SH pairs */
        rdev->config.cik.active_cus = 0;
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        rdev->config.cik.active_cus +=
                                hweight32(cik_get_cu_active_bitmap(rdev, i, j));
                }
        }

        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        mutex_lock(&rdev->grbm_idx_mutex);
        /*
         * making sure that the following register writes will be broadcasted
         * to all the shaders
         */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        WREG32(SX_DEBUG_1, 0x20);

        WREG32(TA_CNTL_AUX, 0x00010000);

        tmp = RREG32(SPI_CONFIG_CNTL);
        tmp |= 0x03000000;
        WREG32(SPI_CONFIG_CNTL, tmp);

        WREG32(SQ_CONFIG, 1);

        WREG32(DB_DEBUG, 0);

        /* read-modify-write the DB/CB debug controls, preserving
         * unrelated fields */
        tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
        tmp |= 0x00000400;
        WREG32(DB_DEBUG2, tmp);

        tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
        tmp |= 0x00020200;
        WREG32(DB_DEBUG3, tmp);

        tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
        tmp |= 0x00018208;
        WREG32(CB_HW_CONTROL, tmp);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        /* program the scan-converter FIFO sizes chosen per-ASIC above */
        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        /* read-back write of HDP_HOST_PATH_CNTL keeps its current value */
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
        WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* let the programming settle before the engine is used */
        udelay(50);
}
3367
3368 /*
3369  * GPU scratch registers helpers function.
3370  */
3371 /**
3372  * cik_scratch_init - setup driver info for CP scratch regs
3373  *
3374  * @rdev: radeon_device pointer
3375  *
3376  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3378  * is not used by default on newer asics (r6xx+).  On newer asics,
3379  * memory buffers are used for fences rather than scratch regs.
3380  */
3381 static void cik_scratch_init(struct radeon_device *rdev)
3382 {
3383         int i;
3384
3385         rdev->scratch.num_reg = 7;
3386         rdev->scratch.reg_base = SCRATCH_REG0;
3387         for (i = 0; i < rdev->scratch.num_reg; i++) {
3388                 rdev->scratch.free[i] = true;
3389                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3390         }
3391 }
3392
3393 /**
3394  * cik_ring_test - basic gfx ring test
3395  *
3396  * @rdev: radeon_device pointer
3397  * @ring: radeon_ring structure holding ring information
3398  *
3399  * Allocate a scratch register and write to it using the gfx ring (CIK).
3400  * Provides a basic gfx ring test to verify that the ring is working.
3401  * Used by cik_cp_gfx_resume();
3402  * Returns 0 on success, error on failure.
3403  */
3404 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3405 {
3406         uint32_t scratch;
3407         uint32_t tmp = 0;
3408         unsigned i;
3409         int r;
3410
3411         r = radeon_scratch_get(rdev, &scratch);
3412         if (r) {
3413                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3414                 return r;
3415         }
3416         WREG32(scratch, 0xCAFEDEAD);
3417         r = radeon_ring_lock(rdev, ring, 3);
3418         if (r) {
3419                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3420                 radeon_scratch_free(rdev, scratch);
3421                 return r;
3422         }
3423         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3424         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3425         radeon_ring_write(ring, 0xDEADBEEF);
3426         radeon_ring_unlock_commit(rdev, ring, false);
3427
3428         for (i = 0; i < rdev->usec_timeout; i++) {
3429                 tmp = RREG32(scratch);
3430                 if (tmp == 0xDEADBEEF)
3431                         break;
3432                 DRM_UDELAY(1);
3433         }
3434         if (i < rdev->usec_timeout) {
3435                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3436         } else {
3437                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3438                           ring->idx, scratch, tmp);
3439                 r = -EINVAL;
3440         }
3441         radeon_scratch_free(rdev, scratch);
3442         return r;
3443 }
3444
3445 /**
3446  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3447  *
3448  * @rdev: radeon_device pointer
3449  * @ridx: radeon ring index
3450  *
3451  * Emits an hdp flush on the cp.
3452  */
3453 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3454                                        int ridx)
3455 {
3456         struct radeon_ring *ring = &rdev->ring[ridx];
3457         u32 ref_and_mask;
3458
3459         switch (ring->idx) {
3460         case CAYMAN_RING_TYPE_CP1_INDEX:
3461         case CAYMAN_RING_TYPE_CP2_INDEX:
3462         default:
3463                 switch (ring->me) {
3464                 case 0:
3465                         ref_and_mask = CP2 << ring->pipe;
3466                         break;
3467                 case 1:
3468                         ref_and_mask = CP6 << ring->pipe;
3469                         break;
3470                 default:
3471                         return;
3472                 }
3473                 break;
3474         case RADEON_RING_TYPE_GFX_INDEX:
3475                 ref_and_mask = CP0;
3476                 break;
3477         }
3478
3479         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3480         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3481                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3482                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3483         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3484         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3485         radeon_ring_write(ring, ref_and_mask);
3486         radeon_ring_write(ring, ref_and_mask);
3487         radeon_ring_write(ring, 0x20); /* poll interval */
3488 }
3489
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc); /* fence addr, dword aligned */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0)); /* write data, no interrupt */
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); /* INT_SEL(2): irq after seq write */
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3530
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2)); /* write seq, then interrupt */
	radeon_ring_write(ring, addr & 0xfffffffc); /* fence addr, dword aligned */
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3558
3559 /**
3560  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3561  *
3562  * @rdev: radeon_device pointer
3563  * @ring: radeon ring buffer object
3564  * @semaphore: radeon semaphore object
3565  * @emit_wait: Is this a sempahore wait?
3566  *
3567  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3568  * from running ahead of semaphore waits.
3569  */
3570 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3571                              struct radeon_ring *ring,
3572                              struct radeon_semaphore *semaphore,
3573                              bool emit_wait)
3574 {
3575         uint64_t addr = semaphore->gpu_addr;
3576         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3577
3578         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3579         radeon_ring_write(ring, lower_32_bits(addr));
3580         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3581
3582         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3583                 /* Prevent the PFP from running ahead of the semaphore wait */
3584                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3585                 radeon_ring_write(ring, 0x0);
3586         }
3587
3588         return true;
3589 }
3590
3591 /**
3592  * cik_copy_cpdma - copy pages using the CP DMA engine
3593  *
3594  * @rdev: radeon_device pointer
3595  * @src_offset: src GPU address
3596  * @dst_offset: dst GPU address
3597  * @num_gpu_pages: number of GPU pages to xfer
3598  * @resv: reservation object to sync to
3599  *
3600  * Copy GPU paging using the CP DMA engine (CIK+).
3601  * Used by the radeon ttm implementation to move pages if
3602  * registered as the asic copy callback.
3603  */
3604 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3605                                     uint64_t src_offset, uint64_t dst_offset,
3606                                     unsigned num_gpu_pages,
3607                                     struct reservation_object *resv)
3608 {
3609         struct radeon_fence *fence;
3610         struct radeon_sync sync;
3611         int ring_index = rdev->asic->copy.blit_ring_index;
3612         struct radeon_ring *ring = &rdev->ring[ring_index];
3613         u32 size_in_bytes, cur_size_in_bytes, control;
3614         int i, num_loops;
3615         int r = 0;
3616
3617         radeon_sync_create(&sync);
3618
3619         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3620         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3621         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3622         if (r) {
3623                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3624                 radeon_sync_free(rdev, &sync, NULL);
3625                 return ERR_PTR(r);
3626         }
3627
3628         radeon_sync_resv(rdev, &sync, resv, false);
3629         radeon_sync_rings(rdev, &sync, ring->idx);
3630
3631         for (i = 0; i < num_loops; i++) {
3632                 cur_size_in_bytes = size_in_bytes;
3633                 if (cur_size_in_bytes > 0x1fffff)
3634                         cur_size_in_bytes = 0x1fffff;
3635                 size_in_bytes -= cur_size_in_bytes;
3636                 control = 0;
3637                 if (size_in_bytes == 0)
3638                         control |= PACKET3_DMA_DATA_CP_SYNC;
3639                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3640                 radeon_ring_write(ring, control);
3641                 radeon_ring_write(ring, lower_32_bits(src_offset));
3642                 radeon_ring_write(ring, upper_32_bits(src_offset));
3643                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3644                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3645                 radeon_ring_write(ring, cur_size_in_bytes);
3646                 src_offset += cur_size_in_bytes;
3647                 dst_offset += cur_size_in_bytes;
3648         }
3649
3650         r = radeon_fence_emit(rdev, &fence, ring->idx);
3651         if (r) {
3652                 radeon_ring_unlock_undo(rdev, ring);
3653                 radeon_sync_free(rdev, &sync, NULL);
3654                 return ERR_PTR(r);
3655         }
3656
3657         radeon_ring_unlock_commit(rdev, ring, false);
3658         radeon_sync_free(rdev, &sync, fence);
3659
3660         return fence;
3661 }
3662
3663 /*
3664  * IB stuff
3665  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_UCONFIG_REG packet plus 4
			 * for the INDIRECT_BUFFER packet emitted below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA packet plus 4
			 * for the INDIRECT_BUFFER packet emitted below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id to execute it under */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3717
3718 /**
3719  * cik_ib_test - basic gfx ring IB test
3720  *
3721  * @rdev: radeon_device pointer
3722  * @ring: radeon_ring structure holding ring information
3723  *
3724  * Allocate an IB and execute it on the gfx ring (CIK).
3725  * Provides a basic gfx ring test to verify that IBs are working.
3726  * Returns 0 on success, error on failure.
3727  */
3728 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3729 {
3730         struct radeon_ib ib;
3731         uint32_t scratch;
3732         uint32_t tmp = 0;
3733         unsigned i;
3734         int r;
3735
3736         r = radeon_scratch_get(rdev, &scratch);
3737         if (r) {
3738                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3739                 return r;
3740         }
3741         WREG32(scratch, 0xCAFEDEAD);
3742         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3743         if (r) {
3744                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3745                 radeon_scratch_free(rdev, scratch);
3746                 return r;
3747         }
3748         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3749         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3750         ib.ptr[2] = 0xDEADBEEF;
3751         ib.length_dw = 3;
3752         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3753         if (r) {
3754                 radeon_scratch_free(rdev, scratch);
3755                 radeon_ib_free(rdev, &ib);
3756                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3757                 return r;
3758         }
3759         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3760                 RADEON_USEC_IB_TEST_TIMEOUT));
3761         if (r < 0) {
3762                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3763                 radeon_scratch_free(rdev, scratch);
3764                 radeon_ib_free(rdev, &ib);
3765                 return r;
3766         } else if (r == 0) {
3767                 DRM_ERROR("radeon: fence wait timed out.\n");
3768                 radeon_scratch_free(rdev, scratch);
3769                 radeon_ib_free(rdev, &ib);
3770                 return -ETIMEDOUT;
3771         }
3772         r = 0;
3773         for (i = 0; i < rdev->usec_timeout; i++) {
3774                 tmp = RREG32(scratch);
3775                 if (tmp == 0xDEADBEEF)
3776                         break;
3777                 DRM_UDELAY(1);
3778         }
3779         if (i < rdev->usec_timeout) {
3780                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3781         } else {
3782                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3783                           scratch, tmp);
3784                 r = -EINVAL;
3785         }
3786         radeon_scratch_free(rdev, scratch);
3787         radeon_ib_free(rdev, &ib);
3788         return r;
3789 }
3790
3791 /*
3792  * CP.
 * On CIK, gfx and compute now have independent command processors.
3794  *
3795  * GFX
3796  * Gfx consists of a single ring and can process both gfx jobs and
3797  * compute jobs.  The gfx CP consists of three microengines (ME):
3798  * PFP - Pre-Fetch Parser
3799  * ME - Micro Engine
3800  * CE - Constant Engine
3801  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3803  * used by the DE so that they can be loaded into cache in parallel
3804  * while the DE is processing state update packets.
3805  *
3806  * Compute
3807  * The compute CP consists of two microengines (ME):
3808  * MEC1 - Compute MicroEngine 1
3809  * MEC2 - Compute MicroEngine 2
3810  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3811  * The queues are exposed to userspace and are programmed directly
3812  * by the compute runtime.
3813  */
3814 /**
3815  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3816  *
3817  * @rdev: radeon_device pointer
3818  * @enable: enable or disable the MEs
3819  *
3820  * Halts or unhalts the gfx MEs.
3821  */
3822 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3823 {
3824         if (enable)
3825                 WREG32(CP_ME_CNTL, 0);
3826         else {
3827                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3828                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3829                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3830                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3831         }
3832         udelay(50);
3833 }
3834
3835 /**
3836  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3837  *
3838  * @rdev: radeon_device pointer
3839  *
3840  * Loads the gfx PFP, ME, and CE ucode.
3841  * Returns 0 for success, -EINVAL if the ucode is not available.
3842  */
3843 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3844 {
3845         int i;
3846
3847         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3848                 return -EINVAL;
3849
3850         cik_cp_gfx_enable(rdev, false);
3851
3852         if (rdev->new_fw) {
3853                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3854                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3855                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3856                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3857                 const struct gfx_firmware_header_v1_0 *me_hdr =
3858                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3859                 const __le32 *fw_data;
3860                 u32 fw_size;
3861
3862                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3863                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3864                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3865
3866                 /* PFP */
3867                 fw_data = (const __le32 *)
3868                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3869                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3870                 WREG32(CP_PFP_UCODE_ADDR, 0);
3871                 for (i = 0; i < fw_size; i++)
3872                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3873                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3874
3875                 /* CE */
3876                 fw_data = (const __le32 *)
3877                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3878                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3879                 WREG32(CP_CE_UCODE_ADDR, 0);
3880                 for (i = 0; i < fw_size; i++)
3881                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3882                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3883
3884                 /* ME */
3885                 fw_data = (const __be32 *)
3886                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3887                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3888                 WREG32(CP_ME_RAM_WADDR, 0);
3889                 for (i = 0; i < fw_size; i++)
3890                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3891                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3892                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3893         } else {
3894                 const __be32 *fw_data;
3895
3896                 /* PFP */
3897                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3898                 WREG32(CP_PFP_UCODE_ADDR, 0);
3899                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3900                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3901                 WREG32(CP_PFP_UCODE_ADDR, 0);
3902
3903                 /* CE */
3904                 fw_data = (const __be32 *)rdev->ce_fw->data;
3905                 WREG32(CP_CE_UCODE_ADDR, 0);
3906                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3907                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3908                 WREG32(CP_CE_UCODE_ADDR, 0);
3909
3910                 /* ME */
3911                 fw_data = (const __be32 *)rdev->me_fw->data;
3912                 WREG32(CP_ME_RAM_WADDR, 0);
3913                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3914                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3915                 WREG32(CP_ME_RAM_WADDR, 0);
3916         }
3917
3918         return 0;
3919 }
3920
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the clear state dwords plus the 17 setup dwords below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream out the golden register state (clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
3981
3982 /**
3983  * cik_cp_gfx_fini - stop the gfx ring
3984  *
3985  * @rdev: radeon_device pointer
3986  *
3987  * Stop the gfx ring and tear down the driver ring
3988  * info.
3989  */
3990 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3991 {
3992         cik_cp_gfx_enable(rdev, false);
3993         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3994 }
3995
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in dwords/8) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not update the rptr copy */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring can now do buffer moves: expose all of VRAM again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4071
4072 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4073                      struct radeon_ring *ring)
4074 {
4075         u32 rptr;
4076
4077         if (rdev->wb.enabled)
4078                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4079         else
4080                 rptr = RREG32(CP_RB0_RPTR);
4081
4082         return rptr;
4083 }
4084
4085 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4086                      struct radeon_ring *ring)
4087 {
4088         return RREG32(CP_RB0_WPTR);
4089 }
4090
4091 void cik_gfx_set_wptr(struct radeon_device *rdev,
4092                       struct radeon_ring *ring)
4093 {
4094         WREG32(CP_RB0_WPTR, ring->wptr);
4095         (void)RREG32(CP_RB0_WPTR);
4096 }
4097
4098 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4099                          struct radeon_ring *ring)
4100 {
4101         u32 rptr;
4102
4103         if (rdev->wb.enabled) {
4104                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4105         } else {
4106                 mutex_lock(&rdev->srbm_mutex);
4107                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4108                 rptr = RREG32(CP_HQD_PQ_RPTR);
4109                 cik_srbm_select(rdev, 0, 0, 0, 0);
4110                 mutex_unlock(&rdev->srbm_mutex);
4111         }
4112
4113         return rptr;
4114 }
4115
4116 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4117                          struct radeon_ring *ring)
4118 {
4119         u32 wptr;
4120
4121         if (rdev->wb.enabled) {
4122                 /* XXX check if swapping is necessary on BE */
4123                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4124         } else {
4125                 mutex_lock(&rdev->srbm_mutex);
4126                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4127                 wptr = RREG32(CP_HQD_PQ_WPTR);
4128                 cik_srbm_select(rdev, 0, 0, 0, 0);
4129                 mutex_unlock(&rdev->srbm_mutex);
4130         }
4131
4132         return wptr;
4133 }
4134
4135 void cik_compute_set_wptr(struct radeon_device *rdev,
4136                           struct radeon_ring *ring)
4137 {
4138         /* XXX check if swapping is necessary on BE */
4139         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4140         WDOORBELL32(ring->doorbell_index, ring->wptr);
4141 }
4142
/* Quiesce one compute ring's hardware queue descriptor (HQD).
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable). */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 j, tmp;

        /* point SRBM-indexed registers at this ring's me/pipe/queue */
        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
        /* Disable wptr polling. */
        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
        tmp &= ~WPTR_POLL_EN;
        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
        /* Disable HQD. */
        if (RREG32(CP_HQD_ACTIVE) & 1) {
                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                /* wait up to usec_timeout for the queue to deactivate */
                for (j = 0; j < rdev->usec_timeout; j++) {
                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
                WREG32(CP_HQD_PQ_RPTR, 0);
                WREG32(CP_HQD_PQ_WPTR, 0);
        }
        /* restore SRBM index to the default (0,0,0) selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
}
4167
4168 /**
4169  * cik_cp_compute_enable - enable/disable the compute CP MEs
4170  *
4171  * @rdev: radeon_device pointer
4172  * @enable: enable or disable the MEs
4173  *
4174  * Halts or unhalts the compute MEs.
4175  */
4176 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4177 {
4178         if (enable)
4179                 WREG32(CP_MEC_CNTL, 0);
4180         else {
4181                 /*
4182                  * To make hibernation reliable we need to clear compute ring
4183                  * configuration before halting the compute ring.
4184                  */
4185                 mutex_lock(&rdev->srbm_mutex);
4186                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4187                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4188                 mutex_unlock(&rdev->srbm_mutex);
4189
4190                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4191                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4192                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4193         }
4194         udelay(50);
4195 }
4196
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode into the ucode memory of the MECs
 * via register writes.  The engines are halted first.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode memory */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style images carry a header; the payload is little-endian */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - only Kaveri has a second MEC */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): assumes mec2_fw is always present when
			 * new_fw is set on Kaveri - confirm against the
			 * microcode init path before relying on it
			 */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy images are headerless big-endian words of fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - legacy path reuses the single mec_fw image */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4267
4268 /**
4269  * cik_cp_compute_start - start the compute queues
4270  *
4271  * @rdev: radeon_device pointer
4272  *
4273  * Enable the compute queues.
4274  * Returns 0 for success, error for failure.
4275  */
4276 static int cik_cp_compute_start(struct radeon_device *rdev)
4277 {
4278         cik_cp_compute_enable(rdev, true);
4279
4280         return 0;
4281 }
4282
4283 /**
4284  * cik_cp_compute_fini - stop the compute queues
4285  *
4286  * @rdev: radeon_device pointer
4287  *
4288  * Stop the compute queues and tear down the driver queue
4289  * info.
4290  */
4291 static void cik_cp_compute_fini(struct radeon_device *rdev)
4292 {
4293         int i, idx, r;
4294
4295         cik_cp_compute_enable(rdev, false);
4296
4297         for (i = 0; i < 2; i++) {
4298                 if (i == 0)
4299                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4300                 else
4301                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4302
4303                 if (rdev->ring[idx].mqd_obj) {
4304                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4305                         if (unlikely(r != 0))
4306                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4307
4308                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4309                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4310
4311                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4312                         rdev->ring[idx].mqd_obj = NULL;
4313                 }
4314         }
4315 }
4316
4317 static void cik_mec_fini(struct radeon_device *rdev)
4318 {
4319         int r;
4320
4321         if (rdev->mec.hpd_eop_obj) {
4322                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4323                 if (unlikely(r != 0))
4324                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4325                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4326                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4327
4328                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4329                 rdev->mec.hpd_eop_obj = NULL;
4330         }
4331 }
4332
4333 #define MEC_HPD_SIZE 2048
4334
/* Allocate, pin and clear the HPD EOP buffer used by the compute MECs.
 * Returns 0 on success, negative error code on failure (the object is
 * cleaned up via cik_mec_fini() on any partial failure).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one MEC_HPD_SIZE region per pipe, times two
		 * (NOTE(review): reason for the factor of 2 is not visible
		 * here - presumably headroom; confirm before changing)
		 */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	/* map it so we can memset it from the CPU */
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4389
/* Shadow copy of the per-queue CP_HQD_* / CP_MQD_* register state kept
 * inside the MQD.  Field order mirrors the hardware MQD layout consumed
 * by the CP - do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4428
/* Memory Queue Descriptor for Bonaire-class compute queues.  The CP reads
 * this structure from the GPU-visible MQD buffer object, so the layout is
 * fixed by hardware - do not reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* saved queue register state, see struct hqd_registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4456
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	/* select the default (me 0, pipe 0, queue 0, vmid 0) SRBM state */
	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the MQD buffer object on first resume */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all compute units for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* the CP_HQD_* registers below are banked per me/pipe/queue,
		 * so select this ring's bank while holding the srbm mutex
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the dequeue to land */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		/* restore the default SRBM selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* a failed ring test only marks the ring not ready; it is
		 * not treated as a fatal error here
		 */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4697
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4703
/* Load microcode for both the gfx and compute command processors.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}
4717
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4723
/* Bring up both command processors: load ucode, then resume the gfx and
 * compute rings.  Returns 0 on success, negative error code on failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* keep GUI idle interrupts quiet while the CPs are (re)programmed */
	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4745
/* Dump the main GPU busy/hang status registers to the kernel log.
 * Used before and after a soft reset for debugging.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4785
4786 /**
4787  * cik_gpu_check_soft_reset - check which blocks are busy
4788  *
4789  * @rdev: radeon_device pointer
4790  *
4791  * Check which blocks are busy and return the relevant reset
4792  * mask to be used by cik_gpu_soft_reset().
4793  * Returns a mask of the blocks to be reset.
4794  */
4795 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4796 {
4797         u32 reset_mask = 0;
4798         u32 tmp;
4799
4800         /* GRBM_STATUS */
4801         tmp = RREG32(GRBM_STATUS);
4802         if (tmp & (PA_BUSY | SC_BUSY |
4803                    BCI_BUSY | SX_BUSY |
4804                    TA_BUSY | VGT_BUSY |
4805                    DB_BUSY | CB_BUSY |
4806                    GDS_BUSY | SPI_BUSY |
4807                    IA_BUSY | IA_BUSY_NO_DMA))
4808                 reset_mask |= RADEON_RESET_GFX;
4809
4810         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4811                 reset_mask |= RADEON_RESET_CP;
4812
4813         /* GRBM_STATUS2 */
4814         tmp = RREG32(GRBM_STATUS2);
4815         if (tmp & RLC_BUSY)
4816                 reset_mask |= RADEON_RESET_RLC;
4817
4818         /* SDMA0_STATUS_REG */
4819         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4820         if (!(tmp & SDMA_IDLE))
4821                 reset_mask |= RADEON_RESET_DMA;
4822
4823         /* SDMA1_STATUS_REG */
4824         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4825         if (!(tmp & SDMA_IDLE))
4826                 reset_mask |= RADEON_RESET_DMA1;
4827
4828         /* SRBM_STATUS2 */
4829         tmp = RREG32(SRBM_STATUS2);
4830         if (tmp & SDMA_BUSY)
4831                 reset_mask |= RADEON_RESET_DMA;
4832
4833         if (tmp & SDMA1_BUSY)
4834                 reset_mask |= RADEON_RESET_DMA1;
4835
4836         /* SRBM_STATUS */
4837         tmp = RREG32(SRBM_STATUS);
4838
4839         if (tmp & IH_BUSY)
4840                 reset_mask |= RADEON_RESET_IH;
4841
4842         if (tmp & SEM_BUSY)
4843                 reset_mask |= RADEON_RESET_SEM;
4844
4845         if (tmp & GRBM_RQ_PENDING)
4846                 reset_mask |= RADEON_RESET_GRBM;
4847
4848         if (tmp & VMC_BUSY)
4849                 reset_mask |= RADEON_RESET_VMC;
4850
4851         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4852                    MCC_BUSY | MCD_BUSY))
4853                 reset_mask |= RADEON_RESET_MC;
4854
4855         if (evergreen_is_display_hung(rdev))
4856                 reset_mask |= RADEON_RESET_DISPLAY;
4857
4858         /* Skip MC reset as it's mostly likely not hung, just busy */
4859         if (reset_mask & RADEON_RESET_MC) {
4860                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4861                 reset_mask &= ~RADEON_RESET_MC;
4862         }
4863
4864         return reset_mask;
4865 }
4866
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.  The sequence is:
 * disable CG/PG, halt the engines, stop the MC, pulse the GRBM/SRBM
 * soft reset bits, then resume the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state before the reset for debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before resetting anything */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only applies to discrete parts, not IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* read back to post the write before delaying */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		/* read back to post the write before delaying */
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump state again so before/after can be compared in the log */
	cik_print_gpu_status_regs(rdev);
}
4997
/* GMCON register state saved across a PCI config reset on KV/KB (IGP) parts */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5003
/**
 * kv_save_regs_for_reset - save GMCON registers before a reset (KV/KB)
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3, then clears the
 * RENG_EXECUTE_ON_PWR_UP, RENG_EXECUTE_ON_REG_UPDATE and STCTRL_STUTTER_EN
 * bits so the RENG does not run while the reset is in progress.
 * Restored afterwards by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5015
/**
 * kv_restore_regs_for_reset - restore GMCON state after a reset (KV/KB)
 *
 * @rdev: radeon_device pointer
 * @save: register values captured earlier by kv_save_regs_for_reset()
 *
 * Replays a fixed GMCON_PGFSM_CONFIG/GMCON_PGFSM_WRITE programming
 * sequence (PGFSM presumably = power-gating state machine; the config
 * values are hardware-specific constants — do not reorder or change
 * them), then restores the saved registers in reverse order of saving.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* restore in reverse order of the save */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5088
/**
 * cik_gpu_pci_config_reset - reset the asic via the PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Hard reset fallback used when the soft reset did not recover the GPU.
 * Halts the CP/MEC/SDMA engines and the RLC, stops memory access,
 * saves GMCON state on IGP parts, then triggers a PCI config reset and
 * waits for the asic to come back (CONFIG_MEMSIZE reads valid again).
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP GMCON state is lost across the reset; save it first */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads all-ones while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5151
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @hard: force hard reset
 *
 * Look up which blocks are hung and attempt
 * to reset them.  Escalates from a soft reset to a PCI config
 * reset (if enabled via the radeon_hard_reset module option)
 * when the soft reset does not clear the hang.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	/* caller demanded a hard reset; skip the soft-reset attempt */
	if (hard) {
		cik_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* clear the hung flag only if nothing is still reported hung */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5192
5193 /**
5194  * cik_gfx_is_lockup - check if the 3D engine is locked up
5195  *
5196  * @rdev: radeon_device pointer
5197  * @ring: radeon_ring structure holding ring information
5198  *
5199  * Check if the 3D engine is locked up (CIK).
5200  * Returns true if the engine is locked, false if not.
5201  */
5202 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5203 {
5204         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5205
5206         if (!(reset_mask & (RADEON_RESET_GFX |
5207                             RADEON_RESET_COMPUTE |
5208                             RADEON_RESET_CP))) {
5209                 radeon_ring_lockup_update(rdev, ring);
5210                 return false;
5211         }
5212         return radeon_ring_test_lockup(rdev, ring);
5213 }
5214
5215 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).  Memory clients are stopped
 * (evergreen_mc_stop) while the apertures are reprogrammed and
 * resumed afterwards.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (bits 15:0) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK; program an empty aperture (BOT > TOP-equivalent) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5271
5272 /**
5273  * cik_mc_init - initialize the memory controller driver params
5274  *
5275  * @rdev: radeon_device pointer
5276  *
5277  * Look up the amount of vram, vram width, and decide how to place
5278  * vram and gart within the GPU's physical address space (CIK).
5279  * Returns 0 for success.
5280  */
5281 static int cik_mc_init(struct radeon_device *rdev)
5282 {
5283         u32 tmp;
5284         int chansize, numchan;
5285
5286         /* Get VRAM informations */
5287         rdev->mc.vram_is_ddr = true;
5288         tmp = RREG32(MC_ARB_RAMCFG);
5289         if (tmp & CHANSIZE_MASK) {
5290                 chansize = 64;
5291         } else {
5292                 chansize = 32;
5293         }
5294         tmp = RREG32(MC_SHARED_CHMAP);
5295         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5296         case 0:
5297         default:
5298                 numchan = 1;
5299                 break;
5300         case 1:
5301                 numchan = 2;
5302                 break;
5303         case 2:
5304                 numchan = 4;
5305                 break;
5306         case 3:
5307                 numchan = 8;
5308                 break;
5309         case 4:
5310                 numchan = 3;
5311                 break;
5312         case 5:
5313                 numchan = 6;
5314                 break;
5315         case 6:
5316                 numchan = 10;
5317                 break;
5318         case 7:
5319                 numchan = 12;
5320                 break;
5321         case 8:
5322                 numchan = 16;
5323                 break;
5324         }
5325         rdev->mc.vram_width = numchan * chansize;
5326         /* Could aper size report 0 ? */
5327         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5328         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5329         /* size in MB on si */
5330         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5331         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5332         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5333         si_vram_gtt_location(rdev, &rdev->mc);
5334         radeon_update_bandwidth_info(rdev);
5335
5336         return 0;
5337 }
5338
5339 /*
5340  * GART
5341  * VMID 0 is the physical GPU addresses as used by the kernel.
5342  * VMIDs 1-15 are used for userspace clients and are handled
5343  * by the radeon vm/hsa code.
5344  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache so table updates are visible before invalidation */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only flush context 0 here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5360
/**
 * cik_pcie_init_compute_vmid - set up SH_MEM registers for compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Programs the SH_MEM_* registers for VMIDs 8-15 (used for compute,
 * presumably by amdkfd — see cik_vm_init).  Selection of the VMID via
 * cik_srbm_select() is serialized with srbm_mutex.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* shared and private aperture bases, both 0x6000 */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* always leave the SRBM pointed back at VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5382
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 (the kernel's GART mapping) */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 fall back to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5505
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).  The per-context page table
 * base addresses are saved first so cik_pcie_gart_enable() can restore
 * them on resume.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save context1-15 page table bases for later restore */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5544
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the hardware
 * page tables first, then frees the table memory and the GART state.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5558
5559 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5572
5573 /*
5574  * vm
5575  * VMID 0 is the physical GPU addresses as used by the kernel.
5576  * VMIDs 1-15 are used for userspace clients and are handled
5577  * by the radeon vm/hsa code.
5578  */
5579 /**
5580  * cik_vm_init - cik vm init callback
5581  *
5582  * @rdev: radeon_device pointer
5583  *
5584  * Inits cik specific vm parameters (number of VMs, base of vram for
5585  * VMIDs 1-15) (CIK).
5586  * Returns 0 for success.
5587  */
5588 int cik_vm_init(struct radeon_device *rdev)
5589 {
5590         /*
5591          * number of VMs
5592          * VMID 0 is reserved for System
5593          * radeon graphics/compute will use VMIDs 1-7
5594          * amdkfd will use VMIDs 8-15
5595          */
5596         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5597         /* base offset of vram pages */
5598         if (rdev->flags & RADEON_IS_IGP) {
5599                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5600                 tmp <<= 22;
5601                 rdev->vm_manager.vram_base_offset = tmp;
5602         } else
5603                 rdev->vm_manager.vram_base_offset = 0;
5604
5605         return 0;
5606 }
5607
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do on CIK; intentionally empty.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5618
5619 /**
5620  * cik_vm_decode_fault - print human readable fault info
5621  *
5622  * @rdev: radeon_device pointer
5623  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5624  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5625  *
5626  * Print human readable fault information (CIK).
5627  */
5628 static void cik_vm_decode_fault(struct radeon_device *rdev,
5629                                 u32 status, u32 addr, u32 mc_client)
5630 {
5631         u32 mc_id;
5632         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5633         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5634         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5635                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5636
5637         if (rdev->family == CHIP_HAWAII)
5638                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5639         else
5640                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5641
5642         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5643                protections, vmid, addr,
5644                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5645                block, mc_client, mc_id);
5646 }
5647
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID to flush (1-15)
 * @pd_addr: page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).  Emits WRITE_DATA packets on the ring rather
 * than writing the registers directly so the flush is ordered with
 * the other commands in the stream.
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP engine; compute writes go via ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page table base for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 use two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* point the SRBM back at VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5729
5730 /*
5731  * RLC
5732  * The RLC is a multi-purpose microengine that handles a
5733  * variety of functions, the most important of which is
5734  * the interrupt controller.
5735  */
5736 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5737                                           bool enable)
5738 {
5739         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5740
5741         if (enable)
5742                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5743         else
5744                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5745         WREG32(CP_INT_CNTL_RING0, tmp);
5746 }
5747
5748 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5749 {
5750         u32 tmp;
5751
5752         tmp = RREG32(RLC_LB_CNTL);
5753         if (enable)
5754                 tmp |= LOAD_BALANCE_ENABLE;
5755         else
5756                 tmp &= ~LOAD_BALANCE_ENABLE;
5757         WREG32(RLC_LB_CNTL, tmp);
5758 }
5759
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY per SE/SH (under grbm_idx_mutex,
 * since cik_select_se_sh() changes the global GRBM index) and then
 * RLC_SERDES_NONCU_MASTER_BUSY, each bounded by rdev->usec_timeout.
 * Times out silently.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SEs/SHs selected) */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5786
5787 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5788 {
5789         u32 tmp;
5790
5791         tmp = RREG32(RLC_CNTL);
5792         if (tmp != rlc)
5793                 WREG32(RLC_CNTL, rlc);
5794 }
5795
/**
 * cik_halt_rlc - halt the RLC if it is running
 *
 * @rdev: radeon_device pointer
 *
 * Clears RLC_ENABLE (if set), waits for RLC_GPM_BUSY to clear and for
 * the serdes masters to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the GPM to go idle, bounded by usec_timeout */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5819
/**
 * cik_enter_rlc_safe_mode - request RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE via RLC_GPR_REG2, then waits (bounded
 * by usec_timeout) for GFX power/clock status to assert and for the
 * RLC to acknowledge the request by clearing the REQ bit.
 * Paired with cik_exit_rlc_safe_mode().
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to consume the request (REQ bit clears) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5840
5841 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5842 {
5843         u32 tmp;
5844
5845         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5846         WREG32(RLC_GPR_REG2, tmp);
5847 }
5848
5849 /**
5850  * cik_rlc_stop - stop the RLC ME
5851  *
5852  * @rdev: radeon_device pointer
5853  *
5854  * Halt the RLC ME (MicroEngine) (CIK).
5855  */
5856 static void cik_rlc_stop(struct radeon_device *rdev)
5857 {
5858         WREG32(RLC_CNTL, 0);
5859
5860         cik_enable_gui_idle_interrupt(rdev, false);
5861
5862         cik_wait_for_rlc_serdes(rdev);
5863 }
5864
5865 /**
5866  * cik_rlc_start - start the RLC ME
5867  *
5868  * @rdev: radeon_device pointer
5869  *
5870  * Unhalt the RLC ME (MicroEngine) (CIK).
5871  */
5872 static void cik_rlc_start(struct radeon_device *rdev)
5873 {
5874         WREG32(RLC_CNTL, RLC_ENABLE);
5875
5876         cik_enable_gui_idle_interrupt(rdev, true);
5877
5878         udelay(50);
5879 }
5880
5881 /**
5882  * cik_rlc_resume - setup the RLC hw
5883  *
5884  * @rdev: radeon_device pointer
5885  *
5886  * Initialize the RLC registers, load the ucode,
5887  * and start the RLC (CIK).
5888  * Returns 0 for success, -EINVAL if the ucode is not available.
5889  */
5890 static int cik_rlc_resume(struct radeon_device *rdev)
5891 {
5892         u32 i, size, tmp;
5893
5894         if (!rdev->rlc_fw)
5895                 return -EINVAL;
5896
5897         cik_rlc_stop(rdev);
5898
5899         /* disable CG */
5900         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5901         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5902
5903         si_rlc_reset(rdev);
5904
5905         cik_init_pg(rdev);
5906
5907         cik_init_cg(rdev);
5908
5909         WREG32(RLC_LB_CNTR_INIT, 0);
5910         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5911
5912         mutex_lock(&rdev->grbm_idx_mutex);
5913         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5914         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5915         WREG32(RLC_LB_PARAMS, 0x00600408);
5916         WREG32(RLC_LB_CNTL, 0x80000004);
5917         mutex_unlock(&rdev->grbm_idx_mutex);
5918
5919         WREG32(RLC_MC_CNTL, 0);
5920         WREG32(RLC_UCODE_CNTL, 0);
5921
5922         if (rdev->new_fw) {
5923                 const struct rlc_firmware_header_v1_0 *hdr =
5924                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5925                 const __le32 *fw_data = (const __le32 *)
5926                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5927
5928                 radeon_ucode_print_rlc_hdr(&hdr->header);
5929
5930                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5931                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5932                 for (i = 0; i < size; i++)
5933                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5934                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5935         } else {
5936                 const __be32 *fw_data;
5937
5938                 switch (rdev->family) {
5939                 case CHIP_BONAIRE:
5940                 case CHIP_HAWAII:
5941                 default:
5942                         size = BONAIRE_RLC_UCODE_SIZE;
5943                         break;
5944                 case CHIP_KAVERI:
5945                         size = KV_RLC_UCODE_SIZE;
5946                         break;
5947                 case CHIP_KABINI:
5948                         size = KB_RLC_UCODE_SIZE;
5949                         break;
5950                 case CHIP_MULLINS:
5951                         size = ML_RLC_UCODE_SIZE;
5952                         break;
5953                 }
5954
5955                 fw_data = (const __be32 *)rdev->rlc_fw->data;
5956                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5957                 for (i = 0; i < size; i++)
5958                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5959                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5960         }
5961
5962         /* XXX - find out what chips support lbpw */
5963         cik_enable_lbpw(rdev, false);
5964
5965         if (rdev->family == CHIP_BONAIRE)
5966                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5967
5968         cik_rlc_start(rdev);
5969
5970         return 0;
5971 }
5972
/*
 * cik_enable_cgcg - enable/disable coarse-grain clock gating (CGCG/CGLS)
 *
 * When enabling (and the ASIC advertises RADEON_CG_SUPPORT_GFX_CGCG),
 * halts the RLC, programs the serdes write masks/control under the
 * GRBM index mutex, restores the RLC, and finally sets CGCG_EN|CGLS_EN
 * in RLC_CGCG_CGLS_CTRL.  The sequence order matters.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated read-backs; presumably to let the CG state
		 * settle before disabling — TODO confirm against the
		 * CIK programming sequence */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6010
/*
 * cik_enable_mgcg - enable/disable medium-grain clock gating (MGCG)
 *
 * Also handles the dependent CP/RLC memory light-sleep and CGTS state.
 * Both branches halt the RLC, program the serdes write masks under the
 * GRBM index mutex, and restore the RLC afterwards; the per-register
 * ordering within each branch is part of the hardware sequence.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* CP memory light sleep only if both MGLS and CP_LS are supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* disable path: force overrides on, turn off RLC/CP mem sleep */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6094
/* Memory-controller / ATC / VM clock-gating control registers.
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() toggle the
 * MC_LS_ENABLE / MC_CG_ENABLE bits in each of these.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6107
6108 static void cik_enable_mc_ls(struct radeon_device *rdev,
6109                              bool enable)
6110 {
6111         int i;
6112         u32 orig, data;
6113
6114         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6115                 orig = data = RREG32(mc_cg_registers[i]);
6116                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6117                         data |= MC_LS_ENABLE;
6118                 else
6119                         data &= ~MC_LS_ENABLE;
6120                 if (data != orig)
6121                         WREG32(mc_cg_registers[i], data);
6122         }
6123 }
6124
6125 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6126                                bool enable)
6127 {
6128         int i;
6129         u32 orig, data;
6130
6131         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6132                 orig = data = RREG32(mc_cg_registers[i]);
6133                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6134                         data |= MC_CG_ENABLE;
6135                 else
6136                         data &= ~MC_CG_ENABLE;
6137                 if (data != orig)
6138                         WREG32(mc_cg_registers[i], data);
6139         }
6140 }
6141
6142 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6143                                  bool enable)
6144 {
6145         u32 orig, data;
6146
6147         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6148                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6149                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6150         } else {
6151                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6152                 data |= 0xff000000;
6153                 if (data != orig)
6154                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6155
6156                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6157                 data |= 0xff000000;
6158                 if (data != orig)
6159                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6160         }
6161 }
6162
6163 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6164                                  bool enable)
6165 {
6166         u32 orig, data;
6167
6168         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6169                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6170                 data |= 0x100;
6171                 if (orig != data)
6172                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6173
6174                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6175                 data |= 0x100;
6176                 if (orig != data)
6177                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6178         } else {
6179                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6180                 data &= ~0x100;
6181                 if (orig != data)
6182                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6183
6184                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6185                 data &= ~0x100;
6186                 if (orig != data)
6187                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6188         }
6189 }
6190
/*
 * cik_enable_uvd_mgcg - toggle UVD medium-grain clock gating
 *
 * Programs the indirect UVD_CGC_MEM_CTRL register (low 12 bits) and
 * the DCM bit in UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately replaced
		 * with 0xfff rather than OR-ed in; the read may only matter
		 * for its side effect — confirm against UVD CG docs. */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6216
6217 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6218                                bool enable)
6219 {
6220         u32 orig, data;
6221
6222         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6223
6224         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6225                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6226                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6227         else
6228                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6229                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6230
6231         if (orig != data)
6232                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6233 }
6234
6235 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6236                                 bool enable)
6237 {
6238         u32 orig, data;
6239
6240         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6241
6242         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6243                 data &= ~CLOCK_GATING_DIS;
6244         else
6245                 data |= CLOCK_GATING_DIS;
6246
6247         if (orig != data)
6248                 WREG32(HDP_HOST_PATH_CNTL, data);
6249 }
6250
6251 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6252                               bool enable)
6253 {
6254         u32 orig, data;
6255
6256         orig = data = RREG32(HDP_MEM_POWER_LS);
6257
6258         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6259                 data |= HDP_LS_ENABLE;
6260         else
6261                 data &= ~HDP_LS_ENABLE;
6262
6263         if (orig != data)
6264                 WREG32(HDP_MEM_POWER_LS, data);
6265 }
6266
/*
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable gating
 *
 * Dispatches to the per-block helpers.  For GFX, MGCG must be enabled
 * before CGCG and disabled after it; the GUI idle interrupt is masked
 * for the duration of the GFX update.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating only applies to discrete parts, not IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6314
/*
 * cik_init_cg - enable clock gating on all supported blocks
 *
 * GFX gating is brought up first, then UVD internal CG (if present),
 * then the remaining blocks in one batch.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6329
/*
 * cik_fini_cg - disable clock gating on all blocks
 *
 * Reverse of cik_init_cg(): non-GFX blocks are disabled first,
 * GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6340
6341 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6342                                           bool enable)
6343 {
6344         u32 data, orig;
6345
6346         orig = data = RREG32(RLC_PG_CNTL);
6347         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6348                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6349         else
6350                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6351         if (orig != data)
6352                 WREG32(RLC_PG_CNTL, data);
6353 }
6354
6355 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6356                                           bool enable)
6357 {
6358         u32 data, orig;
6359
6360         orig = data = RREG32(RLC_PG_CNTL);
6361         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6362                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6363         else
6364                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6365         if (orig != data)
6366                 WREG32(RLC_PG_CNTL, data);
6367 }
6368
6369 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6370 {
6371         u32 data, orig;
6372
6373         orig = data = RREG32(RLC_PG_CNTL);
6374         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6375                 data &= ~DISABLE_CP_PG;
6376         else
6377                 data |= DISABLE_CP_PG;
6378         if (orig != data)
6379                 WREG32(RLC_PG_CNTL, data);
6380 }
6381
6382 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6383 {
6384         u32 data, orig;
6385
6386         orig = data = RREG32(RLC_PG_CNTL);
6387         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6388                 data &= ~DISABLE_GDS_PG;
6389         else
6390                 data |= DISABLE_GDS_PG;
6391         if (orig != data)
6392                 WREG32(RLC_PG_CNTL, data);
6393 }
6394
6395 #define CP_ME_TABLE_SIZE    96
6396 #define CP_ME_TABLE_OFFSET  2048
6397 #define CP_MEC_TABLE_OFFSET 4096
6398
/*
 * cik_init_cp_pg_table - copy the CP jump tables into the RLC CP table BO
 *
 * Fills rdev->rlc.cp_table_ptr with the jump table of each CP micro
 * engine (CE, PFP, ME, MEC, and MEC2 on Kaveri), packed back to back
 * as little-endian words.  New-style firmware carries per-image
 * jt_offset/jt_size in its header; legacy firmware uses fixed offsets
 * and a fixed table size.  No-op when the table BO is not mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri has a second MEC, hence a fifth table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* pick the firmware image for this engine:
			 * 0=CE, 1=PFP, 2=ME, 3=MEC, 4=MEC2 */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed size, big-endian words */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6482
/*
 * cik_enable_gfx_cgpg - toggle gfx coarse-grain power gating
 *
 * Sets/clears GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL as a pair, honouring RADEON_PG_SUPPORT_GFX_PG.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result discarded — the read itself may be
		 * needed to wake the block after ungating; confirm before
		 * removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6512
6513 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6514 {
6515         u32 mask = 0, tmp, tmp1;
6516         int i;
6517
6518         mutex_lock(&rdev->grbm_idx_mutex);
6519         cik_select_se_sh(rdev, se, sh);
6520         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6521         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6522         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6523         mutex_unlock(&rdev->grbm_idx_mutex);
6524
6525         tmp &= 0xffff0000;
6526
6527         tmp |= tmp1;
6528         tmp >>= 16;
6529
6530         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6531                 mask <<= 1;
6532                 mask |= 1;
6533         }
6534
6535         return (~tmp) & mask;
6536 }
6537
6538 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6539 {
6540         u32 i, j, k, active_cu_number = 0;
6541         u32 mask, counter, cu_bitmap;
6542         u32 tmp = 0;
6543
6544         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6545                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6546                         mask = 1;
6547                         cu_bitmap = 0;
6548                         counter = 0;
6549                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6550                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6551                                         if (counter < 2)
6552                                                 cu_bitmap |= mask;
6553                                         counter ++;
6554                                 }
6555                                 mask <<= 1;
6556                         }
6557
6558                         active_cu_number += counter;
6559                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6560                 }
6561         }
6562
6563         WREG32(RLC_PG_AO_CU_MASK, tmp);
6564
6565         tmp = RREG32(RLC_MAX_PG_CU);
6566         tmp &= ~MAX_PU_CU_MASK;
6567         tmp |= MAX_PU_CU(active_cu_number);
6568         WREG32(RLC_MAX_PG_CU, tmp);
6569 }
6570
6571 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6572                                        bool enable)
6573 {
6574         u32 data, orig;
6575
6576         orig = data = RREG32(RLC_PG_CNTL);
6577         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6578                 data |= STATIC_PER_CU_PG_ENABLE;
6579         else
6580                 data &= ~STATIC_PER_CU_PG_ENABLE;
6581         if (orig != data)
6582                 WREG32(RLC_PG_CNTL, data);
6583 }
6584
6585 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6586                                         bool enable)
6587 {
6588         u32 data, orig;
6589
6590         orig = data = RREG32(RLC_PG_CNTL);
6591         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6592                 data |= DYN_PER_CU_PG_ENABLE;
6593         else
6594                 data &= ~DYN_PER_CU_PG_ENABLE;
6595         if (orig != data)
6596                 WREG32(RLC_PG_CNTL, data);
6597 }
6598
6599 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6600 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6601
/*
 * cik_init_gfx_cgpg - initialize the RLC state for gfx power gating
 *
 * Writes the clear-state descriptor (address + size) and the
 * save/restore register list into the RLC GPM scratch, points the RLC
 * at the save/restore and CP-table buffers, and programs the idle
 * poll count and power-gating delay registers.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6650
/**
 * cik_update_gfx_pg - enable/disable all gfx power gating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Toggle coarse grain, static medium grain and dynamic medium grain
 * gfx power gating together (CIK).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6657
6658 u32 cik_get_csb_size(struct radeon_device *rdev)
6659 {
6660         u32 count = 0;
6661         const struct cs_section_def *sect = NULL;
6662         const struct cs_extent_def *ext = NULL;
6663
6664         if (rdev->rlc.cs_data == NULL)
6665                 return 0;
6666
6667         /* begin clear state */
6668         count += 2;
6669         /* context control state */
6670         count += 3;
6671
6672         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6673                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6674                         if (sect->id == SECT_CONTEXT)
6675                                 count += 2 + ext->reg_count;
6676                         else
6677                                 return 0;
6678                 }
6679         }
6680         /* pa_sc_raster_config/pa_sc_raster_config1 */
6681         count += 4;
6682         /* end clear state */
6683         count += 2;
6684         /* clear state */
6685         count += 2;
6686
6687         return count;
6688 }
6689
6690 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6691 {
6692         u32 count = 0, i;
6693         const struct cs_section_def *sect = NULL;
6694         const struct cs_extent_def *ext = NULL;
6695
6696         if (rdev->rlc.cs_data == NULL)
6697                 return;
6698         if (buffer == NULL)
6699                 return;
6700
6701         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6702         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6703
6704         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6705         buffer[count++] = cpu_to_le32(0x80000000);
6706         buffer[count++] = cpu_to_le32(0x80000000);
6707
6708         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6709                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6710                         if (sect->id == SECT_CONTEXT) {
6711                                 buffer[count++] =
6712                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6713                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6714                                 for (i = 0; i < ext->reg_count; i++)
6715                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6716                         } else {
6717                                 return;
6718                         }
6719                 }
6720         }
6721
6722         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6723         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6724         switch (rdev->family) {
6725         case CHIP_BONAIRE:
6726                 buffer[count++] = cpu_to_le32(0x16000012);
6727                 buffer[count++] = cpu_to_le32(0x00000000);
6728                 break;
6729         case CHIP_KAVERI:
6730                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6731                 buffer[count++] = cpu_to_le32(0x00000000);
6732                 break;
6733         case CHIP_KABINI:
6734         case CHIP_MULLINS:
6735                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6736                 buffer[count++] = cpu_to_le32(0x00000000);
6737                 break;
6738         case CHIP_HAWAII:
6739                 buffer[count++] = cpu_to_le32(0x3a00161a);
6740                 buffer[count++] = cpu_to_le32(0x0000002e);
6741                 break;
6742         default:
6743                 buffer[count++] = cpu_to_le32(0x00000000);
6744                 buffer[count++] = cpu_to_le32(0x00000000);
6745                 break;
6746         }
6747
6748         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6749         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6750
6751         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6752         buffer[count++] = cpu_to_le32(0);
6753 }
6754
/**
 * cik_init_pg - initialize power gating
 *
 * @rdev: radeon_device pointer
 *
 * If any pg features are enabled, set up clock slowdown on power up/down,
 * initialize gfx coarse grain pg state plus CP/GDS pg when supported,
 * program the always-on CU mask and enable gfx power gating (CIK).
 */
static void cik_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_enable_sck_slowdown_on_pu(rdev, true);
                cik_enable_sck_slowdown_on_pd(rdev, true);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_init_gfx_cgpg(rdev);
                        cik_enable_cp_pg(rdev, true);
                        cik_enable_gds_pg(rdev, true);
                }
                cik_init_ao_cu_mask(rdev);
                cik_update_gfx_pg(rdev, true);
        }
}
6769
/**
 * cik_fini_pg - tear down power gating
 *
 * @rdev: radeon_device pointer
 *
 * Disable gfx power gating first, then CP and GDS power gating if they
 * were supported (reverse of cik_init_pg) (CIK).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_update_gfx_pg(rdev, false);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_enable_cp_pg(rdev, false);
                        cik_enable_gds_pg(rdev, false);
                }
        }
}
6780
6781 /*
6782  * Interrupts
6783  * Starting with r6xx, interrupts are handled via a ring buffer.
6784  * Ring buffers are areas of GPU accessible memory that the GPU
6785  * writes interrupt vectors into and the host reads vectors out of.
6786  * There is a rptr (read pointer) that determines where the
6787  * host is currently reading, and a wptr (write pointer)
6788  * which determines where the GPU has written.  When the
6789  * pointers are equal, the ring is idle.  When the GPU
6790  * writes vectors to the ring buffer, it increments the
6791  * wptr.  When there is an interrupt, the host then starts
6792  * fetching commands and processing them until the pointers are
6793  * equal again at which point it updates the rptr.
6794  */
6795
6796 /**
6797  * cik_enable_interrupts - Enable the interrupt ring buffer
6798  *
6799  * @rdev: radeon_device pointer
6800  *
6801  * Enable the interrupt ring buffer (CIK).
6802  */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
        u32 ih_cntl = RREG32(IH_CNTL);
        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

        /* turn on the controller and the ring buffer, preserving the
         * other bits of both registers */
        ih_cntl |= ENABLE_INTR;
        ih_rb_cntl |= IH_RB_ENABLE;
        WREG32(IH_CNTL, ih_cntl);
        WREG32(IH_RB_CNTL, ih_rb_cntl);
        rdev->ih.enabled = true;
}
6814
6815 /**
6816  * cik_disable_interrupts - Disable the interrupt ring buffer
6817  *
6818  * @rdev: radeon_device pointer
6819  *
6820  * Disable the interrupt ring buffer (CIK).
6821  */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
        u32 ih_cntl = RREG32(IH_CNTL);

        /* disable the ring buffer and the controller, preserving the
         * other bits of both registers */
        ih_rb_cntl &= ~IH_RB_ENABLE;
        ih_cntl &= ~ENABLE_INTR;
        WREG32(IH_RB_CNTL, ih_rb_cntl);
        WREG32(IH_CNTL, ih_cntl);
        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);
        rdev->ih.enabled = false;
        /* keep the software rptr in sync with the reset hardware ring */
        rdev->ih.rptr = 0;
}
6837
6838 /**
6839  * cik_disable_interrupt_state - Disable all interrupt sources
6840  *
6841  * @rdev: radeon_device pointer
6842  *
6843  * Clear all interrupt enable bits used by the driver (CIK).
6844  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* gfx ring */
        /* keep only the context busy/empty enables; clear all other sources */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        /* sdma */
        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* compute queues */
        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
        /* grbm */
        WREG32(GRBM_INT_CNTL, 0);
        /* SRBM */
        WREG32(SRBM_INT_CNTL, 0);
        /* vline/vblank, etc. */
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }
        /* pflip */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* dac hotplug */
        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

        /* digital hotplug */
        /* clear the HPD enables but keep the configured polarity bit */
        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD1_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD2_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD3_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD4_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD5_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6914
6915 /**
6916  * cik_irq_init - init and enable the interrupt ring
6917  *
6918  * @rdev: radeon_device pointer
6919  *
6920  * Allocate a ring buffer for the interrupt controller,
6921  * enable the RLC, disable interrupts, enable the IH
6922  * ring buffer and enable it (CIK).
6923  * Called at device load and reume.
6924  * Returns 0 for success, errors for failure.
6925  */
6926 static int cik_irq_init(struct radeon_device *rdev)
6927 {
6928         int ret = 0;
6929         int rb_bufsz;
6930         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6931
6932         /* allocate ring */
6933         ret = r600_ih_ring_alloc(rdev);
6934         if (ret)
6935                 return ret;
6936
6937         /* disable irqs */
6938         cik_disable_interrupts(rdev);
6939
6940         /* init rlc */
6941         ret = cik_rlc_resume(rdev);
6942         if (ret) {
6943                 r600_ih_ring_fini(rdev);
6944                 return ret;
6945         }
6946
6947         /* setup interrupt control */
6948         /* set dummy read address to dummy page address */
6949         WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6950         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6951         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6952          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6953          */
6954         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6955         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6956         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6957         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6958
6959         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6960         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6961
6962         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6963                       IH_WPTR_OVERFLOW_CLEAR |
6964                       (rb_bufsz << 1));
6965
6966         if (rdev->wb.enabled)
6967                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6968
6969         /* set the writeback address whether it's enabled or not */
6970         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6971         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6972
6973         WREG32(IH_RB_CNTL, ih_rb_cntl);
6974
6975         /* set rptr, wptr to 0 */
6976         WREG32(IH_RB_RPTR, 0);
6977         WREG32(IH_RB_WPTR, 0);
6978
6979         /* Default settings for IH_CNTL (disabled at first) */
6980         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6981         /* RPTR_REARM only works if msi's are enabled */
6982         if (rdev->msi_enabled)
6983                 ih_cntl |= RPTR_REARM;
6984         WREG32(IH_CNTL, ih_cntl);
6985
6986         /* force the active interrupt state to all disabled */
6987         cik_disable_interrupt_state(rdev);
6988
6989         pci_set_master(rdev->pdev);
6990
6991         /* enable irqs */
6992         cik_enable_interrupts(rdev);
6993
6994         return ret;
6995 }
6996
6997 /**
6998  * cik_irq_set - enable/disable interrupt sources
6999  *
7000  * @rdev: radeon_device pointer
7001  *
7002  * Enable interrupt sources on the GPU (vblanks, hpd,
7003  * etc.) (CIK).
7004  * Returns 0 for success, errors for failure.
7005  */
7006 int cik_irq_set(struct radeon_device *rdev)
7007 {
7008         u32 cp_int_cntl;
7009         u32 cp_m1p0;
7010         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7011         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7012         u32 grbm_int_cntl = 0;
7013         u32 dma_cntl, dma_cntl1;
7014
7015         if (!rdev->irq.installed) {
7016                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7017                 return -EINVAL;
7018         }
7019         /* don't enable anything if the ih is disabled */
7020         if (!rdev->ih.enabled) {
7021                 cik_disable_interrupts(rdev);
7022                 /* force the active interrupt state to all disabled */
7023                 cik_disable_interrupt_state(rdev);
7024                 return 0;
7025         }
7026
7027         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7028                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7029         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7030
7031         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7032         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7033         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7034         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7035         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7036         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7037
7038         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7039         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7040
7041         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7042
7043         /* enable CP interrupts on all rings */
7044         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7045                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7046                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7047         }
7048         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7049                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7050                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7051                 if (ring->me == 1) {
7052                         switch (ring->pipe) {
7053                         case 0:
7054                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7055                                 break;
7056                         default:
7057                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7058                                 break;
7059                         }
7060                 } else {
7061                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7062                 }
7063         }
7064         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7065                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7066                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7067                 if (ring->me == 1) {
7068                         switch (ring->pipe) {
7069                         case 0:
7070                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7071                                 break;
7072                         default:
7073                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7074                                 break;
7075                         }
7076                 } else {
7077                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7078                 }
7079         }
7080
7081         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7082                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7083                 dma_cntl |= TRAP_ENABLE;
7084         }
7085
7086         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7087                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7088                 dma_cntl1 |= TRAP_ENABLE;
7089         }
7090
7091         if (rdev->irq.crtc_vblank_int[0] ||
7092             atomic_read(&rdev->irq.pflip[0])) {
7093                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7094                 crtc1 |= VBLANK_INTERRUPT_MASK;
7095         }
7096         if (rdev->irq.crtc_vblank_int[1] ||
7097             atomic_read(&rdev->irq.pflip[1])) {
7098                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7099                 crtc2 |= VBLANK_INTERRUPT_MASK;
7100         }
7101         if (rdev->irq.crtc_vblank_int[2] ||
7102             atomic_read(&rdev->irq.pflip[2])) {
7103                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7104                 crtc3 |= VBLANK_INTERRUPT_MASK;
7105         }
7106         if (rdev->irq.crtc_vblank_int[3] ||
7107             atomic_read(&rdev->irq.pflip[3])) {
7108                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7109                 crtc4 |= VBLANK_INTERRUPT_MASK;
7110         }
7111         if (rdev->irq.crtc_vblank_int[4] ||
7112             atomic_read(&rdev->irq.pflip[4])) {
7113                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7114                 crtc5 |= VBLANK_INTERRUPT_MASK;
7115         }
7116         if (rdev->irq.crtc_vblank_int[5] ||
7117             atomic_read(&rdev->irq.pflip[5])) {
7118                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7119                 crtc6 |= VBLANK_INTERRUPT_MASK;
7120         }
7121         if (rdev->irq.hpd[0]) {
7122                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7123                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7124         }
7125         if (rdev->irq.hpd[1]) {
7126                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7127                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7128         }
7129         if (rdev->irq.hpd[2]) {
7130                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7131                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7132         }
7133         if (rdev->irq.hpd[3]) {
7134                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7135                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7136         }
7137         if (rdev->irq.hpd[4]) {
7138                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7139                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7140         }
7141         if (rdev->irq.hpd[5]) {
7142                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7143                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7144         }
7145
7146         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7147
7148         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7149         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7150
7151         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7152
7153         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7154
7155         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7156         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7157         if (rdev->num_crtc >= 4) {
7158                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7159                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7160         }
7161         if (rdev->num_crtc >= 6) {
7162                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7163                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7164         }
7165
7166         if (rdev->num_crtc >= 2) {
7167                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7168                        GRPH_PFLIP_INT_MASK);
7169                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7170                        GRPH_PFLIP_INT_MASK);
7171         }
7172         if (rdev->num_crtc >= 4) {
7173                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7174                        GRPH_PFLIP_INT_MASK);
7175                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7176                        GRPH_PFLIP_INT_MASK);
7177         }
7178         if (rdev->num_crtc >= 6) {
7179                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7180                        GRPH_PFLIP_INT_MASK);
7181                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7182                        GRPH_PFLIP_INT_MASK);
7183         }
7184
7185         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7186         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7187         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7188         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7189         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7190         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7191
7192         /* posting read */
7193         RREG32(SRBM_STATUS);
7194
7195         return 0;
7196 }
7197
7198 /**
7199  * cik_irq_ack - ack interrupt sources
7200  *
7201  * @rdev: radeon_device pointer
7202  *
7203  * Ack interrupt sources on the GPU (vblanks, hpd,
7204  * etc.) (CIK).  Certain interrupts sources are sw
7205  * generated and do not require an explicit ack.
7206  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
        u32 tmp;

        /* latch the display interrupt status registers for later use by
         * cik_irq_process() */
        rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
        rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
        rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
        rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
        rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
        rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
        rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

        /* latch the per-crtc pageflip status */
        rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
                EVERGREEN_CRTC0_REGISTER_OFFSET);
        rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
                EVERGREEN_CRTC1_REGISTER_OFFSET);
        if (rdev->num_crtc >= 4) {
                rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
                        EVERGREEN_CRTC2_REGISTER_OFFSET);
                rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
                        EVERGREEN_CRTC3_REGISTER_OFFSET);
        }
        if (rdev->num_crtc >= 6) {
                rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
                        EVERGREEN_CRTC4_REGISTER_OFFSET);
                rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
                        EVERGREEN_CRTC5_REGISTER_OFFSET);
        }

        /* ack pageflip and vblank/vline interrupts for crtc 0/1 */
        if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
                WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
                       GRPH_PFLIP_INT_CLEAR);
        if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
                WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
                       GRPH_PFLIP_INT_CLEAR);
        if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
                WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
        if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
                WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
        if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
                WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
        if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
                WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

        /* same for crtc 2/3 when present */
        if (rdev->num_crtc >= 4) {
                if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
                               GRPH_PFLIP_INT_CLEAR);
                if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
                               GRPH_PFLIP_INT_CLEAR);
                if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
                if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
                if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
                if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
        }

        /* same for crtc 4/5 when present */
        if (rdev->num_crtc >= 6) {
                if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
                               GRPH_PFLIP_INT_CLEAR);
                if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
                               GRPH_PFLIP_INT_CLEAR);
                if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
                if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
                if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
                if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
        }

        /* ack hotplug connect/disconnect interrupts (read-modify-write to
         * preserve the enable/polarity bits) */
        if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
                tmp = RREG32(DC_HPD1_INT_CONTROL);
                tmp |= DC_HPDx_INT_ACK;
                WREG32(DC_HPD1_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
                tmp = RREG32(DC_HPD2_INT_CONTROL);
                tmp |= DC_HPDx_INT_ACK;
                WREG32(DC_HPD2_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
                tmp = RREG32(DC_HPD3_INT_CONTROL);
                tmp |= DC_HPDx_INT_ACK;
                WREG32(DC_HPD3_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
                tmp = RREG32(DC_HPD4_INT_CONTROL);
                tmp |= DC_HPDx_INT_ACK;
                WREG32(DC_HPD4_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
                tmp = RREG32(DC_HPD5_INT_CONTROL);
                tmp |= DC_HPDx_INT_ACK;
                WREG32(DC_HPD5_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
                tmp = RREG32(DC_HPD6_INT_CONTROL);
                tmp |= DC_HPDx_INT_ACK;
                WREG32(DC_HPD6_INT_CONTROL, tmp);
        }
        /* ack hotplug RX (e.g. DP short pulse) interrupts */
        if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
                tmp = RREG32(DC_HPD1_INT_CONTROL);
                tmp |= DC_HPDx_RX_INT_ACK;
                WREG32(DC_HPD1_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
                tmp = RREG32(DC_HPD2_INT_CONTROL);
                tmp |= DC_HPDx_RX_INT_ACK;
                WREG32(DC_HPD2_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
                tmp = RREG32(DC_HPD3_INT_CONTROL);
                tmp |= DC_HPDx_RX_INT_ACK;
                WREG32(DC_HPD3_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
                tmp = RREG32(DC_HPD4_INT_CONTROL);
                tmp |= DC_HPDx_RX_INT_ACK;
                WREG32(DC_HPD4_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
                tmp = RREG32(DC_HPD5_INT_CONTROL);
                tmp |= DC_HPDx_RX_INT_ACK;
                WREG32(DC_HPD5_INT_CONTROL, tmp);
        }
        if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
                tmp = RREG32(DC_HPD6_INT_CONTROL);
                tmp |= DC_HPDx_RX_INT_ACK;
                WREG32(DC_HPD6_INT_CONTROL, tmp);
        }
}
7346
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Masks off the interrupt controller, waits for any interrupt already
 * in flight to latch, acknowledges everything that is pending, and
 * finally clears the per-source interrupt enable state.  The ordering
 * matters: acking after the mask ensures nothing is left asserted.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait for in-flight interrupts to latch, then acknowledge them */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7362
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* quiesce the interrupt controller before stopping the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7376
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* suspend first so the hw can no longer write into the IH ring */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7391
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the GPU-written writeback copy over an MMIO register read */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* strip the overflow flag so only the ring offset remains */
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* tell the hw the overflow was seen so it can flag the next one */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	/* mask to the ring size; wptr/rptr are byte offsets into the ring */
	return (wptr & rdev->ih.ptr_mask);
}
7427
7428 /*        CIK IV Ring
7429  * Each IV ring entry is 128 bits:
7430  * [7:0]    - interrupt source id
7431  * [31:8]   - reserved
7432  * [59:32]  - interrupt source data
7433  * [63:60]  - reserved
7434  * [71:64]  - RINGID
7435  *            CP:
7436  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7437  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7438  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7439  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7440  *            PIPE_ID - ME0 0=3D
7441  *                    - ME1&2 compute dispatcher (4 pipes each)
7442  *            SDMA:
7443  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7444  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7445  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7446  * [79:72]  - VMID
7447  * [95:80]  - PASID
7448  * [127:96] - reserved
7449  */
7450 /**
7451  * cik_irq_process - interrupt handler
7452  *
7453  * @rdev: radeon_device pointer
7454  *
 * Interrupt handler (CIK).  Walk the IH ring,
7456  * ack interrupts and schedule work to handle
7457  * interrupt events.
7458  * Returns irq process return code.
7459  */
7460 int cik_irq_process(struct radeon_device *rdev)
7461 {
7462         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7463         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7464         u32 wptr;
7465         u32 rptr;
7466         u32 src_id, src_data, ring_id;
7467         u8 me_id, pipe_id, queue_id;
7468         u32 ring_index;
7469         bool queue_hotplug = false;
7470         bool queue_dp = false;
7471         bool queue_reset = false;
7472         u32 addr, status, mc_client;
7473         bool queue_thermal = false;
7474
7475         if (!rdev->ih.enabled || rdev->shutdown)
7476                 return IRQ_NONE;
7477
7478         wptr = cik_get_ih_wptr(rdev);
7479
7480 restart_ih:
7481         /* is somebody else already processing irqs? */
7482         if (atomic_xchg(&rdev->ih.lock, 1))
7483                 return IRQ_NONE;
7484
7485         rptr = rdev->ih.rptr;
7486         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7487
7488         /* Order reading of wptr vs. reading of IH ring data */
7489         rmb();
7490
7491         /* display interrupts */
7492         cik_irq_ack(rdev);
7493
7494         while (rptr != wptr) {
7495                 /* wptr/rptr are in bytes! */
7496                 ring_index = rptr / 4;
7497
7498                 radeon_kfd_interrupt(rdev,
7499                                 (const void *) &rdev->ih.ring[ring_index]);
7500
7501                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7502                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7503                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7504
7505                 switch (src_id) {
7506                 case 1: /* D1 vblank/vline */
7507                         switch (src_data) {
7508                         case 0: /* D1 vblank */
7509                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7510                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7511
7512                                 if (rdev->irq.crtc_vblank_int[0]) {
7513                                         drm_handle_vblank(rdev->ddev, 0);
7514                                         rdev->pm.vblank_sync = true;
7515                                         wake_up(&rdev->irq.vblank_queue);
7516                                 }
7517                                 if (atomic_read(&rdev->irq.pflip[0]))
7518                                         radeon_crtc_handle_vblank(rdev, 0);
7519                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7520                                 DRM_DEBUG("IH: D1 vblank\n");
7521
7522                                 break;
7523                         case 1: /* D1 vline */
7524                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7525                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7526
7527                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7528                                 DRM_DEBUG("IH: D1 vline\n");
7529
7530                                 break;
7531                         default:
7532                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7533                                 break;
7534                         }
7535                         break;
7536                 case 2: /* D2 vblank/vline */
7537                         switch (src_data) {
7538                         case 0: /* D2 vblank */
7539                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7540                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7541
7542                                 if (rdev->irq.crtc_vblank_int[1]) {
7543                                         drm_handle_vblank(rdev->ddev, 1);
7544                                         rdev->pm.vblank_sync = true;
7545                                         wake_up(&rdev->irq.vblank_queue);
7546                                 }
7547                                 if (atomic_read(&rdev->irq.pflip[1]))
7548                                         radeon_crtc_handle_vblank(rdev, 1);
7549                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7550                                 DRM_DEBUG("IH: D2 vblank\n");
7551
7552                                 break;
7553                         case 1: /* D2 vline */
7554                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7555                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7556
7557                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7558                                 DRM_DEBUG("IH: D2 vline\n");
7559
7560                                 break;
7561                         default:
7562                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7563                                 break;
7564                         }
7565                         break;
7566                 case 3: /* D3 vblank/vline */
7567                         switch (src_data) {
7568                         case 0: /* D3 vblank */
7569                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7570                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7571
7572                                 if (rdev->irq.crtc_vblank_int[2]) {
7573                                         drm_handle_vblank(rdev->ddev, 2);
7574                                         rdev->pm.vblank_sync = true;
7575                                         wake_up(&rdev->irq.vblank_queue);
7576                                 }
7577                                 if (atomic_read(&rdev->irq.pflip[2]))
7578                                         radeon_crtc_handle_vblank(rdev, 2);
7579                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7580                                 DRM_DEBUG("IH: D3 vblank\n");
7581
7582                                 break;
7583                         case 1: /* D3 vline */
7584                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7585                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586
7587                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7588                                 DRM_DEBUG("IH: D3 vline\n");
7589
7590                                 break;
7591                         default:
7592                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7593                                 break;
7594                         }
7595                         break;
7596                 case 4: /* D4 vblank/vline */
7597                         switch (src_data) {
7598                         case 0: /* D4 vblank */
7599                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7600                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602                                 if (rdev->irq.crtc_vblank_int[3]) {
7603                                         drm_handle_vblank(rdev->ddev, 3);
7604                                         rdev->pm.vblank_sync = true;
7605                                         wake_up(&rdev->irq.vblank_queue);
7606                                 }
7607                                 if (atomic_read(&rdev->irq.pflip[3]))
7608                                         radeon_crtc_handle_vblank(rdev, 3);
7609                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7610                                 DRM_DEBUG("IH: D4 vblank\n");
7611
7612                                 break;
7613                         case 1: /* D4 vline */
7614                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7615                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7618                                 DRM_DEBUG("IH: D4 vline\n");
7619
7620                                 break;
7621                         default:
7622                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7623                                 break;
7624                         }
7625                         break;
7626                 case 5: /* D5 vblank/vline */
7627                         switch (src_data) {
7628                         case 0: /* D5 vblank */
7629                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7630                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632                                 if (rdev->irq.crtc_vblank_int[4]) {
7633                                         drm_handle_vblank(rdev->ddev, 4);
7634                                         rdev->pm.vblank_sync = true;
7635                                         wake_up(&rdev->irq.vblank_queue);
7636                                 }
7637                                 if (atomic_read(&rdev->irq.pflip[4]))
7638                                         radeon_crtc_handle_vblank(rdev, 4);
7639                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7640                                 DRM_DEBUG("IH: D5 vblank\n");
7641
7642                                 break;
7643                         case 1: /* D5 vline */
7644                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7645                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7648                                 DRM_DEBUG("IH: D5 vline\n");
7649
7650                                 break;
7651                         default:
7652                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653                                 break;
7654                         }
7655                         break;
7656                 case 6: /* D6 vblank/vline */
7657                         switch (src_data) {
7658                         case 0: /* D6 vblank */
7659                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7660                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662                                 if (rdev->irq.crtc_vblank_int[5]) {
7663                                         drm_handle_vblank(rdev->ddev, 5);
7664                                         rdev->pm.vblank_sync = true;
7665                                         wake_up(&rdev->irq.vblank_queue);
7666                                 }
7667                                 if (atomic_read(&rdev->irq.pflip[5]))
7668                                         radeon_crtc_handle_vblank(rdev, 5);
7669                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7670                                 DRM_DEBUG("IH: D6 vblank\n");
7671
7672                                 break;
7673                         case 1: /* D6 vline */
7674                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7675                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7678                                 DRM_DEBUG("IH: D6 vline\n");
7679
7680                                 break;
7681                         default:
7682                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683                                 break;
7684                         }
7685                         break;
7686                 case 8: /* D1 page flip */
7687                 case 10: /* D2 page flip */
7688                 case 12: /* D3 page flip */
7689                 case 14: /* D4 page flip */
7690                 case 16: /* D5 page flip */
7691                 case 18: /* D6 page flip */
7692                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7693                         if (radeon_use_pflipirq > 0)
7694                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7695                         break;
7696                 case 42: /* HPD hotplug */
7697                         switch (src_data) {
7698                         case 0:
7699                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7700                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7701
7702                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7703                                 queue_hotplug = true;
7704                                 DRM_DEBUG("IH: HPD1\n");
7705
7706                                 break;
7707                         case 1:
7708                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7709                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7710
7711                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7712                                 queue_hotplug = true;
7713                                 DRM_DEBUG("IH: HPD2\n");
7714
7715                                 break;
7716                         case 2:
7717                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7718                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7719
7720                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7721                                 queue_hotplug = true;
7722                                 DRM_DEBUG("IH: HPD3\n");
7723
7724                                 break;
7725                         case 3:
7726                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7727                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7728
7729                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7730                                 queue_hotplug = true;
7731                                 DRM_DEBUG("IH: HPD4\n");
7732
7733                                 break;
7734                         case 4:
7735                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7736                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7737
7738                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7739                                 queue_hotplug = true;
7740                                 DRM_DEBUG("IH: HPD5\n");
7741
7742                                 break;
7743                         case 5:
7744                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7745                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7746
7747                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7748                                 queue_hotplug = true;
7749                                 DRM_DEBUG("IH: HPD6\n");
7750
7751                                 break;
7752                         case 6:
7753                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7754                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7755
7756                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7757                                 queue_dp = true;
7758                                 DRM_DEBUG("IH: HPD_RX 1\n");
7759
7760                                 break;
7761                         case 7:
7762                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7763                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7764
7765                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7766                                 queue_dp = true;
7767                                 DRM_DEBUG("IH: HPD_RX 2\n");
7768
7769                                 break;
7770                         case 8:
7771                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7772                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7773
7774                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7775                                 queue_dp = true;
7776                                 DRM_DEBUG("IH: HPD_RX 3\n");
7777
7778                                 break;
7779                         case 9:
7780                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7781                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7782
7783                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7784                                 queue_dp = true;
7785                                 DRM_DEBUG("IH: HPD_RX 4\n");
7786
7787                                 break;
7788                         case 10:
7789                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7790                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791
7792                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7793                                 queue_dp = true;
7794                                 DRM_DEBUG("IH: HPD_RX 5\n");
7795
7796                                 break;
7797                         case 11:
7798                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7799                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800
7801                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7802                                 queue_dp = true;
7803                                 DRM_DEBUG("IH: HPD_RX 6\n");
7804
7805                                 break;
7806                         default:
7807                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7808                                 break;
7809                         }
7810                         break;
7811                 case 96:
7812                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7813                         WREG32(SRBM_INT_ACK, 0x1);
7814                         break;
7815                 case 124: /* UVD */
7816                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7817                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7818                         break;
7819                 case 146:
7820                 case 147:
7821                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7822                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7823                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7824                         /* reset addr and status */
7825                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7826                         if (addr == 0x0 && status == 0x0)
7827                                 break;
7828                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7829                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7830                                 addr);
7831                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7832                                 status);
7833                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7834                         break;
7835                 case 167: /* VCE */
7836                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7837                         switch (src_data) {
7838                         case 0:
7839                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7840                                 break;
7841                         case 1:
7842                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7843                                 break;
7844                         default:
7845                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7846                                 break;
7847                         }
7848                         break;
7849                 case 176: /* GFX RB CP_INT */
7850                 case 177: /* GFX IB CP_INT */
7851                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7852                         break;
7853                 case 181: /* CP EOP event */
7854                         DRM_DEBUG("IH: CP EOP\n");
7855                         /* XXX check the bitfield order! */
7856                         me_id = (ring_id & 0x60) >> 5;
7857                         pipe_id = (ring_id & 0x18) >> 3;
7858                         queue_id = (ring_id & 0x7) >> 0;
7859                         switch (me_id) {
7860                         case 0:
7861                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7862                                 break;
7863                         case 1:
7864                         case 2:
7865                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7866                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7867                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7868                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7869                                 break;
7870                         }
7871                         break;
7872                 case 184: /* CP Privileged reg access */
7873                         DRM_ERROR("Illegal register access in command stream\n");
7874                         /* XXX check the bitfield order! */
7875                         me_id = (ring_id & 0x60) >> 5;
7876                         pipe_id = (ring_id & 0x18) >> 3;
7877                         queue_id = (ring_id & 0x7) >> 0;
7878                         switch (me_id) {
7879                         case 0:
7880                                 /* This results in a full GPU reset, but all we need to do is soft
7881                                  * reset the CP for gfx
7882                                  */
7883                                 queue_reset = true;
7884                                 break;
7885                         case 1:
7886                                 /* XXX compute */
7887                                 queue_reset = true;
7888                                 break;
7889                         case 2:
7890                                 /* XXX compute */
7891                                 queue_reset = true;
7892                                 break;
7893                         }
7894                         break;
7895                 case 185: /* CP Privileged inst */
7896                         DRM_ERROR("Illegal instruction in command stream\n");
7897                         /* XXX check the bitfield order! */
7898                         me_id = (ring_id & 0x60) >> 5;
7899                         pipe_id = (ring_id & 0x18) >> 3;
7900                         queue_id = (ring_id & 0x7) >> 0;
7901                         switch (me_id) {
7902                         case 0:
7903                                 /* This results in a full GPU reset, but all we need to do is soft
7904                                  * reset the CP for gfx
7905                                  */
7906                                 queue_reset = true;
7907                                 break;
7908                         case 1:
7909                                 /* XXX compute */
7910                                 queue_reset = true;
7911                                 break;
7912                         case 2:
7913                                 /* XXX compute */
7914                                 queue_reset = true;
7915                                 break;
7916                         }
7917                         break;
7918                 case 224: /* SDMA trap event */
7919                         /* XXX check the bitfield order! */
7920                         me_id = (ring_id & 0x3) >> 0;
7921                         queue_id = (ring_id & 0xc) >> 2;
7922                         DRM_DEBUG("IH: SDMA trap\n");
7923                         switch (me_id) {
7924                         case 0:
7925                                 switch (queue_id) {
7926                                 case 0:
7927                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7928                                         break;
7929                                 case 1:
7930                                         /* XXX compute */
7931                                         break;
7932                                 case 2:
7933                                         /* XXX compute */
7934                                         break;
7935                                 }
7936                                 break;
7937                         case 1:
7938                                 switch (queue_id) {
7939                                 case 0:
7940                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7941                                         break;
7942                                 case 1:
7943                                         /* XXX compute */
7944                                         break;
7945                                 case 2:
7946                                         /* XXX compute */
7947                                         break;
7948                                 }
7949                                 break;
7950                         }
7951                         break;
7952                 case 230: /* thermal low to high */
7953                         DRM_DEBUG("IH: thermal low to high\n");
7954                         rdev->pm.dpm.thermal.high_to_low = false;
7955                         queue_thermal = true;
7956                         break;
7957                 case 231: /* thermal high to low */
7958                         DRM_DEBUG("IH: thermal high to low\n");
7959                         rdev->pm.dpm.thermal.high_to_low = true;
7960                         queue_thermal = true;
7961                         break;
7962                 case 233: /* GUI IDLE */
7963                         DRM_DEBUG("IH: GUI idle\n");
7964                         break;
7965                 case 241: /* SDMA Privileged inst */
7966                 case 247: /* SDMA Privileged inst */
7967                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7968                         /* XXX check the bitfield order! */
7969                         me_id = (ring_id & 0x3) >> 0;
7970                         queue_id = (ring_id & 0xc) >> 2;
7971                         switch (me_id) {
7972                         case 0:
7973                                 switch (queue_id) {
7974                                 case 0:
7975                                         queue_reset = true;
7976                                         break;
7977                                 case 1:
7978                                         /* XXX compute */
7979                                         queue_reset = true;
7980                                         break;
7981                                 case 2:
7982                                         /* XXX compute */
7983                                         queue_reset = true;
7984                                         break;
7985                                 }
7986                                 break;
7987                         case 1:
7988                                 switch (queue_id) {
7989                                 case 0:
7990                                         queue_reset = true;
7991                                         break;
7992                                 case 1:
7993                                         /* XXX compute */
7994                                         queue_reset = true;
7995                                         break;
7996                                 case 2:
7997                                         /* XXX compute */
7998                                         queue_reset = true;
7999                                         break;
8000                                 }
8001                                 break;
8002                         }
8003                         break;
8004                 default:
8005                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8006                         break;
8007                 }
8008
8009                 /* wptr/rptr are in bytes! */
8010                 rptr += 16;
8011                 rptr &= rdev->ih.ptr_mask;
8012                 WREG32(IH_RB_RPTR, rptr);
8013         }
8014         if (queue_dp)
8015                 schedule_work(&rdev->dp_work);
8016         if (queue_hotplug)
8017                 schedule_delayed_work(&rdev->hotplug_work, 0);
8018         if (queue_reset) {
8019                 rdev->needs_reset = true;
8020                 wake_up_all(&rdev->fence_queue);
8021         }
8022         if (queue_thermal)
8023                 schedule_work(&rdev->pm.dpm.thermal.work);
8024         rdev->ih.rptr = rptr;
8025         atomic_set(&rdev->ih.lock, 0);
8026
8027         /* make sure wptr hasn't changed while processing */
8028         wptr = cik_get_ih_wptr(rdev);
8029         if (wptr != rptr)
8030                 goto restart_ih;
8031
8032         return IRQ_HANDLED;
8033 }
8034
8035 /*
8036  * startup/shutdown callbacks
8037  */
8038 static void cik_uvd_init(struct radeon_device *rdev)
8039 {
8040         int r;
8041
8042         if (!rdev->has_uvd)
8043                 return;
8044
8045         r = radeon_uvd_init(rdev);
8046         if (r) {
8047                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8048                 /*
8049                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8050                  * to early fails cik_uvd_start() and thus nothing happens
8051                  * there. So it is pointless to try to go through that code
8052                  * hence why we disable uvd here.
8053                  */
8054                 rdev->has_uvd = 0;
8055                 return;
8056         }
8057         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8058         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8059 }
8060
8061 static void cik_uvd_start(struct radeon_device *rdev)
8062 {
8063         int r;
8064
8065         if (!rdev->has_uvd)
8066                 return;
8067
8068         r = radeon_uvd_resume(rdev);
8069         if (r) {
8070                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8071                 goto error;
8072         }
8073         r = uvd_v4_2_resume(rdev);
8074         if (r) {
8075                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8076                 goto error;
8077         }
8078         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8079         if (r) {
8080                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8081                 goto error;
8082         }
8083         return;
8084
8085 error:
8086         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8087 }
8088
8089 static void cik_uvd_resume(struct radeon_device *rdev)
8090 {
8091         struct radeon_ring *ring;
8092         int r;
8093
8094         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8095                 return;
8096
8097         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8098         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8099         if (r) {
8100                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8101                 return;
8102         }
8103         r = uvd_v1_0_init(rdev);
8104         if (r) {
8105                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8106                 return;
8107         }
8108 }
8109
8110 static void cik_vce_init(struct radeon_device *rdev)
8111 {
8112         int r;
8113
8114         if (!rdev->has_vce)
8115                 return;
8116
8117         r = radeon_vce_init(rdev);
8118         if (r) {
8119                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8120                 /*
8121                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8122                  * to early fails cik_vce_start() and thus nothing happens
8123                  * there. So it is pointless to try to go through that code
8124                  * hence why we disable vce here.
8125                  */
8126                 rdev->has_vce = 0;
8127                 return;
8128         }
8129         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8130         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8131         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8132         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8133 }
8134
8135 static void cik_vce_start(struct radeon_device *rdev)
8136 {
8137         int r;
8138
8139         if (!rdev->has_vce)
8140                 return;
8141
8142         r = radeon_vce_resume(rdev);
8143         if (r) {
8144                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8145                 goto error;
8146         }
8147         r = vce_v2_0_resume(rdev);
8148         if (r) {
8149                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8150                 goto error;
8151         }
8152         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8153         if (r) {
8154                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8155                 goto error;
8156         }
8157         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8158         if (r) {
8159                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8160                 goto error;
8161         }
8162         return;
8163
8164 error:
8165         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8166         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8167 }
8168
8169 static void cik_vce_resume(struct radeon_device *rdev)
8170 {
8171         struct radeon_ring *ring;
8172         int r;
8173
8174         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8175                 return;
8176
8177         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8178         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8179         if (r) {
8180                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8181                 return;
8182         }
8183         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8184         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8185         if (r) {
8186                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8187                 return;
8188         }
8189         r = vce_v1_0_init(rdev);
8190         if (r) {
8191                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8192                 return;
8193         }
8194 }
8195
8196 /**
8197  * cik_startup - program the asic to a functional state
8198  *
8199  * @rdev: radeon_device pointer
8200  *
8201  * Programs the asic to a functional state (CIK).
8202  * Called by cik_init() and cik_resume().
8203  * Returns 0 for success, error for failure.
8204  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs load MC microcode here unless DPM has already done it;
	 * IGPs have no separate MC firmware.
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs need the save/restore register list for the RLC;
		 * Kaveri (spectre) and Kabini/Mullins (kalindi) differ.
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring before the rings themselves
	 * are initialized below
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE disable themselves on failure, so no error check here */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with old firmware still pads with type-2 packets;
	 * everything else uses a type-3 NOP.
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE failures here are logged but not fatal */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	/* resume the KFD (compute offload) interface last */
	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8401
8402 /**
8403  * cik_resume - resume the asic to a functional state
8404  *
8405  * @rdev: radeon_device pointer
8406  *
8407  * Programs the asic to a functional state (CIK).
8408  * Called at resume.
8409  * Returns 0 for success, error for failure.
8410  */
8411 int cik_resume(struct radeon_device *rdev)
8412 {
8413         int r;
8414
8415         /* post card */
8416         atom_asic_init(rdev->mode_info.atom_context);
8417
8418         /* init golden registers */
8419         cik_init_golden_registers(rdev);
8420
8421         if (rdev->pm.pm_method == PM_METHOD_DPM)
8422                 radeon_pm_resume(rdev);
8423
8424         rdev->accel_working = true;
8425         r = cik_startup(rdev);
8426         if (r) {
8427                 DRM_ERROR("cik startup failed on resume\n");
8428                 rdev->accel_working = false;
8429                 return r;
8430         }
8431
8432         return r;
8433
8434 }
8435
8436 /**
8437  * cik_suspend - suspend the asic
8438  *
8439  * @rdev: radeon_device pointer
8440  *
8441  * Bring the chip into a state suitable for suspend (CIK).
8442  * Called at suspend.
8443  * Returns 0 for success.
8444  */
int cik_suspend(struct radeon_device *rdev)
{
	/* tear down in roughly the reverse order of cik_startup():
	 * clients first, then engines, then interrupts and the GART
	 */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);	/* halt the gfx/compute CP */
	cik_sdma_enable(rdev, false);	/* halt the SDMA engines */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);	/* disable powergating */
	cik_fini_cg(rdev);	/* disable clockgating */
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8466
8467 /* Plan is to move initialization in that function and use
8468  * helper function so that radeon_device_init pretty much
8469  * do nothing more than calling asic specific function. This
8470  * should also allow to remove a bunch of callback function
8471  * like vram_info.
8472  */
8473 /**
8474  * cik_init - asic specific driver and hw init
8475  *
8476  * @rdev: radeon_device pointer
8477  *
8478  * Setup asic specific driver variables and program the hw
8479  * to a functional state (CIK).
8480  * Called at driver startup.
8481  * Returns 0 for success, errors for failure.
8482  */
8483 int cik_init(struct radeon_device *rdev)
8484 {
8485         struct radeon_ring *ring;
8486         int r;
8487
8488         /* Read BIOS */
8489         if (!radeon_get_bios(rdev)) {
8490                 if (ASIC_IS_AVIVO(rdev))
8491                         return -EINVAL;
8492         }
8493         /* Must be an ATOMBIOS */
8494         if (!rdev->is_atom_bios) {
8495                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8496                 return -EINVAL;
8497         }
8498         r = radeon_atombios_init(rdev);
8499         if (r)
8500                 return r;
8501
8502         /* Post card if necessary */
8503         if (!radeon_card_posted(rdev)) {
8504                 if (!rdev->bios) {
8505                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8506                         return -EINVAL;
8507                 }
8508                 DRM_INFO("GPU not posted. posting now...\n");
8509                 atom_asic_init(rdev->mode_info.atom_context);
8510         }
8511         /* init golden registers */
8512         cik_init_golden_registers(rdev);
8513         /* Initialize scratch registers */
8514         cik_scratch_init(rdev);
8515         /* Initialize surface registers */
8516         radeon_surface_init(rdev);
8517         /* Initialize clocks */
8518         radeon_get_clock_info(rdev->ddev);
8519
8520         /* Fence driver */
8521         r = radeon_fence_driver_init(rdev);
8522         if (r)
8523                 return r;
8524
8525         /* initialize memory controller */
8526         r = cik_mc_init(rdev);
8527         if (r)
8528                 return r;
8529         /* Memory manager */
8530         r = radeon_bo_init(rdev);
8531         if (r)
8532                 return r;
8533
8534         if (rdev->flags & RADEON_IS_IGP) {
8535                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8536                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8537                         r = cik_init_microcode(rdev);
8538                         if (r) {
8539                                 DRM_ERROR("Failed to load firmware!\n");
8540                                 return r;
8541                         }
8542                 }
8543         } else {
8544                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8545                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8546                     !rdev->mc_fw) {
8547                         r = cik_init_microcode(rdev);
8548                         if (r) {
8549                                 DRM_ERROR("Failed to load firmware!\n");
8550                                 return r;
8551                         }
8552                 }
8553         }
8554
8555         /* Initialize power management */
8556         radeon_pm_init(rdev);
8557
8558         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8559         ring->ring_obj = NULL;
8560         r600_ring_init(rdev, ring, 1024 * 1024);
8561
8562         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8563         ring->ring_obj = NULL;
8564         r600_ring_init(rdev, ring, 1024 * 1024);
8565         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8566         if (r)
8567                 return r;
8568
8569         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8570         ring->ring_obj = NULL;
8571         r600_ring_init(rdev, ring, 1024 * 1024);
8572         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8573         if (r)
8574                 return r;
8575
8576         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8577         ring->ring_obj = NULL;
8578         r600_ring_init(rdev, ring, 256 * 1024);
8579
8580         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8581         ring->ring_obj = NULL;
8582         r600_ring_init(rdev, ring, 256 * 1024);
8583
8584         cik_uvd_init(rdev);
8585         cik_vce_init(rdev);
8586
8587         rdev->ih.ring_obj = NULL;
8588         r600_ih_ring_init(rdev, 64 * 1024);
8589
8590         r = r600_pcie_gart_init(rdev);
8591         if (r)
8592                 return r;
8593
8594         rdev->accel_working = true;
8595         r = cik_startup(rdev);
8596         if (r) {
8597                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8598                 cik_cp_fini(rdev);
8599                 cik_sdma_fini(rdev);
8600                 cik_irq_fini(rdev);
8601                 sumo_rlc_fini(rdev);
8602                 cik_mec_fini(rdev);
8603                 radeon_wb_fini(rdev);
8604                 radeon_ib_pool_fini(rdev);
8605                 radeon_vm_manager_fini(rdev);
8606                 radeon_irq_kms_fini(rdev);
8607                 cik_pcie_gart_fini(rdev);
8608                 rdev->accel_working = false;
8609         }
8610
8611         /* Don't start up if the MC ucode is missing.
8612          * The default clocks and voltages before the MC ucode
8613          * is loaded are not suffient for advanced operations.
8614          */
8615         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8616                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8617                 return -EINVAL;
8618         }
8619
8620         return 0;
8621 }
8622
8623 /**
8624  * cik_fini - asic specific driver and hw fini
8625  *
8626  * @rdev: radeon_device pointer
8627  *
8628  * Tear down the asic specific driver variables and program the hw
8629  * to an idle state (CIK).
8630  * Called at driver unload.
8631  */
void cik_fini(struct radeon_device *rdev)
{
	/* hardware/engine teardown first ... */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);	/* disable powergating */
	cik_fini_cg(rdev);	/* disable clockgating */
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	/* ... then the driver-side state and memory managers */
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8658
8659 void dce8_program_fmt(struct drm_encoder *encoder)
8660 {
8661         struct drm_device *dev = encoder->dev;
8662         struct radeon_device *rdev = dev->dev_private;
8663         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8664         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8665         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8666         int bpc = 0;
8667         u32 tmp = 0;
8668         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8669
8670         if (connector) {
8671                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8672                 bpc = radeon_get_monitor_bpc(connector);
8673                 dither = radeon_connector->dither;
8674         }
8675
8676         /* LVDS/eDP FMT is set up by atom */
8677         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8678                 return;
8679
8680         /* not needed for analog */
8681         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8682             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8683                 return;
8684
8685         if (bpc == 0)
8686                 return;
8687
8688         switch (bpc) {
8689         case 6:
8690                 if (dither == RADEON_FMT_DITHER_ENABLE)
8691                         /* XXX sort out optimal dither settings */
8692                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8693                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8694                 else
8695                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8696                 break;
8697         case 8:
8698                 if (dither == RADEON_FMT_DITHER_ENABLE)
8699                         /* XXX sort out optimal dither settings */
8700                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8701                                 FMT_RGB_RANDOM_ENABLE |
8702                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8703                 else
8704                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8705                 break;
8706         case 10:
8707                 if (dither == RADEON_FMT_DITHER_ENABLE)
8708                         /* XXX sort out optimal dither settings */
8709                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8710                                 FMT_RGB_RANDOM_ENABLE |
8711                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8712                 else
8713                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8714                 break;
8715         default:
8716                 /* not needed */
8717                 break;
8718         }
8719
8720         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8721 }
8722
8723 /* display watermark setup */
8724 /**
8725  * dce8_line_buffer_adjust - Set up the line buffer
8726  *
8727  * @rdev: radeon_device pointer
8728  * @radeon_crtc: the selected display controller
8729  * @mode: the current display mode on the selected display
8730  * controller
8731  *
8732  * Setup up the line buffer allocation for
8733  * the selected display controller (CIK).
8734  * Returns the line buffer size in pixels.
8735  */
8736 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8737                                    struct radeon_crtc *radeon_crtc,
8738                                    struct drm_display_mode *mode)
8739 {
8740         u32 tmp, buffer_alloc, i;
8741         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8742         /*
8743          * Line Buffer Setup
8744          * There are 6 line buffers, one for each display controllers.
8745          * There are 3 partitions per LB. Select the number of partitions
8746          * to enable based on the display width.  For display widths larger
8747          * than 4096, you need use to use 2 display controllers and combine
8748          * them using the stereo blender.
8749          */
8750         if (radeon_crtc->base.enabled && mode) {
8751                 if (mode->crtc_hdisplay < 1920) {
8752                         tmp = 1;
8753                         buffer_alloc = 2;
8754                 } else if (mode->crtc_hdisplay < 2560) {
8755                         tmp = 2;
8756                         buffer_alloc = 2;
8757                 } else if (mode->crtc_hdisplay < 4096) {
8758                         tmp = 0;
8759                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8760                 } else {
8761                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8762                         tmp = 0;
8763                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8764                 }
8765         } else {
8766                 tmp = 1;
8767                 buffer_alloc = 0;
8768         }
8769
8770         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8771                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8772
8773         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8774                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8775         for (i = 0; i < rdev->usec_timeout; i++) {
8776                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8777                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8778                         break;
8779                 udelay(1);
8780         }
8781
8782         if (radeon_crtc->base.enabled && mode) {
8783                 switch (tmp) {
8784                 case 0:
8785                 default:
8786                         return 4096 * 2;
8787                 case 1:
8788                         return 1920 * 2;
8789                 case 2:
8790                         return 2560 * 2;
8791                 }
8792         }
8793
8794         /* controller not enabled, so no lb used */
8795         return 0;
8796 }
8797
8798 /**
8799  * cik_get_number_of_dram_channels - get the number of dram channels
8800  *
8801  * @rdev: radeon_device pointer
8802  *
8803  * Look up the number of video ram channels (CIK).
8804  * Used for display watermark bandwidth calculations
8805  * Returns the number of dram channels
8806  */
8807 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8808 {
8809         u32 tmp = RREG32(MC_SHARED_CHMAP);
8810
8811         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8812         case 0:
8813         default:
8814                 return 1;
8815         case 1:
8816                 return 2;
8817         case 2:
8818                 return 4;
8819         case 3:
8820                 return 8;
8821         case 4:
8822                 return 3;
8823         case 5:
8824                 return 6;
8825         case 6:
8826                 return 10;
8827         case 7:
8828                 return 12;
8829         case 8:
8830                 return 16;
8831         }
8832 }
8833
/* Input parameters for the DCE8 display watermark calculations below.
 * Filled in by dce8_program_watermarks() from the current mode and
 * power-management state.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8849
8850 /**
8851  * dce8_dram_bandwidth - get the dram bandwidth
8852  *
8853  * @wm: watermark calculation data
8854  *
8855  * Calculate the raw dram bandwidth (CIK).
8856  * Used for display watermark bandwidth calculations
8857  * Returns the dram bandwidth in MBytes/s
8858  */
8859 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8860 {
8861         /* Calculate raw DRAM Bandwidth */
8862         fixed20_12 dram_efficiency; /* 0.7 */
8863         fixed20_12 yclk, dram_channels, bandwidth;
8864         fixed20_12 a;
8865
8866         a.full = dfixed_const(1000);
8867         yclk.full = dfixed_const(wm->yclk);
8868         yclk.full = dfixed_div(yclk, a);
8869         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8870         a.full = dfixed_const(10);
8871         dram_efficiency.full = dfixed_const(7);
8872         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8873         bandwidth.full = dfixed_mul(dram_channels, yclk);
8874         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8875
8876         return dfixed_trunc(bandwidth);
8877 }
8878
8879 /**
8880  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8881  *
8882  * @wm: watermark calculation data
8883  *
8884  * Calculate the dram bandwidth used for display (CIK).
8885  * Used for display watermark bandwidth calculations
8886  * Returns the dram bandwidth for display in MBytes/s
8887  */
8888 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8889 {
8890         /* Calculate DRAM Bandwidth and the part allocated to display. */
8891         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8892         fixed20_12 yclk, dram_channels, bandwidth;
8893         fixed20_12 a;
8894
8895         a.full = dfixed_const(1000);
8896         yclk.full = dfixed_const(wm->yclk);
8897         yclk.full = dfixed_div(yclk, a);
8898         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8899         a.full = dfixed_const(10);
8900         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8901         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8902         bandwidth.full = dfixed_mul(dram_channels, yclk);
8903         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8904
8905         return dfixed_trunc(bandwidth);
8906 }
8907
8908 /**
8909  * dce8_data_return_bandwidth - get the data return bandwidth
8910  *
8911  * @wm: watermark calculation data
8912  *
8913  * Calculate the data return bandwidth used for display (CIK).
8914  * Used for display watermark bandwidth calculations
8915  * Returns the data return bandwidth in MBytes/s
8916  */
8917 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8918 {
8919         /* Calculate the display Data return Bandwidth */
8920         fixed20_12 return_efficiency; /* 0.8 */
8921         fixed20_12 sclk, bandwidth;
8922         fixed20_12 a;
8923
8924         a.full = dfixed_const(1000);
8925         sclk.full = dfixed_const(wm->sclk);
8926         sclk.full = dfixed_div(sclk, a);
8927         a.full = dfixed_const(10);
8928         return_efficiency.full = dfixed_const(8);
8929         return_efficiency.full = dfixed_div(return_efficiency, a);
8930         a.full = dfixed_const(32);
8931         bandwidth.full = dfixed_mul(a, sclk);
8932         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8933
8934         return dfixed_trunc(bandwidth);
8935 }
8936
8937 /**
8938  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8939  *
8940  * @wm: watermark calculation data
8941  *
8942  * Calculate the dmif bandwidth used for display (CIK).
8943  * Used for display watermark bandwidth calculations
8944  * Returns the dmif bandwidth in MBytes/s
8945  */
8946 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8947 {
8948         /* Calculate the DMIF Request Bandwidth */
8949         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8950         fixed20_12 disp_clk, bandwidth;
8951         fixed20_12 a, b;
8952
8953         a.full = dfixed_const(1000);
8954         disp_clk.full = dfixed_const(wm->disp_clk);
8955         disp_clk.full = dfixed_div(disp_clk, a);
8956         a.full = dfixed_const(32);
8957         b.full = dfixed_mul(a, disp_clk);
8958
8959         a.full = dfixed_const(10);
8960         disp_clk_request_efficiency.full = dfixed_const(8);
8961         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8962
8963         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8964
8965         return dfixed_trunc(bandwidth);
8966 }
8967
8968 /**
8969  * dce8_available_bandwidth - get the min available bandwidth
8970  *
8971  * @wm: watermark calculation data
8972  *
8973  * Calculate the min available bandwidth used for display (CIK).
8974  * Used for display watermark bandwidth calculations
8975  * Returns the min available bandwidth in MBytes/s
8976  */
8977 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8978 {
8979         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8980         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8981         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8982         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8983
8984         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8985 }
8986
8987 /**
8988  * dce8_average_bandwidth - get the average available bandwidth
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the average available bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the average available bandwidth in MBytes/s
8995  */
8996 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8997 {
8998         /* Calculate the display mode Average Bandwidth
8999          * DisplayMode should contain the source and destination dimensions,
9000          * timing, etc.
9001          */
9002         fixed20_12 bpp;
9003         fixed20_12 line_time;
9004         fixed20_12 src_width;
9005         fixed20_12 bandwidth;
9006         fixed20_12 a;
9007
9008         a.full = dfixed_const(1000);
9009         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9010         line_time.full = dfixed_div(line_time, a);
9011         bpp.full = dfixed_const(wm->bytes_per_pixel);
9012         src_width.full = dfixed_const(wm->src_width);
9013         bandwidth.full = dfixed_mul(src_width, bpp);
9014         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9015         bandwidth.full = dfixed_div(bandwidth, line_time);
9016
9017         return dfixed_trunc(bandwidth);
9018 }
9019
9020 /**
9021  * dce8_latency_watermark - get the latency watermark
9022  *
9023  * @wm: watermark calculation data
9024  *
9025  * Calculate the latency watermark (CIK).
9026  * Used for display watermark bandwidth calculations
9027  * Returns the latency watermark in ns
9028  */
9029 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9030 {
9031         /* First calculate the latency in ns */
9032         u32 mc_latency = 2000; /* 2000 ns. */
9033         u32 available_bandwidth = dce8_available_bandwidth(wm);
9034         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9035         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9036         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9037         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9038                 (wm->num_heads * cursor_line_pair_return_time);
9039         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9040         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9041         u32 tmp, dmif_size = 12288;
9042         fixed20_12 a, b, c;
9043
9044         if (wm->num_heads == 0)
9045                 return 0;
9046
9047         a.full = dfixed_const(2);
9048         b.full = dfixed_const(1);
9049         if ((wm->vsc.full > a.full) ||
9050             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9051             (wm->vtaps >= 5) ||
9052             ((wm->vsc.full >= a.full) && wm->interlaced))
9053                 max_src_lines_per_dst_line = 4;
9054         else
9055                 max_src_lines_per_dst_line = 2;
9056
9057         a.full = dfixed_const(available_bandwidth);
9058         b.full = dfixed_const(wm->num_heads);
9059         a.full = dfixed_div(a, b);
9060
9061         b.full = dfixed_const(mc_latency + 512);
9062         c.full = dfixed_const(wm->disp_clk);
9063         b.full = dfixed_div(b, c);
9064
9065         c.full = dfixed_const(dmif_size);
9066         b.full = dfixed_div(c, b);
9067
9068         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9069
9070         b.full = dfixed_const(1000);
9071         c.full = dfixed_const(wm->disp_clk);
9072         b.full = dfixed_div(c, b);
9073         c.full = dfixed_const(wm->bytes_per_pixel);
9074         b.full = dfixed_mul(b, c);
9075
9076         lb_fill_bw = min(tmp, dfixed_trunc(b));
9077
9078         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9079         b.full = dfixed_const(1000);
9080         c.full = dfixed_const(lb_fill_bw);
9081         b.full = dfixed_div(c, b);
9082         a.full = dfixed_div(a, b);
9083         line_fill_time = dfixed_trunc(a);
9084
9085         if (line_fill_time < wm->active_time)
9086                 return latency;
9087         else
9088                 return latency + (line_fill_time - wm->active_time);
9089
9090 }
9091
9092 /**
9093  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9094  * average and available dram bandwidth
9095  *
9096  * @wm: watermark calculation data
9097  *
9098  * Check if the display average bandwidth fits in the display
9099  * dram bandwidth (CIK).
9100  * Used for display watermark bandwidth calculations
9101  * Returns true if the display fits, false if not.
9102  */
9103 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9104 {
9105         if (dce8_average_bandwidth(wm) <=
9106             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9107                 return true;
9108         else
9109                 return false;
9110 }
9111
9112 /**
9113  * dce8_average_bandwidth_vs_available_bandwidth - check
9114  * average and available bandwidth
9115  *
9116  * @wm: watermark calculation data
9117  *
9118  * Check if the display average bandwidth fits in the display
9119  * available bandwidth (CIK).
9120  * Used for display watermark bandwidth calculations
9121  * Returns true if the display fits, false if not.
9122  */
9123 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9124 {
9125         if (dce8_average_bandwidth(wm) <=
9126             (dce8_available_bandwidth(wm) / wm->num_heads))
9127                 return true;
9128         else
9129                 return false;
9130 }
9131
9132 /**
9133  * dce8_check_latency_hiding - check latency hiding
9134  *
9135  * @wm: watermark calculation data
9136  *
9137  * Check latency hiding (CIK).
9138  * Used for display watermark bandwidth calculations
9139  * Returns true if the display fits, false if not.
9140  */
9141 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9142 {
9143         u32 lb_partitions = wm->lb_size / wm->src_width;
9144         u32 line_time = wm->active_time + wm->blank_time;
9145         u32 latency_tolerant_lines;
9146         u32 latency_hiding;
9147         fixed20_12 a;
9148
9149         a.full = dfixed_const(1);
9150         if (wm->vsc.full > a.full)
9151                 latency_tolerant_lines = 1;
9152         else {
9153                 if (lb_partitions <= (wm->vtaps + 1))
9154                         latency_tolerant_lines = 1;
9155                 else
9156                         latency_tolerant_lines = 2;
9157         }
9158
9159         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9160
9161         if (dce8_latency_watermark(wm) <= latency_hiding)
9162                 return true;
9163         else
9164                 return false;
9165 }
9166
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 * Computes two watermark sets (wm A for high clocks, wm B for low
 * clocks), writes both into the DPG latency registers, then restores
 * the original watermark-set selection.
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is in kHz, so pixel_period is in ns */
		pixel_period = 1000000 / (u32)mode->clock;
		/* line_time is clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* DPM clocks are in 10 kHz units; convert to kHz */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* low = true selects the minimum DPM clock levels */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9302
9303 /**
9304  * dce8_bandwidth_update - program display watermarks
9305  *
9306  * @rdev: radeon_device pointer
9307  *
9308  * Calculate and program the display watermarks and line
9309  * buffer allocation (CIK).
9310  */
9311 void dce8_bandwidth_update(struct radeon_device *rdev)
9312 {
9313         struct drm_display_mode *mode = NULL;
9314         u32 num_heads = 0, lb_size;
9315         int i;
9316
9317         if (!rdev->mode_info.mode_config_initialized)
9318                 return;
9319
9320         radeon_update_display_priority(rdev);
9321
9322         for (i = 0; i < rdev->num_crtc; i++) {
9323                 if (rdev->mode_info.crtcs[i]->base.enabled)
9324                         num_heads++;
9325         }
9326         for (i = 0; i < rdev->num_crtc; i++) {
9327                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9328                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9329                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9330         }
9331 }
9332
9333 /**
9334  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9335  *
9336  * @rdev: radeon_device pointer
9337  *
9338  * Fetches a GPU clock counter snapshot (SI).
9339  * Returns the 64 bit clock counter snapshot.
9340  */
9341 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9342 {
9343         uint64_t clock;
9344
9345         mutex_lock(&rdev->gpu_clock_mutex);
9346         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9347         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9348                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9349         mutex_unlock(&rdev->gpu_clock_mutex);
9350         return clock;
9351 }
9352
9353 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9354                              u32 cntl_reg, u32 status_reg)
9355 {
9356         int r, i;
9357         struct atom_clock_dividers dividers;
9358         uint32_t tmp;
9359
9360         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9361                                            clock, false, &dividers);
9362         if (r)
9363                 return r;
9364
9365         tmp = RREG32_SMC(cntl_reg);
9366         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9367         tmp |= dividers.post_divider;
9368         WREG32_SMC(cntl_reg, tmp);
9369
9370         for (i = 0; i < 100; i++) {
9371                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9372                         break;
9373                 mdelay(10);
9374         }
9375         if (i == 100)
9376                 return -ETIMEDOUT;
9377
9378         return 0;
9379 }
9380
9381 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9382 {
9383         int r = 0;
9384
9385         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9386         if (r)
9387                 return r;
9388
9389         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9390         return r;
9391 }
9392
9393 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9394 {
9395         int r, i;
9396         struct atom_clock_dividers dividers;
9397         u32 tmp;
9398
9399         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9400                                            ecclk, false, &dividers);
9401         if (r)
9402                 return r;
9403
9404         for (i = 0; i < 100; i++) {
9405                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9406                         break;
9407                 mdelay(10);
9408         }
9409         if (i == 100)
9410                 return -ETIMEDOUT;
9411
9412         tmp = RREG32_SMC(CG_ECLK_CNTL);
9413         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9414         tmp |= dividers.post_divider;
9415         WREG32_SMC(CG_ECLK_CNTL, tmp);
9416
9417         for (i = 0; i < 100; i++) {
9418                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9419                         break;
9420                 mdelay(10);
9421         }
9422         if (i == 100)
9423                 return -ETIMEDOUT;
9424
9425         return 0;
9426 }
9427
/* Attempt to bring the PCIe link up to gen2/gen3 speeds, depending on
 * what both the root port and the GPU advertise. For gen3 this includes
 * the retry/equalization dance required before initiating the speed
 * change. Silently returns on IGPs, non-PCIE parts, or when disabled
 * via radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither gen2 nor gen3 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability on both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current HAWD setting on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate the link to its maximum width if it
			 * came up narrower than detected
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* bit 4 = enter compliance, bits 9..11 =
				 * transmit margin; restore the saved values
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* low nibble of LNKCTL2 is the target link speed */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9587
/**
 * cik_program_aspm - program PCIe ASPM-related registers (CIK)
 *
 * @rdev: radeon_device pointer
 *
 * Configures the PCIe link controller (and, when CLKREQ# is usable,
 * several SMC clock-select registers) for Active State Power Management.
 * Bails out early when the radeon_aspm module parameter is 0, on IGP
 * parts, or on non-PCIe parts.  The four local "disable_*" flags are
 * hard-wired policy knobs; with the current values both L0s and L1
 * entry are set up, including PLL power-down in L1.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Local policy switches - all currently fixed at "don't disable". */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* User explicitly disabled ASPM via the module parameter. */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/*
	 * Override the transmitted N_FTS (fast training sequence count)
	 * to 0x24.  NOTE(review): the specific value presumably comes
	 * from AMD hardware guidance - not derivable from this file.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* Ignore EDB (end bad) errors at the PCIe port level. */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/*
	 * Build the LC_CNTL value: clear both inactivity timers, then
	 * set an L0s inactivity timeout of 7 unless L0s is disabled.
	 * LC_PMI_TO_L1_DIS is set here but cleared again below when L1
	 * is enabled; it only survives into the register in the
	 * L1-disabled path at the bottom of this if/else.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* Enable L1 entry with inactivity timeout 7. */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/*
			 * Allow the PIF PLLs (both PB0 and PB1 pads) to
			 * power off while the link is in L1/TXS2.
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* Dynamic lane power state 3 (deepest of this field). */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/*
			 * CLKREQ# is only usable when an upstream bridge
			 * exists and its Link Capabilities advertise clock
			 * power management (PCI_EXP_LNKCAP_CLKPM).
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* Allow pad power-down in L1 and L2/L3. */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/*
				 * Re-route the thermal monitor, deep-sleep,
				 * zclk and BIF reference clocks so nothing
				 * depends on the PCIe refclk while it is
				 * gated via CLKREQ#.  NOTE(review): the
				 * exact select values (1, 4) follow AMD
				 * register programming guides - confirm
				 * against the hardware docs.
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/*
		 * L1 disabled: commit the value built above (L1 timer
		 * cleared, PMI-to-L1 disabled, optional L0s timer).
		 */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable light sleep for the PCIe slave/master/replay memories. */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/*
		 * Back out L0s if the received N_FTS field is saturated
		 * and the link is lane-reversed in both directions -
		 * presumably an unreliable-L0s configuration; clearing
		 * the inactivity timer keeps the link out of L0s.
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}