/* GNU Linux-libre 4.9.309-gnu1 — releases.git: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
/* Ring topology for GFX8: one graphics ring plus eight compute (MEC) rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC "golden" GB_ADDR_CONFIG values (memory/tiling address config). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Field-composition helpers for building GB_TILE_MODE* and
 * GB_MACROTILE_MODE* register values: each shifts a field value into
 * place using the mask shifts from gfx_8_0_sh_mask.h.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/*
 * Local bit definitions for RLC_CGTT_MGCG_OVERRIDE (clock-gating override
 * bits), defined here rather than taken from the generated headers.
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear command for BPM serdes writes. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* NOTE(review): presumably the entry count of the RLC "direct register
 * list" firmware format — confirm against the RLC ucode layout. */
#define RLC_FormatDirectRegListLength        14

/* Firmware declarations (MODULE_FIRMWARE etc.) removed by linux-libre. */
/*(DEBLOBBED)*/
94 /*(DEBLOBBED)*/
95
/*
 * GDS register offsets indexed by VMID (0-15): {base, size, GWS, OA}
 * register for each virtual-memory ID's GDS partition.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
115
/*
 * Tonga A11 golden register settings.  Flat triples of
 * {register offset, AND mask, OR value} consumed by
 * amdgpu_program_register_sequence() (see gfx_v8_0_init_golden_registers).
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
134
/*
 * Tonga common golden settings ({reg, mask, value} triples): broadcast
 * GRBM index, raster config, GB_ADDR_CONFIG and SPI CU reservations.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
146
/*
 * Tonga MGCG/CGCG (medium-/coarse-grain clock gating) init sequence,
 * {reg, mask, value} triples.  Programs per-block CGTT clock controls and
 * per-CU CGTS registers; GRBM_GFX_INDEX 0xe0000000 broadcasts to all
 * SEs/SHs/instances.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS clock-gating setup, CU0-CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
225
/* Polaris11 A11 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
246
/* Polaris11 common golden settings ({reg, mask, value} triples). */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
256
/* Polaris10 A11 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
277
/* Polaris10 common golden settings ({reg, mask, value} triples). */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
289
/* Fiji common golden settings ({reg, mask, value} triples). */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
303
/* Fiji A10 golden register settings ({reg, mask, value} triples). */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
318
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples).
 * Same per-block CGTT programming as Tonga but without the per-CU CGTS
 * entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
357
/* Iceland (Topaz) A11 golden register settings ({reg, mask, value}). */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
377
/* Iceland (Topaz) common golden settings ({reg, mask, value} triples). */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
389
/*
 * Iceland (Topaz) MGCG/CGCG clock-gating init ({reg, mask, value}).
 * Differs from Tonga: CP/CPC/CPF use 0xc0000100, TCI uses 0xff000100,
 * only CU0-CU5 are programmed, and there is no CP_MEM_SLP_CNTL entry.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS clock-gating setup, CU0-CU5 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
457
/* Carrizo A11 golden register settings ({reg, mask, value} triples). */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
473
/* Carrizo common golden settings ({reg, mask, value} triples). */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
485
/*
 * Carrizo MGCG/CGCG clock-gating init ({reg, mask, value} triples).
 * Like Tonga's table but with CGTT_CPF at 0x00000100 and
 * RLC_CGCG_CGLS_CTRL set to 0x0020003f.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS clock-gating setup, CU0-CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
564
/* Stoney A11 golden register settings ({reg, mask, value} triples). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
578
/* Stoney common golden settings ({reg, mask, value} triples). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
590
/* Stoney MGCG/CGCG clock-gating init — much shorter than the other ASICs. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
599
/* Forward declarations for setup helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
606
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the ASIC-specific clock-/power-gating init sequence followed by
 * the golden register settings for the detected chip.  Each table is a
 * flat array of (register, mask, value) triplets consumed by
 * amdgpu_program_register_sequence().  Unknown ASICs are left at their
 * hardware defaults.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		/* ACLK setup via the SMC indirect register space */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk for certain Polaris10 SKUs (matched by
		 * PCI revision + subsystem IDs): issue two I2C transactions
		 * through the atombios channel.  Exact purpose of the payload
		 * bytes is not visible here — presumably a VBIOS/board fixup. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
694
695 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
696 {
697         int i;
698
699         adev->gfx.scratch.num_reg = 7;
700         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
701         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
702                 adev->gfx.scratch.free[i] = true;
703                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
704         }
705 }
706
/*
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 * @ring: ring to exercise
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a three-dword
 * SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF, then polls the
 * register for up to adev->usec_timeout microseconds to confirm the CP
 * actually consumed the packet.
 *
 * Returns 0 on success, -EINVAL if the write never lands (timeout), or
 * a negative error code if scratch allocation or ring locking fails.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* known sentinel so a stale readback can't be mistaken for success */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* SET_UCONFIG_REG takes register offsets relative to the UCONFIG base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to process the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
750
/*
 * gfx_v8_0_ring_test_ib - indirect buffer submission test
 * @ring: ring to test
 * @timeout: how long to wait for the fence (jiffies, per fence_wait_timeout)
 *
 * Same idea as gfx_v8_0_ring_test_ring(), but the scratch-register write
 * is placed in an IB and submitted through amdgpu_ib_schedule(); completion
 * is detected by waiting on the returned fence rather than polling.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, -EINVAL if
 * the IB executed but the scratch value is wrong, or another negative
 * error code on submission failure.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* sentinel value; the IB below must overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* single SET_UCONFIG_REG write of 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
806
807
808 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
809         release_firmware(adev->gfx.pfp_fw);
810         adev->gfx.pfp_fw = NULL;
811         release_firmware(adev->gfx.me_fw);
812         adev->gfx.me_fw = NULL;
813         release_firmware(adev->gfx.ce_fw);
814         adev->gfx.ce_fw = NULL;
815         release_firmware(adev->gfx.rlc_fw);
816         adev->gfx.rlc_fw = NULL;
817         release_firmware(adev->gfx.mec_fw);
818         adev->gfx.mec_fw = NULL;
819         if ((adev->asic_type != CHIP_STONEY) &&
820             (adev->asic_type != CHIP_TOPAZ))
821                 release_firmware(adev->gfx.mec2_fw);
822         adev->gfx.mec2_fw = NULL;
823
824         kfree(adev->gfx.rlc.register_list_format);
825 }
826
827 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
828 {
829         const char *chip_name;
830         char fw_name[30];
831         int err;
832         struct amdgpu_firmware_info *info = NULL;
833         const struct common_firmware_header *header = NULL;
834         const struct gfx_firmware_header_v1_0 *cp_hdr;
835         const struct rlc_firmware_header_v2_0 *rlc_hdr;
836         unsigned int *tmp = NULL, i;
837
838         DRM_DEBUG("\n");
839
840         switch (adev->asic_type) {
841         case CHIP_TOPAZ:
842                 chip_name = "topaz";
843                 break;
844         case CHIP_TONGA:
845                 chip_name = "tonga";
846                 break;
847         case CHIP_CARRIZO:
848                 chip_name = "carrizo";
849                 break;
850         case CHIP_FIJI:
851                 chip_name = "fiji";
852                 break;
853         case CHIP_POLARIS11:
854                 chip_name = "polaris11";
855                 break;
856         case CHIP_POLARIS10:
857                 chip_name = "polaris10";
858                 break;
859         case CHIP_STONEY:
860                 chip_name = "stoney";
861                 break;
862         default:
863                 BUG();
864         }
865
866         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
867         err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
868         if (err)
869                 goto out;
870         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
871         if (err)
872                 goto out;
873         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
874         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
875         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
876
877         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
878         err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
879         if (err)
880                 goto out;
881         err = amdgpu_ucode_validate(adev->gfx.me_fw);
882         if (err)
883                 goto out;
884         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
885         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
886         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
887
888         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
889         err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
890         if (err)
891                 goto out;
892         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
893         if (err)
894                 goto out;
895         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
896         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
897         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
898
899         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
900         err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
901         if (err)
902                 goto out;
903         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
904         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
905         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
906         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
907
908         adev->gfx.rlc.save_and_restore_offset =
909                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
910         adev->gfx.rlc.clear_state_descriptor_offset =
911                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
912         adev->gfx.rlc.avail_scratch_ram_locations =
913                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
914         adev->gfx.rlc.reg_restore_list_size =
915                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
916         adev->gfx.rlc.reg_list_format_start =
917                         le32_to_cpu(rlc_hdr->reg_list_format_start);
918         adev->gfx.rlc.reg_list_format_separate_start =
919                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
920         adev->gfx.rlc.starting_offsets_start =
921                         le32_to_cpu(rlc_hdr->starting_offsets_start);
922         adev->gfx.rlc.reg_list_format_size_bytes =
923                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
924         adev->gfx.rlc.reg_list_size_bytes =
925                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
926
927         adev->gfx.rlc.register_list_format =
928                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
929                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
930
931         if (!adev->gfx.rlc.register_list_format) {
932                 err = -ENOMEM;
933                 goto out;
934         }
935
936         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
937                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
938         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
939                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
940
941         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
942
943         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
944                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
945         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
946                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
947
948         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949         err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
950         if (err)
951                 goto out;
952         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
953         if (err)
954                 goto out;
955         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
956         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
957         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
958
959         if ((adev->asic_type != CHIP_STONEY) &&
960             (adev->asic_type != CHIP_TOPAZ)) {
961                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
962                 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
963                 if (!err) {
964                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
965                         if (err)
966                                 goto out;
967                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
968                                 adev->gfx.mec2_fw->data;
969                         adev->gfx.mec2_fw_version =
970                                 le32_to_cpu(cp_hdr->header.ucode_version);
971                         adev->gfx.mec2_feature_version =
972                                 le32_to_cpu(cp_hdr->ucode_feature_version);
973                 } else {
974                         err = 0;
975                         adev->gfx.mec2_fw = NULL;
976                 }
977         }
978
979         if (adev->firmware.smu_load) {
980                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
981                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
982                 info->fw = adev->gfx.pfp_fw;
983                 header = (const struct common_firmware_header *)info->fw->data;
984                 adev->firmware.fw_size +=
985                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
986
987                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
988                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
989                 info->fw = adev->gfx.me_fw;
990                 header = (const struct common_firmware_header *)info->fw->data;
991                 adev->firmware.fw_size +=
992                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
993
994                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
995                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
996                 info->fw = adev->gfx.ce_fw;
997                 header = (const struct common_firmware_header *)info->fw->data;
998                 adev->firmware.fw_size +=
999                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1000
1001                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1002                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1003                 info->fw = adev->gfx.rlc_fw;
1004                 header = (const struct common_firmware_header *)info->fw->data;
1005                 adev->firmware.fw_size +=
1006                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1007
1008                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1009                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1010                 info->fw = adev->gfx.mec_fw;
1011                 header = (const struct common_firmware_header *)info->fw->data;
1012                 adev->firmware.fw_size +=
1013                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1014
1015                 if (adev->gfx.mec2_fw) {
1016                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1017                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1018                         info->fw = adev->gfx.mec2_fw;
1019                         header = (const struct common_firmware_header *)info->fw->data;
1020                         adev->firmware.fw_size +=
1021                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1022                 }
1023
1024         }
1025
1026 out:
1027         if (err) {
1028                 dev_err(adev->dev,
1029                         "gfx8: Failed to load firmware \"%s\"\n",
1030                         fw_name);
1031                 release_firmware(adev->gfx.pfp_fw);
1032                 adev->gfx.pfp_fw = NULL;
1033                 release_firmware(adev->gfx.me_fw);
1034                 adev->gfx.me_fw = NULL;
1035                 release_firmware(adev->gfx.ce_fw);
1036                 adev->gfx.ce_fw = NULL;
1037                 release_firmware(adev->gfx.rlc_fw);
1038                 adev->gfx.rlc_fw = NULL;
1039                 release_firmware(adev->gfx.mec_fw);
1040                 adev->gfx.mec_fw = NULL;
1041                 release_firmware(adev->gfx.mec2_fw);
1042                 adev->gfx.mec2_fw = NULL;
1043         }
1044         return err;
1045 }
1046
/*
 * gfx_v8_0_get_csb_buffer - serialize the RLC clear-state PM4 stream
 * @adev: amdgpu device pointer
 * @buffer: kmapped destination; must hold at least
 *          gfx_v8_0_get_csb_size(adev) dwords
 *
 * Emits, in order: PREAMBLE begin, a CONTEXT_CONTROL packet, every
 * SECT_CONTEXT extent from adev->gfx.rlc.cs_data, a per-ASIC
 * PA_SC_RASTER_CONFIG/CONFIG_1 pair, PREAMBLE end and a final
 * CLEAR_STATE packet.  Silently returns if cs_data or buffer is NULL.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* only SECT_CONTEXT sections belong in the clear-state buffer;
	 * any other section id aborts the dump */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC raster configuration (PA_SC_RASTER_CONFIG + _CONFIG_1) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1119
/*
 * cz_init_cp_jump_table - copy CP jump tables into the RLC cp_table BO
 * @adev: amdgpu device pointer
 *
 * For each microengine — CE, PFP, ME, MEC, and additionally MEC2 on
 * Carrizo — the jump table located at jt_offset/jt_size (dwords) in the
 * corresponding firmware image is copied back-to-back into the kmapped
 * cp_table buffer.  Caller (gfx_v8_0_rlc_init()) must have reserved and
 * kmapped adev->gfx.rlc.cp_table_obj beforehand.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Carrizo also carries a MEC2 table */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* tables are packed consecutively; bo_offset tracks the tail */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1184
1185 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1186 {
1187         int r;
1188
1189         /* clear state block */
1190         if (adev->gfx.rlc.clear_state_obj) {
1191                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1192                 if (unlikely(r != 0))
1193                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1194                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1195                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1196                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1197                 adev->gfx.rlc.clear_state_obj = NULL;
1198         }
1199
1200         /* jump table block */
1201         if (adev->gfx.rlc.cp_table_obj) {
1202                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1203                 if (unlikely(r != 0))
1204                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1205                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1206                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1207                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1208                 adev->gfx.rlc.cp_table_obj = NULL;
1209         }
1210 }
1211
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects
 * @adev: amdgpu device pointer
 *
 * Creates, pins and kmaps a VRAM BO for the clear-state buffer, writes
 * the clear-state stream into it via gfx_v8_0_get_csb_buffer(), then
 * unmaps and unreserves it.  On Carrizo and Stoney a second BO is set
 * up for the CP jump tables plus 64KB of GDS backing, filled in by
 * cz_init_cp_jump_table().
 *
 * Returns 0 on success or a negative error code; the clear-state path
 * cleans up via gfx_v8_0_rlc_fini() on failure.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin so the GPU address stays valid for the RLC */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1307
1308 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1309 {
1310         int r;
1311
1312         if (adev->gfx.mec.hpd_eop_obj) {
1313                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1314                 if (unlikely(r != 0))
1315                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1316                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1317                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1318                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1319                 adev->gfx.mec.hpd_eop_obj = NULL;
1320         }
1321 }
1322
1323 #define MEC_HPD_SIZE 2048
1324
1325 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1326 {
1327         int r;
1328         u32 *hpd;
1329
1330         /*
1331          * we assign only 1 pipe because all other pipes will
1332          * be handled by KFD
1333          */
1334         adev->gfx.mec.num_mec = 1;
1335         adev->gfx.mec.num_pipe = 1;
1336         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1337
1338         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1339                 r = amdgpu_bo_create(adev,
1340                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1341                                      PAGE_SIZE, true,
1342                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1343                                      &adev->gfx.mec.hpd_eop_obj);
1344                 if (r) {
1345                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1346                         return r;
1347                 }
1348         }
1349
1350         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1351         if (unlikely(r != 0)) {
1352                 gfx_v8_0_mec_fini(adev);
1353                 return r;
1354         }
1355         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1356                           &adev->gfx.mec.hpd_eop_gpu_addr);
1357         if (r) {
1358                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1359                 gfx_v8_0_mec_fini(adev);
1360                 return r;
1361         }
1362         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1363         if (r) {
1364                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1365                 gfx_v8_0_mec_fini(adev);
1366                 return r;
1367         }
1368
1369         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1370
1371         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1372         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1373
1374         return 0;
1375 }
1376
/* Hand-assembled GCN compute shader used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the VGPR file (dispatched with the vgpr_init_regs state).
 * Raw machine words; do not edit without reassembling against the VI ISA. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1413
/* Hand-assembled GCN compute shader used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the SGPR file (dispatched twice, with the sgpr1_init_regs and
 * sgpr2_init_regs state respectively).  Raw machine words; do not edit
 * without reassembling against the VI ISA. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1438
/* Register/value pairs written via PACKET3_SET_SH_REG before the VGPR-init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds().  SE0 thread-mgmt mask is
 * fully enabled (0xffffffff), unlike the two SGPR passes below. */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1459
/* Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); SE0 thread-mgmt mask 0x0f selects the
 * low half of the CU bits (sgpr2_init_regs covers 0xf0). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1480
/* Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); SE0 thread-mgmt mask 0xf0 selects the
 * high half of the CU bits (complement of sgpr1_init_regs' 0x0f). */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1501
/* EDC SEC/DED counter registers; read back once at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters. */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1530
/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime the GPRs for EDC on Carrizo
 *
 * Builds a single indirect buffer containing three compute dispatches
 * (one running vgpr_init_compute_shader, two running
 * sgpr_init_compute_shader with complementary STATIC_THREAD_MGMT masks),
 * submits it on compute ring 0 and waits for completion, then programs
 * GB_EDC_MODE / CC_GC_EDC_CONFIG and reads back the SEC/DED counter
 * registers to clear them.
 *
 * No-op (returns 0) on non-Carrizo parts or if the compute ring is not
 * ready.  Returns a negative error code if IB allocation, submission or
 * the fence wait fails.
 *
 * NOTE: the dword budget in total_size below must stay in sync with the
 * exact packets emitted further down; vgpr_offset/sgpr_offset are derived
 * from it.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable it while the init shaders run;
	 * a modified value is written back after the fence wait below */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per reg pair + 4 (PGM_LO/HI) + 5 (dispatch)
	 * + 2 (CS partial flush), times 4 bytes per dword */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the IB, past the packets */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt mode and FED propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);

	return r;
}
1693
/*
 * gfx_v8_0_gpu_early_init - fill adev->gfx.config with per-ASIC limits
 *
 * Sets shader-engine/tile-pipe/CU/backend counts and SC FIFO sizes for
 * each supported asic_type (Polaris parts query theirs from the vbios via
 * amdgpu_atombios_get_gfx_info()), derives mem_row_size_in_kb from the
 * DIMM address-map fuses on APUs or from MC_ARB_RAMCFG on dGPUs, and
 * patches the ROW_SIZE field of the golden GB_ADDR_CONFIG to match.
 *
 * Returns 0 on success, or the error from the atombios query.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* SE/pipe/CU/backend counts come from the vbios on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* SE/pipe/CU/backend counts come from the vbios on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count varies with the Carrizo SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count varies with the Stoney SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count fuses, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1950
1951 static int gfx_v8_0_sw_init(void *handle)
1952 {
1953         int i, r;
1954         struct amdgpu_ring *ring;
1955         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1956
1957         /* EOP Event */
1958         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1959         if (r)
1960                 return r;
1961
1962         /* Privileged reg */
1963         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1964         if (r)
1965                 return r;
1966
1967         /* Privileged inst */
1968         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1969         if (r)
1970                 return r;
1971
1972         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1973
1974         gfx_v8_0_scratch_init(adev);
1975
1976         r = gfx_v8_0_init_microcode(adev);
1977         if (r) {
1978                 DRM_ERROR("Failed to load gfx firmware!\n");
1979                 return r;
1980         }
1981
1982         r = gfx_v8_0_rlc_init(adev);
1983         if (r) {
1984                 DRM_ERROR("Failed to init rlc BOs!\n");
1985                 return r;
1986         }
1987
1988         r = gfx_v8_0_mec_init(adev);
1989         if (r) {
1990                 DRM_ERROR("Failed to init MEC BOs!\n");
1991                 return r;
1992         }
1993
1994         /* set up the gfx ring */
1995         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1996                 ring = &adev->gfx.gfx_ring[i];
1997                 ring->ring_obj = NULL;
1998                 sprintf(ring->name, "gfx");
1999                 /* no gfx doorbells on iceland */
2000                 if (adev->asic_type != CHIP_TOPAZ) {
2001                         ring->use_doorbell = true;
2002                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2003                 }
2004
2005                 r = amdgpu_ring_init(adev, ring, 1024,
2006                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2007                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2008                                      AMDGPU_RING_TYPE_GFX);
2009                 if (r)
2010                         return r;
2011         }
2012
2013         /* set up the compute queues */
2014         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2015                 unsigned irq_type;
2016
2017                 /* max 32 queues per MEC */
2018                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2019                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2020                         break;
2021                 }
2022                 ring = &adev->gfx.compute_ring[i];
2023                 ring->ring_obj = NULL;
2024                 ring->use_doorbell = true;
2025                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2026                 ring->me = 1; /* first MEC */
2027                 ring->pipe = i / 8;
2028                 ring->queue = i % 8;
2029                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2030                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2031                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2032                 r = amdgpu_ring_init(adev, ring, 1024,
2033                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2034                                      &adev->gfx.eop_irq, irq_type,
2035                                      AMDGPU_RING_TYPE_COMPUTE);
2036                 if (r)
2037                         return r;
2038         }
2039
2040         /* reserve GDS, GWS and OA resource for gfx */
2041         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2042                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2043                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2044         if (r)
2045                 return r;
2046
2047         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2048                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2049                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2050         if (r)
2051                 return r;
2052
2053         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2054                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2055                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2056         if (r)
2057                 return r;
2058
2059         adev->gfx.ce_ram_size = 0x8000;
2060
2061         r = gfx_v8_0_gpu_early_init(adev);
2062         if (r)
2063                 return r;
2064
2065         return 0;
2066 }
2067
2068 static int gfx_v8_0_sw_fini(void *handle)
2069 {
2070         int i;
2071         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2072
2073         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2074         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2075         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2076
2077         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2078                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2079         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2080                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2081
2082         gfx_v8_0_mec_fini(adev);
2083         gfx_v8_0_rlc_fini(adev);
2084         gfx_v8_0_free_microcode(adev);
2085
2086         return 0;
2087 }
2088
2089 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2090 {
2091         uint32_t *modearray, *mod2array;
2092         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2093         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2094         u32 reg_offset;
2095
2096         modearray = adev->gfx.config.tile_mode_array;
2097         mod2array = adev->gfx.config.macrotile_mode_array;
2098
2099         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2100                 modearray[reg_offset] = 0;
2101
2102         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2103                 mod2array[reg_offset] = 0;
2104
2105         switch (adev->asic_type) {
2106         case CHIP_TOPAZ:
2107                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2108                                 PIPE_CONFIG(ADDR_SURF_P2) |
2109                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2110                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2111                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112                                 PIPE_CONFIG(ADDR_SURF_P2) |
2113                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2115                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116                                 PIPE_CONFIG(ADDR_SURF_P2) |
2117                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2118                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2119                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120                                 PIPE_CONFIG(ADDR_SURF_P2) |
2121                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2122                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2123                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124                                 PIPE_CONFIG(ADDR_SURF_P2) |
2125                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2126                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2127                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2128                                 PIPE_CONFIG(ADDR_SURF_P2) |
2129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2131                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2132                                 PIPE_CONFIG(ADDR_SURF_P2) |
2133                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2135                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2136                                 PIPE_CONFIG(ADDR_SURF_P2));
2137                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2138                                 PIPE_CONFIG(ADDR_SURF_P2) |
2139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2140                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2141                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142                                  PIPE_CONFIG(ADDR_SURF_P2) |
2143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2145                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2146                                  PIPE_CONFIG(ADDR_SURF_P2) |
2147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2149                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2150                                  PIPE_CONFIG(ADDR_SURF_P2) |
2151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154                                  PIPE_CONFIG(ADDR_SURF_P2) |
2155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2157                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2158                                  PIPE_CONFIG(ADDR_SURF_P2) |
2159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2162                                  PIPE_CONFIG(ADDR_SURF_P2) |
2163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2165                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2166                                  PIPE_CONFIG(ADDR_SURF_P2) |
2167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2169                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2170                                  PIPE_CONFIG(ADDR_SURF_P2) |
2171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2173                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2174                                  PIPE_CONFIG(ADDR_SURF_P2) |
2175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2177                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2178                                  PIPE_CONFIG(ADDR_SURF_P2) |
2179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2181                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2185                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2189                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2193                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2209
2210                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2213                                 NUM_BANKS(ADDR_SURF_8_BANK));
2214                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2217                                 NUM_BANKS(ADDR_SURF_8_BANK));
2218                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2219                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2220                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2221                                 NUM_BANKS(ADDR_SURF_8_BANK));
2222                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2223                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2224                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2225                                 NUM_BANKS(ADDR_SURF_8_BANK));
2226                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2227                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2228                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2229                                 NUM_BANKS(ADDR_SURF_8_BANK));
2230                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2233                                 NUM_BANKS(ADDR_SURF_8_BANK));
2234                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2237                                 NUM_BANKS(ADDR_SURF_8_BANK));
2238                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2241                                 NUM_BANKS(ADDR_SURF_16_BANK));
2242                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2243                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245                                 NUM_BANKS(ADDR_SURF_16_BANK));
2246                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2247                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249                                  NUM_BANKS(ADDR_SURF_16_BANK));
2250                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2251                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2252                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2253                                  NUM_BANKS(ADDR_SURF_16_BANK));
2254                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2257                                  NUM_BANKS(ADDR_SURF_16_BANK));
2258                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261                                  NUM_BANKS(ADDR_SURF_16_BANK));
2262                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2264                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265                                  NUM_BANKS(ADDR_SURF_8_BANK));
2266
2267                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2268                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2269                             reg_offset != 23)
2270                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2271
2272                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2273                         if (reg_offset != 7)
2274                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2275
2276                 break;
2277         case CHIP_FIJI:
2278                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2281                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2282                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2286                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2290                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2303                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2311                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2312                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2315                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2316                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2321                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2324                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2328                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2329                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2344                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2348                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2349                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2352                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2356                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2360                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2369                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2396                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2397                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2400
2401                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2404                                 NUM_BANKS(ADDR_SURF_8_BANK));
2405                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2408                                 NUM_BANKS(ADDR_SURF_8_BANK));
2409                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2412                                 NUM_BANKS(ADDR_SURF_8_BANK));
2413                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                                 NUM_BANKS(ADDR_SURF_8_BANK));
2417                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2420                                 NUM_BANKS(ADDR_SURF_8_BANK));
2421                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                 NUM_BANKS(ADDR_SURF_8_BANK));
2425                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2428                                 NUM_BANKS(ADDR_SURF_8_BANK));
2429                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440                                  NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2443                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2444                                  NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                  NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                  NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                  NUM_BANKS(ADDR_SURF_4_BANK));
2457
                     /* Commit the tile-mode table: one WREG32 per GB_TILE_MODE register. */
2458                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2459                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2460
                     /*
                      * Commit the macrotile (bank) table.  Offset 7 is skipped:
                      * mod2array[7] is never initialized for any chip in this
                      * function — presumably a reserved macrotile mode.
                      * NOTE(review): confirm against the gfx8 register spec.
                      */
2461                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2462                         if (reg_offset != 7)
2463                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2464
2465                 break;
2466         case CHIP_TONGA:
                     /*
                      * Tonga: 8-pipe part (PIPE_CONFIG is ADDR_SURF_P8_32x32_16x16
                      * for all non-PRT entries; the P4_16x16 entries are the
                      * cross-pipe-config PRT modes).  modearray[] holds the values
                      * written to GB_TILE_MODE0.. below; mod2array[] holds the
                      * GB_MACROTILE_MODE0.. (bank width/height/aspect/count) values.
                      */
2467                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2468                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2470                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2471                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2474                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2475                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2478                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2479                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2482                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2486                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2488                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2492                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2500                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2501                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2502                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2504                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2505                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2508                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2512                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2513                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2515                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2517                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2526                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2533                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2541                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2545                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2549                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2558                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2585                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2589
                     /*
                      * Macrotile (bank) settings.  Index 7 is deliberately left
                      * unassigned; the write loop below skips reg_offset 7 to
                      * match.  NOTE(review): presumably a reserved macrotile
                      * mode — confirm against the gfx8 register spec.
                      */
2590                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2593                                 NUM_BANKS(ADDR_SURF_16_BANK));
2594                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2596                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2597                                 NUM_BANKS(ADDR_SURF_16_BANK));
2598                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2600                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2601                                 NUM_BANKS(ADDR_SURF_16_BANK));
2602                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                                 NUM_BANKS(ADDR_SURF_16_BANK));
2606                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2609                                 NUM_BANKS(ADDR_SURF_16_BANK));
2610                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2612                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2613                                 NUM_BANKS(ADDR_SURF_16_BANK));
2614                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2616                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2617                                 NUM_BANKS(ADDR_SURF_16_BANK));
2618                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2628                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2629                                  NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2632                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2633                                  NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2637                                  NUM_BANKS(ADDR_SURF_8_BANK));
2638                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                  NUM_BANKS(ADDR_SURF_4_BANK));
2642                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                  NUM_BANKS(ADDR_SURF_4_BANK));
2646
                     /* Commit the tile-mode table: one WREG32 per GB_TILE_MODE register. */
2647                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2648                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2649
                     /* Commit the macrotile table, skipping the unprogrammed entry 7. */
2650                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2651                         if (reg_offset != 7)
2652                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2653
2654                 break;
2655         case CHIP_POLARIS11:
                     /*
                      * Polaris11: 4-pipe part — every entry uses
                      * PIPE_CONFIG(ADDR_SURF_P4_16x16).  modearray[] holds the
                      * GB_TILE_MODE0.. values and mod2array[] the
                      * GB_MACROTILE_MODE0.. (bank) values written out below.
                      */
2656                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2659                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2660                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2663                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2664                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2667                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2668                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2671                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2672                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2684                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2693                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2694                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2697                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2698                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2701                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2702                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2703                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2705                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2706                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2707                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2710                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2715                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2721                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2722                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2726                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2730                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2733                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2734                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2735                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2737                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2738                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2739                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2741                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2742                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2743                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2745                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2746                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2747                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2749                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2750                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2751                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2754                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2755                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2757                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2758                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2759                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2761                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2762                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2763                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2765                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2766                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2767                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2769                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2770                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2771                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2773                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2774                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2775                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2777                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2778
                     /*
                      * Macrotile (bank) settings.  Index 7 is deliberately left
                      * unassigned; the write loop below skips reg_offset 7 to
                      * match.  NOTE(review): presumably a reserved macrotile
                      * mode — confirm against the gfx8 register spec.
                      */
2779                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2781                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2782                                 NUM_BANKS(ADDR_SURF_16_BANK));
2783
2784                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2786                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2787                                 NUM_BANKS(ADDR_SURF_16_BANK));
2788
2789                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792                                 NUM_BANKS(ADDR_SURF_16_BANK));
2793
2794                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2796                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2797                                 NUM_BANKS(ADDR_SURF_16_BANK));
2798
2799                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2801                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2802                                 NUM_BANKS(ADDR_SURF_16_BANK));
2803
2804                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2807                                 NUM_BANKS(ADDR_SURF_16_BANK));
2808
2809                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2812                                 NUM_BANKS(ADDR_SURF_16_BANK));
2813
2814                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2815                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2816                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2817                                 NUM_BANKS(ADDR_SURF_16_BANK));
2818
2819                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2820                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822                                 NUM_BANKS(ADDR_SURF_16_BANK));
2823
2824                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2836                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2837                                 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842                                 NUM_BANKS(ADDR_SURF_8_BANK));
2843
2844                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2847                                 NUM_BANKS(ADDR_SURF_4_BANK));
2848
                     /* Commit the tile-mode table: one WREG32 per GB_TILE_MODE register. */
2849                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2850                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2851
                     /* Commit the macrotile table, skipping the unprogrammed entry 7. */
2852                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2853                         if (reg_offset != 7)
2854                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2855
2856                 break;
2857         case CHIP_POLARIS10:
2858                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2859                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2862                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2863                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2864                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2870                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2872                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2874                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2876                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2878                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2879                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2882                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2883                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2886                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2888                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2890                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2891                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2892                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2893                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2900                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2908                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2924                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2928                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2932                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2944                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2945                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2948                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2949                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2952                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2953                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2956                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2957                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2959                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2960                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2961                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2963                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2964                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2965                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2967                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2969                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2971                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2973                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2975                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2976                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2977                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2979                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2980
2981                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2983                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2984                                 NUM_BANKS(ADDR_SURF_16_BANK));
2985
2986                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2988                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2989                                 NUM_BANKS(ADDR_SURF_16_BANK));
2990
2991                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995
2996                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000
3001                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3002                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3003                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3004                                 NUM_BANKS(ADDR_SURF_16_BANK));
3005
3006                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3008                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3009                                 NUM_BANKS(ADDR_SURF_16_BANK));
3010
3011                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015
3016                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3017                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3018                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3019                                 NUM_BANKS(ADDR_SURF_16_BANK));
3020
3021                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024                                 NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3028                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3029                                 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3039                                 NUM_BANKS(ADDR_SURF_8_BANK));
3040
3041                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3044                                 NUM_BANKS(ADDR_SURF_4_BANK));
3045
3046                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049                                 NUM_BANKS(ADDR_SURF_4_BANK));
3050
3051                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3052                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3053
3054                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3055                         if (reg_offset != 7)
3056                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3057
3058                 break;
3059         case CHIP_STONEY:
3060                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3061                                 PIPE_CONFIG(ADDR_SURF_P2) |
3062                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3064                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3065                                 PIPE_CONFIG(ADDR_SURF_P2) |
3066                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3068                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3069                                 PIPE_CONFIG(ADDR_SURF_P2) |
3070                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3072                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3073                                 PIPE_CONFIG(ADDR_SURF_P2) |
3074                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3076                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3077                                 PIPE_CONFIG(ADDR_SURF_P2) |
3078                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3080                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3081                                 PIPE_CONFIG(ADDR_SURF_P2) |
3082                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3084                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3085                                 PIPE_CONFIG(ADDR_SURF_P2) |
3086                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3088                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3089                                 PIPE_CONFIG(ADDR_SURF_P2));
3090                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3091                                 PIPE_CONFIG(ADDR_SURF_P2) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3093                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3094                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3095                                  PIPE_CONFIG(ADDR_SURF_P2) |
3096                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3097                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3098                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3099                                  PIPE_CONFIG(ADDR_SURF_P2) |
3100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3102                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3103                                  PIPE_CONFIG(ADDR_SURF_P2) |
3104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3106                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107                                  PIPE_CONFIG(ADDR_SURF_P2) |
3108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3110                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3111                                  PIPE_CONFIG(ADDR_SURF_P2) |
3112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3114                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3115                                  PIPE_CONFIG(ADDR_SURF_P2) |
3116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3118                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3119                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3122                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3127                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3130                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3131                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3134                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3138                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3139                                  PIPE_CONFIG(ADDR_SURF_P2) |
3140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3142                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3143                                  PIPE_CONFIG(ADDR_SURF_P2) |
3144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3146                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3147                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3151                                  PIPE_CONFIG(ADDR_SURF_P2) |
3152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3154                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3155                                  PIPE_CONFIG(ADDR_SURF_P2) |
3156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3158                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3159                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3162
3163                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3166                                 NUM_BANKS(ADDR_SURF_8_BANK));
3167                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170                                 NUM_BANKS(ADDR_SURF_8_BANK));
3171                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3174                                 NUM_BANKS(ADDR_SURF_8_BANK));
3175                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3178                                 NUM_BANKS(ADDR_SURF_8_BANK));
3179                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182                                 NUM_BANKS(ADDR_SURF_8_BANK));
3183                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3186                                 NUM_BANKS(ADDR_SURF_8_BANK));
3187                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3190                                 NUM_BANKS(ADDR_SURF_8_BANK));
3191                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3194                                 NUM_BANKS(ADDR_SURF_16_BANK));
3195                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3196                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3197                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3198                                 NUM_BANKS(ADDR_SURF_16_BANK));
3199                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3200                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3201                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3202                                  NUM_BANKS(ADDR_SURF_16_BANK));
3203                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3204                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3205                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206                                  NUM_BANKS(ADDR_SURF_16_BANK));
3207                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3209                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210                                  NUM_BANKS(ADDR_SURF_16_BANK));
3211                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3214                                  NUM_BANKS(ADDR_SURF_16_BANK));
3215                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3218                                  NUM_BANKS(ADDR_SURF_8_BANK));
3219
3220                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3221                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3222                             reg_offset != 23)
3223                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3224
3225                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3226                         if (reg_offset != 7)
3227                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3228
3229                 break;
3230         default:
3231                 dev_warn(adev->dev,
3232                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3233                          adev->asic_type);
3234
3235         case CHIP_CARRIZO:
3236                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3237                                 PIPE_CONFIG(ADDR_SURF_P2) |
3238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3241                                 PIPE_CONFIG(ADDR_SURF_P2) |
3242                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3243                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3244                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3245                                 PIPE_CONFIG(ADDR_SURF_P2) |
3246                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3248                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3249                                 PIPE_CONFIG(ADDR_SURF_P2) |
3250                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3251                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3252                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3253                                 PIPE_CONFIG(ADDR_SURF_P2) |
3254                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3256                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3257                                 PIPE_CONFIG(ADDR_SURF_P2) |
3258                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3260                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261                                 PIPE_CONFIG(ADDR_SURF_P2) |
3262                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3263                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3264                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3265                                 PIPE_CONFIG(ADDR_SURF_P2));
3266                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3267                                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3269                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271                                  PIPE_CONFIG(ADDR_SURF_P2) |
3272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275                                  PIPE_CONFIG(ADDR_SURF_P2) |
3276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3278                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3279                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3282                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                  PIPE_CONFIG(ADDR_SURF_P2) |
3284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3286                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3287                                  PIPE_CONFIG(ADDR_SURF_P2) |
3288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3290                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3291                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3294                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3295                                  PIPE_CONFIG(ADDR_SURF_P2) |
3296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3299                                  PIPE_CONFIG(ADDR_SURF_P2) |
3300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3303                                  PIPE_CONFIG(ADDR_SURF_P2) |
3304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3306                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3307                                  PIPE_CONFIG(ADDR_SURF_P2) |
3308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3310                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3314                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3315                                  PIPE_CONFIG(ADDR_SURF_P2) |
3316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3318                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3319                                  PIPE_CONFIG(ADDR_SURF_P2) |
3320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3322                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3323                                  PIPE_CONFIG(ADDR_SURF_P2) |
3324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3326                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3327                                  PIPE_CONFIG(ADDR_SURF_P2) |
3328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3330                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3331                                  PIPE_CONFIG(ADDR_SURF_P2) |
3332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3334                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3335                                  PIPE_CONFIG(ADDR_SURF_P2) |
3336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3338
3339                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342                                 NUM_BANKS(ADDR_SURF_8_BANK));
3343                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346                                 NUM_BANKS(ADDR_SURF_8_BANK));
3347                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3350                                 NUM_BANKS(ADDR_SURF_8_BANK));
3351                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3354                                 NUM_BANKS(ADDR_SURF_8_BANK));
3355                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3358                                 NUM_BANKS(ADDR_SURF_8_BANK));
3359                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3362                                 NUM_BANKS(ADDR_SURF_8_BANK));
3363                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3366                                 NUM_BANKS(ADDR_SURF_8_BANK));
3367                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370                                 NUM_BANKS(ADDR_SURF_16_BANK));
3371                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3372                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3373                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374                                 NUM_BANKS(ADDR_SURF_16_BANK));
3375                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3378                                  NUM_BANKS(ADDR_SURF_16_BANK));
3379                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3380                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3381                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3382                                  NUM_BANKS(ADDR_SURF_16_BANK));
3383                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3385                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3386                                  NUM_BANKS(ADDR_SURF_16_BANK));
3387                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3389                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3390                                  NUM_BANKS(ADDR_SURF_16_BANK));
3391                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394                                  NUM_BANKS(ADDR_SURF_8_BANK));
3395
3396                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3397                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3398                             reg_offset != 23)
3399                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3400
3401                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3402                         if (reg_offset != 7)
3403                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3404
3405                 break;
3406         }
3407 }
3408
3409 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3410                                   u32 se_num, u32 sh_num, u32 instance)
3411 {
3412         u32 data;
3413
3414         if (instance == 0xffffffff)
3415                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3416         else
3417                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3418
3419         if (se_num == 0xffffffff)
3420                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3421         else
3422                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3423
3424         if (sh_num == 0xffffffff)
3425                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3426         else
3427                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3428
3429         WREG32(mmGRBM_GFX_INDEX, data);
3430 }
3431
3432 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3433 {
3434         return (u32)((1ULL << bit_width) - 1);
3435 }
3436
3437 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3438 {
3439         u32 data, mask;
3440
3441         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3442                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3443
3444         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3445
3446         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3447                                        adev->gfx.config.max_sh_per_se);
3448
3449         return (~data) & mask;
3450 }
3451
3452 static void
3453 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3454 {
3455         switch (adev->asic_type) {
3456         case CHIP_FIJI:
3457                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3458                           RB_XSEL2(1) | PKR_MAP(2) |
3459                           PKR_XSEL(1) | PKR_YSEL(1) |
3460                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3461                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3462                            SE_PAIR_YSEL(2);
3463                 break;
3464         case CHIP_TONGA:
3465         case CHIP_POLARIS10:
3466                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3467                           SE_XSEL(1) | SE_YSEL(1);
3468                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3469                            SE_PAIR_YSEL(2);
3470                 break;
3471         case CHIP_TOPAZ:
3472         case CHIP_CARRIZO:
3473                 *rconf |= RB_MAP_PKR0(2);
3474                 *rconf1 |= 0x0;
3475                 break;
3476         case CHIP_POLARIS11:
3477                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3478                           SE_XSEL(1) | SE_YSEL(1);
3479                 *rconf1 |= 0x0;
3480                 break;
3481         case CHIP_STONEY:
3482                 *rconf |= 0x0;
3483                 *rconf1 |= 0x0;
3484                 break;
3485         default:
3486                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3487                 break;
3488         }
3489 }
3490
/*
 * gfx_v8_0_write_harvested_raster_configs - per-SE raster config for
 * parts with harvested render backends
 *
 * @raster_config:   default PA_SC_RASTER_CONFIG value (unharvested layout)
 * @raster_config_1: default PA_SC_RASTER_CONFIG_1 value
 * @rb_mask:         bitmap of RBs that are actually enabled
 * @num_rb:          total number of RB pipes assumed by the defaults
 *
 * When some RBs are disabled, the default mapping fields must be adjusted
 * per shader engine so that SE/packer/RB routing only targets RBs that
 * exist.  Writes PA_SC_RASTER_CONFIG once per SE (via GRBM_GFX_INDEX
 * steering) and leaves GRBM_GFX_INDEX in broadcast mode on return.
 * Caller is expected to hold adev->grbm_idx_mutex.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice rb_mask into one contiguous RB bitmap per shader engine */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* the remap logic below only supports these topologies */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if an entire SE pair has no RBs, remap work to the other pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* if one SE of the pair has no RBs, map everything to the other */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* same again at the packer level within this SE */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* and finally at the RB level inside each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3599
/*
 * gfx_v8_0_setup_rb - discover active render backends and program rasterizer
 *
 * Reads the active-RB bitmap for every SE/SH, caches the packed bitmap and
 * RB count in adev->gfx.config, then writes PA_SC_RASTER_CONFIG/_1: the
 * per-ASIC defaults when nothing is harvested (or no backend is enabled at
 * all), otherwise adjusted per-SE values that route around harvested RBs.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SE/SH bitmap into one global bitmap */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		/* some RBs are harvested: compute per-SE configs instead */
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	mutex_unlock(&adev->grbm_idx_mutex);
}
3641
3642 /**
3643  * gfx_v8_0_init_compute_vmid - gart enable
3644  *
3645  * @rdev: amdgpu_device pointer
3646  *
3647  * Initialize compute vmid sh_mem registers
3648  *
3649  */
3650 #define DEFAULT_SH_MEM_BASES    (0x6000)
3651 #define FIRST_COMPUTE_VMID      (8)
3652 #define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for private (low 16 bits) and shared (high 16 bits) */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* program the same config into every compute VMID (8..15) */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3686
/*
 * gfx_v8_0_gpu_init - one-time GFX block bring-up
 *
 * Programs the address/tiling configuration, discovers active RBs and CUs,
 * sets up the per-VMID SH_MEM apertures (graphics VMID 0 plus VMIDs 1-15,
 * then the compute VMIDs), and finally the PA_SC fifo sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel) uses uncached mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* user VMIDs use non-coherent cached mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3749
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * Polls the CU master busy register for every SE/SH (up to usec_timeout
 * iterations each, 1us apart), then polls the non-CU masters (SE, GC,
 * TC0/TC1).  A timeout is not reported to the caller; the function simply
 * returns after the last poll.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3779
3780 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3781                                                bool enable)
3782 {
3783         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3784
3785         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3786         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3787         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3788         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3789
3790         WREG32(mmCP_INT_CNTL_RING0, tmp);
3791 }
3792
3793 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3794 {
3795         /* csib */
3796         WREG32(mmRLC_CSIB_ADDR_HI,
3797                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3798         WREG32(mmRLC_CSIB_ADDR_LO,
3799                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3800         WREG32(mmRLC_CSIB_LENGTH,
3801                         adev->gfx.rlc.clear_state_size);
3802 }
3803
3804 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3805                                 int ind_offset,
3806                                 int list_size,
3807                                 int *unique_indices,
3808                                 int *indices_count,
3809                                 int max_indices,
3810                                 int *ind_start_offsets,
3811                                 int *offset_count,
3812                                 int max_offset)
3813 {
3814         int indices;
3815         bool new_entry = true;
3816
3817         for (; ind_offset < list_size; ind_offset++) {
3818
3819                 if (new_entry) {
3820                         new_entry = false;
3821                         ind_start_offsets[*offset_count] = ind_offset;
3822                         *offset_count = *offset_count + 1;
3823                         BUG_ON(*offset_count >= max_offset);
3824                 }
3825
3826                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3827                         new_entry = true;
3828                         continue;
3829                 }
3830
3831                 ind_offset += 2;
3832
3833                 /* look for the matching indice */
3834                 for (indices = 0;
3835                         indices < *indices_count;
3836                         indices++) {
3837                         if (unique_indices[indices] ==
3838                                 register_list_format[ind_offset])
3839                                 break;
3840                 }
3841
3842                 if (indices >= *indices_count) {
3843                         unique_indices[*indices_count] =
3844                                 register_list_format[ind_offset];
3845                         indices = *indices_count;
3846                         *indices_count = *indices_count + 1;
3847                         BUG_ON(*indices_count >= max_indices);
3848                 }
3849
3850                 register_list_format[ind_offset] = indices;
3851         }
3852 }
3853
3854 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3855 {
3856         int i, temp, data;
3857         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3858         int indices_count = 0;
3859         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3860         int offset_count = 0;
3861
3862         int list_size;
3863         unsigned int *register_list_format =
3864                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3865         if (register_list_format == NULL)
3866                 return -ENOMEM;
3867         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3868                         adev->gfx.rlc.reg_list_format_size_bytes);
3869
3870         gfx_v8_0_parse_ind_reg_list(register_list_format,
3871                                 RLC_FormatDirectRegListLength,
3872                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3873                                 unique_indices,
3874                                 &indices_count,
3875                                 sizeof(unique_indices) / sizeof(int),
3876                                 indirect_start_offsets,
3877                                 &offset_count,
3878                                 sizeof(indirect_start_offsets)/sizeof(int));
3879
3880         /* save and restore list */
3881         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3882
3883         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3884         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3885                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3886
3887         /* indirect list */
3888         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3889         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3890                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3891
3892         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3893         list_size = list_size >> 1;
3894         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3895         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3896
3897         /* starting offsets starts */
3898         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3899                 adev->gfx.rlc.starting_offsets_start);
3900         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3901                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3902                                 indirect_start_offsets[i]);
3903
3904         /* unique indices */
3905         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3906         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3907         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3908                 if (unique_indices[i] != 0) {
3909                         amdgpu_mm_wreg(adev, temp + i,
3910                                         unique_indices[i] & 0x3FFFF, false);
3911                         amdgpu_mm_wreg(adev, data + i,
3912                                         unique_indices[i] >> 20, false);
3913                 }
3914         }
3915         kfree(register_list_format);
3916
3917         return 0;
3918 }
3919
/* Enable the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3924
/*
 * gfx_v8_0_init_power_gating - program static power-gating delay parameters
 *
 * Only acts when some form of GFX power gating (PG/SMG/DMG) is supported.
 * The delay/threshold values are fixed magic numbers for this hardware
 * generation.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* all four PG delay fields get the same 0x10 value */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3944
/* Set/clear the RLC_PG_CNTL bit that slows the SMU clock during power-up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3950
/* Set/clear the RLC_PG_CNTL bit that slows the SMU clock during power-down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3956
/* Enable/disable CP power gating.  Note the inverted sense: the hardware
 * field is CP_PG_DISABLE, so enable == true clears the bit. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
3961
3962 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3963 {
3964         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3965                               AMD_PG_SUPPORT_GFX_SMG |
3966                               AMD_PG_SUPPORT_GFX_DMG |
3967                               AMD_PG_SUPPORT_CP |
3968                               AMD_PG_SUPPORT_GDS |
3969                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3970                 gfx_v8_0_init_csb(adev);
3971                 gfx_v8_0_init_save_restore_list(adev);
3972                 gfx_v8_0_enable_save_restore_machine(adev);
3973
3974                 if ((adev->asic_type == CHIP_CARRIZO) ||
3975                     (adev->asic_type == CHIP_STONEY)) {
3976                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3977                         gfx_v8_0_init_power_gating(adev);
3978                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3979                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3980                                 cz_enable_sck_slow_down_on_power_up(adev, true);
3981                                 cz_enable_sck_slow_down_on_power_down(adev, true);
3982                         } else {
3983                                 cz_enable_sck_slow_down_on_power_up(adev, false);
3984                                 cz_enable_sck_slow_down_on_power_down(adev, false);
3985                         }
3986                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3987                                 cz_enable_cp_power_gating(adev, true);
3988                         else
3989                                 cz_enable_cp_power_gating(adev, false);
3990                 } else if (adev->asic_type == CHIP_POLARIS11) {
3991                         gfx_v8_0_init_power_gating(adev);
3992                 }
3993         }
3994 }
3995
/* Halt the RLC F32 core, mask the GUI idle interrupts, then wait for the
 * RLC serdes masters to drain. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4003
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET with settle delays. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	/* assert reset, give the hardware time to latch it */
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	/* de-assert reset and wait again before the RLC is touched */
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4012
/* Start the RLC F32 core; on dGPUs also re-enable the GUI idle interrupt. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* let the RLC come up before anyone else touches it */
	udelay(50);
}
4023
/*
 * Stream the RLC_G microcode into the RLC GPM ucode RAM via MMIO.
 * Returns 0 on success, -EINVAL when no RLC firmware has been fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* the ucode payload follows the header at the offset it declares */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* reset the autoincrementing write address, stream in the dwords,
	 * then write the fw version to the address register to finish */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4047
/*
 * Full RLC bring-up: stop the RLC, disable clock/power gating, soft-reset,
 * reprogram the power-gating state, load the RLC microcode (when the driver
 * owns fw loading) and finally restart the RLC.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris has a separate 3D CGCG/CGLS control; clear its
		 * two enable bits as well */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU performs the load; just wait for completion */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4091
4092 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4093 {
4094         int i;
4095         u32 tmp = RREG32(mmCP_ME_CNTL);
4096
4097         if (enable) {
4098                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4099                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4100                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4101         } else {
4102                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4103                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4104                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4105                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4106                         adev->gfx.gfx_ring[i].ready = false;
4107         }
4108         WREG32(mmCP_ME_CNTL, tmp);
4109         udelay(50);
4110 }
4111
4112 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4113 {
4114         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4115         const struct gfx_firmware_header_v1_0 *ce_hdr;
4116         const struct gfx_firmware_header_v1_0 *me_hdr;
4117         const __le32 *fw_data;
4118         unsigned i, fw_size;
4119
4120         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4121                 return -EINVAL;
4122
4123         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4124                 adev->gfx.pfp_fw->data;
4125         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4126                 adev->gfx.ce_fw->data;
4127         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4128                 adev->gfx.me_fw->data;
4129
4130         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4131         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4132         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4133
4134         gfx_v8_0_cp_gfx_enable(adev, false);
4135
4136         /* PFP */
4137         fw_data = (const __le32 *)
4138                 (adev->gfx.pfp_fw->data +
4139                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4140         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4141         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4142         for (i = 0; i < fw_size; i++)
4143                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4144         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4145
4146         /* CE */
4147         fw_data = (const __le32 *)
4148                 (adev->gfx.ce_fw->data +
4149                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4150         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4151         WREG32(mmCP_CE_UCODE_ADDR, 0);
4152         for (i = 0; i < fw_size; i++)
4153                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4154         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4155
4156         /* ME */
4157         fw_data = (const __le32 *)
4158                 (adev->gfx.me_fw->data +
4159                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4160         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4161         WREG32(mmCP_ME_RAM_WADDR, 0);
4162         for (i = 0; i < fw_size; i++)
4163                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4164         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4165
4166         return 0;
4167 }
4168
4169 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4170 {
4171         u32 count = 0;
4172         const struct cs_section_def *sect = NULL;
4173         const struct cs_extent_def *ext = NULL;
4174
4175         /* begin clear state */
4176         count += 2;
4177         /* context control state */
4178         count += 3;
4179
4180         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4181                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4182                         if (sect->id == SECT_CONTEXT)
4183                                 count += 2 + ext->reg_count;
4184                         else
4185                                 return 0;
4186                 }
4187         }
4188         /* pa_sc_raster_config/pa_sc_raster_config1 */
4189         count += 4;
4190         /* end clear state */
4191         count += 2;
4192         /* clear state */
4193         count += 2;
4194
4195         return count;
4196 }
4197
/*
 * Initialize the gfx CP and emit the clear-state setup on gfx ring 0:
 * begin-clear-state preamble, context control, the golden context register
 * state from vi_cs_data, per-ASIC PA_SC_RASTER_CONFIG values, the
 * CLEAR_STATE packet and the CE partition bases.
 * Returns 0 on success or the ring allocation error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* reserve space for the whole clear-state sequence plus the
	 * trailing SET_BASE packet (4 dwords) */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the golden context register state */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* PA_SC_RASTER_CONFIG/_1 are per-ASIC (RB/SE layout dependent) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* Topaz raster config depends on the number of RBs */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4289
/*
 * Program the gfx ring buffer registers (size, pointers, rptr writeback
 * address, base, doorbell) and start the ring, finishing with a ring test.
 * Returns 0 on success or the ring test error; on test failure the ring
 * is left marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA lets us reset the read pointer */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer reset settle before re-arming normal operation */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			/* restrict the gfx doorbell range to ring 0 */
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4370
4371 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4372 {
4373         int i;
4374
4375         if (enable) {
4376                 WREG32(mmCP_MEC_CNTL, 0);
4377         } else {
4378                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4379                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4380                         adev->gfx.compute_ring[i].ready = false;
4381         }
4382         udelay(50);
4383 }
4384
/*
 * Load the MEC1 compute microcode (and MEC2 when a separate image was
 * fetched) into the MEC ucode RAMs with the compute CP halted.
 * Returns 0 on success, -EINVAL when the MEC firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	/* payload follows the header at the offset it declares */
	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4430
/*
 * Memory Queue Descriptor (MQD) layout for VI compute queues.
 * The CP/HQD hardware reads and writes this structure in memory when a
 * queue is mapped or unmapped, so the field order and offsets must match
 * the hardware definition exactly (ordinal0..ordinal255, 256 dwords),
 * followed by a 256-dword scratch area reserved for the ucode.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4691
4692 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4693 {
4694         int i, r;
4695
4696         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4697                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4698
4699                 if (ring->mqd_obj) {
4700                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4701                         if (unlikely(r != 0))
4702                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4703
4704                         amdgpu_bo_unpin(ring->mqd_obj);
4705                         amdgpu_bo_unreserve(ring->mqd_obj);
4706
4707                         amdgpu_bo_unref(&ring->mqd_obj);
4708                         ring->mqd_obj = NULL;
4709                 }
4710         }
4711 }
4712
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) queues
 *
 * First programs the per-pipe EOP buffers, then for every compute ring:
 * allocates (if needed) and maps an MQD buffer object, fills in the MQD,
 * mirrors its contents into the HQD registers of the selected queue and
 * activates it.  Finally enables the compute CP and ring-tests each ring.
 *
 * Returns 0 on success or a negative error code; on MQD setup failure the
 * already-created MQDs are torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0..3 belong to MEC1 (me=1), pipes 4..7 to MEC2 (me=2) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		/* registers take a 256-byte aligned address, hence >> 8 */
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all SEs/threads for static thread management */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* record the EOP base set up in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait for the dequeue to complete */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		/*
		 * NOTE(review): REG_SET_FIELD() already shifts the value into
		 * the field, so the explicit "<< 8" here looks double-shifted;
		 * later kernels pass the bare order value - verify against the
		 * CP_HQD_PQ_CONTROL field layout.
		 */
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* only these ASICs need the MEC doorbell range set here */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			/* enable GENERIC2 interrupts on ME1 pipe3 for these ASICs */
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every compute ring; mark failures as not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4971
/*
 * gfx_v8_0_cp_resume - load CP microcode (if needed) and start the CP
 *
 * When powerplay is disabled, either loads the CE/PFP/ME (and MEC)
 * microcode directly (legacy path) or waits for the SMU to finish loading
 * it; Topaz always loads the MEC microcode directly.  Then resumes the
 * GFX and compute rings and re-enables the GUI idle interrupt.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* quiesce GUI idle interrupts during bring-up on dGPUs */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-managed loading: just wait for each fw to land */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				/* Topaz SMU does not load the MEC fw */
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
5030
/* Enable or disable both the GFX and the compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5036
/*
 * gfx_v8_0_hw_init - hw init callback for the GFX IP block
 *
 * Applies the golden register settings, initializes the GPU, then brings
 * up the RLC and the command processors.  Returns 0 on success or the
 * first error encountered.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5053
/*
 * gfx_v8_0_hw_fini - hw fini callback for the GFX IP block
 *
 * Drops the privileged reg/instruction interrupt references, stops the
 * command processors and the RLC, frees the compute MQDs, and ungates
 * GFX powergating.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5069
/* Suspend is simply a full hardware teardown of the GFX block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5076
/* Resume re-runs the full hardware init of the GFX block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5083
5084 static bool gfx_v8_0_is_idle(void *handle)
5085 {
5086         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5087
5088         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5089                 return false;
5090         else
5091                 return true;
5092 }
5093
5094 static int gfx_v8_0_wait_for_idle(void *handle)
5095 {
5096         unsigned i;
5097         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5098
5099         for (i = 0; i < adev->usec_timeout; i++) {
5100                 if (gfx_v8_0_is_idle(handle))
5101                         return 0;
5102
5103                 udelay(1);
5104         }
5105         return -ETIMEDOUT;
5106 }
5107
/*
 * gfx_v8_0_check_soft_reset - decide whether a GFX soft reset is needed
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for busy bits and
 * accumulates the corresponding soft-reset request bits.  The results
 * are cached in adev->gfx.grbm_soft_reset / srbm_soft_reset for the
 * pre/soft/post reset handlers.  Returns true if any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy graphics pipeline unit -> reset CP, GFX and GRBM */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* a busy CP fetcher/compute/gfx block -> reset all CP units and GRBM */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* cache the request bits for the reset handlers */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5169
/*
 * gfx_v8_0_inactive_hqd - drain an active hardware queue descriptor
 *
 * Selects the ring's me/pipe/queue via SRBM and, if the HQD is active,
 * writes a dequeue request and busy-waits up to adev->usec_timeout usecs
 * for the queue to deactivate; times out silently.
 *
 * NOTE(review): the SRBM selection is left pointing at this queue on
 * return (no vi_srbm_select(adev, 0, 0, 0, 0)) - confirm callers account
 * for that.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
5189
/*
 * gfx_v8_0_pre_soft_reset - quiesce the GFX block before a soft reset
 *
 * Uses the reset bits cached by gfx_v8_0_check_soft_reset().  Stops the
 * RLC, disables GFX command parsing if the CP/GFX is being reset, and
 * drains every compute HQD and disables the MEC if any CP unit is being
 * reset.  Returns 0 (also when no reset is pending).
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing to do if check_soft_reset found no busy units */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* drain every compute queue before disabling the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5227
/*
 * gfx_v8_0_soft_reset - pulse the pending GRBM/SRBM soft-reset bits
 *
 * Stalls the GMCON interface around the reset, asserts the cached reset
 * bits, holds them for 50us and deasserts them again, then releases the
 * GMCON stall.  The reads back of the *_SOFT_RESET registers after each
 * write presumably flush/post the write - the order of operations here
 * is deliberate; do not reorder.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX memory traffic while the reset is applied */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert, hold 50us, then deassert the GRBM reset bits */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/hold/deassert sequence for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5289
/*
 * gfx_v8_0_init_hqd - clear the dequeue request and queue pointers of a
 * ring's HQD after a soft reset, restoring the SRBM selection afterwards.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5299
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after soft reset
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): resumes the GFX CP if it was
 * reset, re-initializes every compute HQD and resumes the MEC if any CP
 * unit was reset, and finally restarts the RLC.  Returns 0 (also when
 * no reset was pending).
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* reset each HQD's pointers before restarting the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5333
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* serialize capture + two-register read so the halves are coherent */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	/* writing 1 latches the counter into the LSB/MSB registers */
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5353
/*
 * gfx_v8_0_ring_emit_gds_switch - emit a GDS partition switch on a ring
 *
 * Converts the byte-based GDS/GWS/OA base and size values to register
 * units and emits four WRITE_DATA PM4 packets that program the per-VMID
 * GDS base/size, GWS allocation and OA mask registers.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* scale byte values down to the units the registers expect */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous mask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5401
/* GFX IP callbacks exported to the rest of the driver via adev->gfx.funcs. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5406
/*
 * gfx_v8_0_early_init - early GFX IP block initialization
 *
 * Records the fixed gfx/compute ring counts and installs the ring, irq,
 * GDS and RLC callback tables.  No hardware access happens here.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5421
/*
 * gfx_v8_0_late_init - late GFX IP block initialization
 *
 * Enables the privileged register/instruction fault interrupts, runs the
 * EDC GPR workaround (which submits IBs, hence late init: the IB pool
 * must already exist) and then requests GFX power gating.
 * Returns 0 on success or a negative error code from irq enabling or
 * the EDC workaround.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5445
/*
 * gfx_v8_0_enable_gfx_static_mg_power_gating - toggle static per-CU PG
 *
 * On Polaris11 the SMU must be notified (through powerplay) before the
 * RLC-side enable bit is flipped; other ASICs only need the register
 * write.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (adev->asic_type == CHIP_POLARIS11)
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5458
/* Toggle dynamic per-CU power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5464
/* Toggle "quick" power gating (Polaris11 feature) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5470
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney) via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5476
/*
 * Toggle GFX pipeline power gating (Carrizo/Stoney) via RLC_PG_CNTL.
 * On disable, a dummy register read forces the GFX block awake so the
 * new setting takes effect.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5486
5487 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5488                                           bool enable)
5489 {
5490         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5491                 cz_enable_gfx_cg_power_gating(adev, true);
5492                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5493                         cz_enable_gfx_pipeline_power_gating(adev, true);
5494         } else {
5495                 cz_enable_gfx_cg_power_gating(adev, false);
5496                 cz_enable_gfx_pipeline_power_gating(adev, false);
5497         }
5498 }
5499
/*
 * gfx_v8_0_set_powergating_state - per-ASIC GFX power-gating control
 *
 * No-op (returns 0) when GFX PG is unsupported.  When any PG feature
 * that requires it is present, the whole sequence is bracketed by the
 * RLC safe-mode enter/exit callbacks.  Each supported feature (static
 * MG, dynamic MG, quick MG, CG power gating) is enabled only when both
 * the corresponding pg_flag is set and gating was requested; it is
 * force-disabled otherwise.  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
			cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* other ASICs have no GFX PG handling here */
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
5556
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes
 *
 * Selects all SE/SH, addresses every CU and non-CU serdes master, clears
 * the previous command/control fields in RLC_SERDES_WR_CTRL and writes
 * the new command @cmd for BPM register @reg_addr with the broadcast BPM
 * address (0xff).  Stoney keeps its existing BPM_DATA/REG_ADDR field
 * contents (they are not masked off there); all other ASICs clear them
 * first.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* 0xff in BPM_ADDR broadcasts the command to all BPMs */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5597
/*
 * RLC safe-mode handshake: message codes plus the RLC_GPR_REG2 field
 * layout used on Carrizo/Stoney (REQ in bit 0 acts as the request
 * strobe, MESSAGE occupies bits 4:1).
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5604
/*
 * cz_enter_rlc_safe_mode - request RLC safe mode on Carrizo/Stoney
 *
 * No-op while the RLC F32 core is disabled.  When any GFX clock or
 * power gating feature is active, the ENTER message is strobed through
 * RLC_GPR_REG2, then the function polls (bounded by adev->usec_timeout)
 * first for RLC_GPM_STAT to report GFX clocked and powered, then for
 * the RLC to acknowledge by clearing the REQ bit, before recording
 * in_safe_mode = true.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for GFX to be both clocked and powered up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge (REQ cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5640
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode on Carrizo/Stoney
 *
 * Mirror of cz_enter_rlc_safe_mode(): no-op with the RLC F32 core
 * disabled; otherwise strobes the EXIT message through RLC_GPR_REG2.
 * Note the REQ-acknowledge poll below runs even when no message was
 * sent (unlike the enter path, it sits outside the feature check).
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to acknowledge (REQ cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5666
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE
 *
 * Same handshake idea as the CZ variant but using the architected
 * RLC_SAFE_MODE register (CMD strobe + MESSAGE=1), and gated only on
 * the clock-gating flags.  Polls for GFX clocked/powered and for the
 * CMD-bit acknowledge, both bounded by adev->usec_timeout, then records
 * in_safe_mode = true.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX to be both clocked and powered up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge (CMD cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5700
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode via RLC_SAFE_MODE
 *
 * Sends the exit command (CMD strobe with MESSAGE=0) only if safe mode
 * was previously entered (in_safe_mode).  The acknowledge poll runs
 * unconditionally afterwards, bounded by adev->usec_timeout.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge (CMD cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5725
/* No-op safe-mode entry for ASICs without the handshake; only tracks state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5730
/* No-op safe-mode exit for ASICs without the handshake; only tracks state. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5735
/* RLC safe-mode callbacks for Carrizo/Stoney (RLC_GPR_REG2 handshake). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5740
/* RLC safe-mode callbacks using the RLC_SAFE_MODE register handshake. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5745
/* Stub RLC safe-mode callbacks for ASICs that need no handshake. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5750
/*
 * gfx_v8_0_update_medium_grain_clock_gating - MGCG/MGLS/CGTS control
 *
 * Enables or disables medium-grain clock gating, memory light sleep
 * (RLC and CP) and CGTS tree-shade gating.  The whole sequence runs
 * under RLC safe mode.  The numbered step comments mark the required
 * hardware ordering: override registers are programmed, the serdes
 * masters are waited on, then the BPM serdes command commits the
 * override change.  Registers are only written when their value
 * actually changes.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5854
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - CGCG/CGLS control
 *
 * Enables or disables coarse-grain clock gating (and CGLS when
 * supported) under RLC safe mode.  On enable: clear the CGCG override
 * through the BPM serdes, set CGLS, then flip the enable bits in
 * RLC_CGCG_CGLS_CTRL and turn the GUI idle interrupts back on.  On
 * disable: the interrupts go off first, the overrides are set, CGLS is
 * cleared and finally the enable bits are dropped.  Registers are only
 * written when their value actually changes.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - drop the CGCG override bit */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5945 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5946                                             bool enable)
5947 {
5948         if (enable) {
5949                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5950                  * ===  MGCG + MGLS + TS(CG/LS) ===
5951                  */
5952                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5953                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5954         } else {
5955                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5956                  * ===  CGCG + CGLS ===
5957                  */
5958                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5959                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5960         }
5961         return 0;
5962 }
5963
5964 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5965                                           enum amd_clockgating_state state)
5966 {
5967         uint32_t msg_id, pp_state;
5968         void *pp_handle = adev->powerplay.pp_handle;
5969
5970         if (state == AMD_CG_STATE_UNGATE)
5971                 pp_state = 0;
5972         else
5973                 pp_state = PP_STATE_CG | PP_STATE_LS;
5974
5975         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5976                         PP_BLOCK_GFX_CG,
5977                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5978                         pp_state);
5979         amd_set_clockgating_by_smu(pp_handle, msg_id);
5980
5981         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5982                         PP_BLOCK_GFX_MG,
5983                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5984                         pp_state);
5985         amd_set_clockgating_by_smu(pp_handle, msg_id);
5986
5987         return 0;
5988 }
5989
5990 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5991                                           enum amd_clockgating_state state)
5992 {
5993         uint32_t msg_id, pp_state;
5994         void *pp_handle = adev->powerplay.pp_handle;
5995
5996         if (state == AMD_CG_STATE_UNGATE)
5997                 pp_state = 0;
5998         else
5999                 pp_state = PP_STATE_CG | PP_STATE_LS;
6000
6001         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6002                         PP_BLOCK_GFX_CG,
6003                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6004                         pp_state);
6005         amd_set_clockgating_by_smu(pp_handle, msg_id);
6006
6007         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6008                         PP_BLOCK_GFX_3D,
6009                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6010                         pp_state);
6011         amd_set_clockgating_by_smu(pp_handle, msg_id);
6012
6013         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6014                         PP_BLOCK_GFX_MG,
6015                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6016                         pp_state);
6017         amd_set_clockgating_by_smu(pp_handle, msg_id);
6018
6019         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6020                         PP_BLOCK_GFX_RLC,
6021                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6022                         pp_state);
6023         amd_set_clockgating_by_smu(pp_handle, msg_id);
6024
6025         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6026                         PP_BLOCK_GFX_CP,
6027                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6028                         pp_state);
6029         amd_set_clockgating_by_smu(pp_handle, msg_id);
6030
6031         return 0;
6032 }
6033
6034 static int gfx_v8_0_set_clockgating_state(void *handle,
6035                                           enum amd_clockgating_state state)
6036 {
6037         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6038
6039         switch (adev->asic_type) {
6040         case CHIP_FIJI:
6041         case CHIP_CARRIZO:
6042         case CHIP_STONEY:
6043                 gfx_v8_0_update_gfx_clock_gating(adev,
6044                                                  state == AMD_CG_STATE_GATE ? true : false);
6045                 break;
6046         case CHIP_TONGA:
6047                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6048                 break;
6049         case CHIP_POLARIS10:
6050         case CHIP_POLARIS11:
6051                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6052                 break;
6053         default:
6054                 break;
6055         }
6056         return 0;
6057 }
6058
/* Read the ring's read pointer from its writeback slot. */
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
6063
/*
 * Read the gfx ring's write pointer: from the writeback slot when the
 * ring uses a doorbell, otherwise straight from CP_RB0_WPTR.
 */
static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}
6074
/*
 * Commit the gfx ring's write pointer to hardware: via writeback slot +
 * doorbell when enabled, otherwise via CP_RB0_WPTR (with a read-back to
 * flush the write).
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6088
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on a gfx/compute ring
 *
 * Emits a PM4 WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and
 * waits for the matching bit in GPU_HDP_FLUSH_DONE.  The request bit is
 * selected per ring: CP0 for gfx, or a CP2/CP6-based bit derived from
 * the compute ring's ME/pipe.  Compute MEs other than 1 and 2 are not
 * handled and emit nothing.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6120
/*
 * Emit a WRITE_DATA packet that writes 1 to HDP_DEBUG0, which is the
 * mechanism this driver uses to invalidate the HDP read cache.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | /* ME engine */
				 WRITE_DATA_DST_SEL(0) |    /* register dst */
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0); /* high bits of register address (unused) */
	amdgpu_ring_write(ring, 1); /* value to write */

}
6132
/*
 * Schedule an indirect buffer on the gfx ring.
 *
 * CE IBs use INDIRECT_BUFFER_CONST; DE IBs use INDIRECT_BUFFER.  The
 * control dword carries the IB length in dwords and the VMID in bits 31:24.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* byte-swap control for big-endian hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6155
/*
 * Schedule an indirect buffer on a compute ring.  Unlike the gfx variant
 * there is no CE path; the control dword additionally sets
 * INDIRECT_BUFFER_VALID.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* byte-swap control for big-endian hosts */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6171
/*
 * Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush TC/TCL1 caches,
 * write the 32- or 64-bit sequence number to @addr, and optionally raise
 * an interrupt, depending on @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6192
/*
 * Emit a WAIT_REG_MEM that stalls the ring until its own fence memory
 * reaches the last synced sequence number, serializing against prior work.
 * Gfx rings wait on the PFP engine; compute rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6209
/*
 * Emit a VM TLB flush for @vm_id pointing at page directory @pd_addr.
 *
 * Sequence: program the per-VMID page table base register, request an
 * invalidate via VM_INVALIDATE_REQUEST, then wait for it to complete.
 * On gfx rings, 128-dword NOP padding before and after keeps the CE and
 * DE from racing across the flush, and PFP_SYNC_ME prevents stale PFP
 * prefetches afterwards.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
	if (usepfp)
		amdgpu_ring_insert_nop(ring, 128);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* base is in 4K page units */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6262
6263 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6264 {
6265         return ring->adev->wb.wb[ring->wptr_offs];
6266 }
6267
6268 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6269 {
6270         struct amdgpu_device *adev = ring->adev;
6271
6272         /* XXX check if swapping is necessary on BE */
6273         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6274         WDOORBELL32(ring->doorbell_index, ring->wptr);
6275 }
6276
/*
 * Emit a fence on a compute ring via RELEASE_MEM: flush TC/TCL1 caches,
 * write the 32- or 64-bit sequence number to @addr, and optionally raise
 * an interrupt, depending on @flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6297
/* Emit a SWITCH_BUFFER packet (flips the CE/DE double buffer). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6303
/*
 * Emit a CONTEXT_CONTROL packet whose load-enable bits tell the CP which
 * state groups to reload, based on the submission @flags (context switch
 * and/or preamble present).
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6332
/* Dword cost of one gfx_v8_0_ring_emit_ib_gfx() call. */
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
	return 4; /* gfx_v8_0_ring_emit_ib_gfx */
}
6338
/*
 * Worst-case dword budget for one gfx submission frame, summed over every
 * packet emitter that may run per frame (the per-item comments name the
 * contributing function).
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3; /* gfx_v8_ring_emit_cntxcntl */
}
6351
/* Dword cost of one gfx_v8_0_ring_emit_ib_compute() call. */
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
	return 4; /* gfx_v8_0_ring_emit_ib_compute */
}
6357
/*
 * Worst-case dword budget for one compute submission frame (see the gfx
 * variant above; compute has no switch-buffer/cntxcntl packets).
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
6368
6369 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6370                                                  enum amdgpu_interrupt_state state)
6371 {
6372         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6373                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6374 }
6375
6376 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6377                                                      int me, int pipe,
6378                                                      enum amdgpu_interrupt_state state)
6379 {
6380         /*
6381          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6382          * handles the setting of interrupts for this specific pipe. All other
6383          * pipes' interrupts are set by amdkfd.
6384          */
6385
6386         if (me == 1) {
6387                 switch (pipe) {
6388                 case 0:
6389                         break;
6390                 default:
6391                         DRM_DEBUG("invalid pipe %d\n", pipe);
6392                         return;
6393                 }
6394         } else {
6395                 DRM_DEBUG("invalid me %d\n", me);
6396                 return;
6397         }
6398
6399         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6400                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6401 }
6402
6403 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6404                                              struct amdgpu_irq_src *source,
6405                                              unsigned type,
6406                                              enum amdgpu_interrupt_state state)
6407 {
6408         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6409                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6410
6411         return 0;
6412 }
6413
6414 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6415                                               struct amdgpu_irq_src *source,
6416                                               unsigned type,
6417                                               enum amdgpu_interrupt_state state)
6418 {
6419         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6420                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6421
6422         return 0;
6423 }
6424
/*
 * Dispatch an EOP interrupt state change to the right engine: the gfx
 * ring, or one of the eight MEC1/MEC2 compute pipes (me, pipe pairs).
 * Unknown types are silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6463
/*
 * EOP interrupt handler: decode the source ring from the IV entry's
 * ring_id (me in bits 3:2, pipe in bits 1:0, queue in bits 6:4) and run
 * fence processing on the matching gfx or compute ring.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* ME0 is the gfx engine; only one gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6495
6496 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6497                                  struct amdgpu_irq_src *source,
6498                                  struct amdgpu_iv_entry *entry)
6499 {
6500         DRM_ERROR("Illegal register access in command stream\n");
6501         schedule_work(&adev->reset_work);
6502         return 0;
6503 }
6504
6505 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6506                                   struct amdgpu_irq_src *source,
6507                                   struct amdgpu_iv_entry *entry)
6508 {
6509         DRM_ERROR("Illegal instruction in command stream\n");
6510         schedule_work(&adev->reset_work);
6511         return 0;
6512 }
6513
/* IP-block callback table for the GFX v8 engine (init/fini, power, reset). */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6533
/* Ring callback table for the gfx ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
};
6555
/* Ring callback table for compute rings (no CE, switch-buffer or cntxcntl). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
};
6575
6576 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6577 {
6578         int i;
6579
6580         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6581                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6582
6583         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6584                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6585 }
6586
/* IRQ source callbacks for CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6591
/* IRQ source callbacks for privileged-register fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6596
/* IRQ source callbacks for privileged-instruction fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6601
6602 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6603 {
6604         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6605         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6606
6607         adev->gfx.priv_reg_irq.num_types = 1;
6608         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6609
6610         adev->gfx.priv_inst_irq.num_types = 1;
6611         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6612 }
6613
6614 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6615 {
6616         switch (adev->asic_type) {
6617         case CHIP_TOPAZ:
6618                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6619                 break;
6620         case CHIP_STONEY:
6621         case CHIP_CARRIZO:
6622                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6623                 break;
6624         default:
6625                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6626                 break;
6627         }
6628 }
6629
/*
 * Initialize the ASIC's GDS (global data share) bookkeeping: read the
 * total GDS memory size from the hardware and split memory, GWS and OA
 * between the gfx and CS partitions.  The split differs for 64KB parts.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6657
6658 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6659                                                  u32 bitmap)
6660 {
6661         u32 data;
6662
6663         if (!bitmap)
6664                 return;
6665
6666         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6667         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6668
6669         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6670 }
6671
/*
 * Return the active-CU bitmap for the currently selected shader array:
 * OR the fuse-level and user-level inactive masks, invert, and clamp to
 * the number of CUs per SH.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
6683
/*
 * Populate adev->gfx.cu_info: walk every SE/SH pair under grbm_idx_mutex,
 * apply any module-parameter CU-disable masks, record the per-array
 * active-CU bitmap, count active CUs, and build the always-on CU mask
 * (first two active CUs of each array).
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* parse amdgpu.disable_cu into per-SE/SH masks (max 4 SEs x 2 SHs) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* steer register access to SE i / SH j */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two become always-on */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast (all SE/SH) register steering */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}