/* GNU Linux-libre 4.9-gnu1
 * [releases.git] / drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM Register Address*/
/* Logical indices of the BPM serdes registers driven by the
 * SET/CLE_BPM_SERDES_CMD commands above. */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX            /* count sentinel — NOTE(review): assumed, confirm users */
};
91
92 #define RLC_FormatDirectRegListLength        14
93
94 /*(DEBLOBBED)*/
95
/* Per-VMID GDS register offsets, indexed by VMID (0-15).  Each entry
 * holds the {BASE, SIZE, GWS, OA} register offsets for that VMID, per
 * the amdgpu_gds_reg_offset struct layout declared elsewhere. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
115
/* Tonga (A11) golden register fixups, consumed by
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers().
 * Flat triplets of {reg offset, mask, value} — presumably a masked
 * read-modify-write; confirm against the amdgpu helper. */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
134
/* Tonga common golden registers ({reg, mask, value} triplets for
 * amdgpu_program_register_sequence()); raster config and SPI CU
 * reservation defaults.  GB_ADDR_CONFIG matches
 * TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003). */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
146
/* Tonga clock-gating (MGCG/CGCG) init sequence: {reg, mask, value}
 * triplets for amdgpu_program_register_sequence().  The repeated
 * GRBM_GFX_INDEX 0xe0000000 writes presumably select broadcast mode for
 * the per-SE/SH CGTT/CGTS writes that follow — confirm against the
 * gfx_8_0 register spec. */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU (CU0..CU7) CGTS control registers */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
225
/* Polaris11 (A11) golden register fixups: {reg, mask, value} triplets
 * for amdgpu_program_register_sequence() (see CHIP_POLARIS11 in
 * gfx_v8_0_init_golden_registers()). */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
246
/* Polaris11 common golden registers ({reg, mask, value} triplets).
 * GB_ADDR_CONFIG matches POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002). */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
256
/* Polaris10 (A11) golden register fixups: {reg, mask, value} triplets
 * for amdgpu_program_register_sequence(); consumer is below the visible
 * portion of gfx_v8_0_init_golden_registers(). */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
277
/* Polaris10 common golden registers ({reg, mask, value} triplets);
 * same raster config as Tonga, GB_ADDR_CONFIG 0x22011003. */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
289
/* Fiji common golden registers ({reg, mask, value} triplets), applied
 * in gfx_v8_0_init_golden_registers() for CHIP_FIJI. */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
303
/* Fiji (A10) golden register fixups: {reg, mask, value} triplets for
 * amdgpu_program_register_sequence(). */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
318
/* Fiji clock-gating (MGCG/CGCG) init sequence: {reg, mask, value}
 * triplets.  Unlike the Tonga/Iceland/CZ tables, no per-CU CGTS
 * registers appear here. */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
357
/* Iceland/Topaz (A11) golden register fixups: {reg, mask, value}
 * triplets applied for CHIP_TOPAZ in gfx_v8_0_init_golden_registers(). */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
377
/* Iceland/Topaz common golden registers ({reg, mask, value} triplets).
 * GB_ADDR_CONFIG matches TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001). */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
389
/* Iceland/Topaz clock-gating (MGCG/CGCG) init sequence: {reg, mask,
 * value} triplets.  Covers CU0..CU5 only (fewer CUs than Tonga/CZ);
 * note the distinct TA_SQC values for CU0/CU4 (0x0f840f87). */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU (CU0..CU5) CGTS control registers */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
457
/* Carrizo (A11) golden register fixups: {reg, mask, value} triplets for
 * amdgpu_program_register_sequence(); consumer is below the visible
 * portion of gfx_v8_0_init_golden_registers(). */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
473
/* Carrizo common golden registers ({reg, mask, value} triplets).
 * GB_ADDR_CONFIG matches CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001). */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
485
/* Carrizo clock-gating (MGCG/CGCG) init sequence: {reg, mask, value}
 * triplets covering CU0..CU7.  Differs from Tonga in the final
 * RLC_CGCG_CGLS_CTRL value (0x0020003f vs 0x0020003c). */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU (CU0..CU7) CGTS control registers */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
564
/* Stoney (A11) golden register fixups: {reg, mask, value} triplets for
 * amdgpu_program_register_sequence(). */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
578
/* Stoney common golden registers ({reg, mask, value} triplets).
 * GB_ADDR_CONFIG 0x12010001 is unique to Stoney (no matching
 * *_GOLDEN define above). */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
590
/* Stoney clock-gating (MGCG/CGCG) init sequence: {reg, mask, value}
 * triplets — much shorter than the other ASICs' tables; adds memory
 * light-sleep (CP/RLC_MEM_SLP_CNTL) settings. */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
599
600 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
601 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
602 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
603 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
604 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
605 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
606
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the clockgating (mgcg/cgcg) init sequence, the golden settings
 * and the common golden sequence for the detected VI-family ASIC via
 * amdgpu_program_register_sequence().  Unknown ASIC types are left
 * untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific quirk for certain Polaris10 (rev 0xc7) cards
		 * from AMD/ASUS/XFX: issue two I2C transactions via atombios.
		 * NOTE(review): presumably adjusts an external controller
		 * (e.g. fan/power) on these boards - confirm against the
		 * originating commit before relying on the details.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
694
695 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
696 {
697         int i;
698
699         adev->gfx.scratch.num_reg = 7;
700         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
701         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
702                 adev->gfx.scratch.free[i] = true;
703                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
704         }
705 }
706
/*
 * gfx_v8_0_ring_test_ring - basic ring liveness test
 *
 * Writes 0xCAFEDEAD to a scratch register, then submits a 3-dword
 * SET_UCONFIG_REG packet on @ring that writes 0xDEADBEEF to the same
 * register, and polls (up to adev->usec_timeout microseconds) until the
 * value lands.  Proves the CP is fetching and executing ring contents.
 *
 * Returns 0 on success, -EINVAL on timeout, or the error from scratch
 * allocation / ring locking.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can detect the CP's write */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* one SET_UCONFIG_REG packet: scratch reg <- 0xDEADBEEF */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-poll for the CP to execute the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
750
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission
 *
 * Like gfx_v8_0_ring_test_ring(), but the SET_UCONFIG_REG write of
 * 0xDEADBEEF is placed in an IB which is scheduled on @ring; the test
 * then waits on the returned fence (bounded by @timeout, in jiffies)
 * and checks the scratch register.  Proves IB fetch + fence signaling.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals,
 * -EINVAL if the scratch value is wrong, or a negative error code.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* seed the scratch reg so we can detect the IB's write */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* 3-dword IB: scratch reg <- 0xDEADBEEF */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	/* wait for the IB's fence; 0 means the wait timed out */
	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
806
807
808 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
809         release_firmware(adev->gfx.pfp_fw);
810         adev->gfx.pfp_fw = NULL;
811         release_firmware(adev->gfx.me_fw);
812         adev->gfx.me_fw = NULL;
813         release_firmware(adev->gfx.ce_fw);
814         adev->gfx.ce_fw = NULL;
815         release_firmware(adev->gfx.rlc_fw);
816         adev->gfx.rlc_fw = NULL;
817         release_firmware(adev->gfx.mec_fw);
818         adev->gfx.mec_fw = NULL;
819         if ((adev->asic_type != CHIP_STONEY) &&
820             (adev->asic_type != CHIP_TOPAZ))
821                 release_firmware(adev->gfx.mec2_fw);
822         adev->gfx.mec2_fw = NULL;
823
824         kfree(adev->gfx.rlc.register_list_format);
825 }
826
827 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
828 {
829         const char *chip_name;
830         char fw_name[30];
831         int err;
832         struct amdgpu_firmware_info *info = NULL;
833         const struct common_firmware_header *header = NULL;
834         const struct gfx_firmware_header_v1_0 *cp_hdr;
835         const struct rlc_firmware_header_v2_0 *rlc_hdr;
836         unsigned int *tmp = NULL, i;
837
838         DRM_DEBUG("\n");
839
840         switch (adev->asic_type) {
841         case CHIP_TOPAZ:
842                 chip_name = "topaz";
843                 break;
844         case CHIP_TONGA:
845                 chip_name = "tonga";
846                 break;
847         case CHIP_CARRIZO:
848                 chip_name = "carrizo";
849                 break;
850         case CHIP_FIJI:
851                 chip_name = "fiji";
852                 break;
853         case CHIP_POLARIS11:
854                 chip_name = "polaris11";
855                 break;
856         case CHIP_POLARIS10:
857                 chip_name = "polaris10";
858                 break;
859         case CHIP_STONEY:
860                 chip_name = "stoney";
861                 break;
862         default:
863                 BUG();
864         }
865
866         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
867         err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
868         if (err)
869                 goto out;
870         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
871         if (err)
872                 goto out;
873         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
874         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
875         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
876
877         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
878         err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
879         if (err)
880                 goto out;
881         err = amdgpu_ucode_validate(adev->gfx.me_fw);
882         if (err)
883                 goto out;
884         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
885         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
886         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
887
888         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
889         err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
890         if (err)
891                 goto out;
892         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
893         if (err)
894                 goto out;
895         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
896         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
897         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
898
899         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
900         err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
901         if (err)
902                 goto out;
903         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
904         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
905         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
906         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
907
908         adev->gfx.rlc.save_and_restore_offset =
909                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
910         adev->gfx.rlc.clear_state_descriptor_offset =
911                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
912         adev->gfx.rlc.avail_scratch_ram_locations =
913                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
914         adev->gfx.rlc.reg_restore_list_size =
915                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
916         adev->gfx.rlc.reg_list_format_start =
917                         le32_to_cpu(rlc_hdr->reg_list_format_start);
918         adev->gfx.rlc.reg_list_format_separate_start =
919                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
920         adev->gfx.rlc.starting_offsets_start =
921                         le32_to_cpu(rlc_hdr->starting_offsets_start);
922         adev->gfx.rlc.reg_list_format_size_bytes =
923                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
924         adev->gfx.rlc.reg_list_size_bytes =
925                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
926
927         adev->gfx.rlc.register_list_format =
928                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
929                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
930
931         if (!adev->gfx.rlc.register_list_format) {
932                 err = -ENOMEM;
933                 goto out;
934         }
935
936         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
937                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
938         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
939                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
940
941         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
942
943         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
944                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
945         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
946                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
947
948         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949         err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
950         if (err)
951                 goto out;
952         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
953         if (err)
954                 goto out;
955         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
956         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
957         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
958
959         if ((adev->asic_type != CHIP_STONEY) &&
960             (adev->asic_type != CHIP_TOPAZ)) {
961                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
962                 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
963                 if (!err) {
964                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
965                         if (err)
966                                 goto out;
967                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
968                                 adev->gfx.mec2_fw->data;
969                         adev->gfx.mec2_fw_version =
970                                 le32_to_cpu(cp_hdr->header.ucode_version);
971                         adev->gfx.mec2_feature_version =
972                                 le32_to_cpu(cp_hdr->ucode_feature_version);
973                 } else {
974                         err = 0;
975                         adev->gfx.mec2_fw = NULL;
976                 }
977         }
978
979         if (adev->firmware.smu_load) {
980                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
981                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
982                 info->fw = adev->gfx.pfp_fw;
983                 header = (const struct common_firmware_header *)info->fw->data;
984                 adev->firmware.fw_size +=
985                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
986
987                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
988                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
989                 info->fw = adev->gfx.me_fw;
990                 header = (const struct common_firmware_header *)info->fw->data;
991                 adev->firmware.fw_size +=
992                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
993
994                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
995                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
996                 info->fw = adev->gfx.ce_fw;
997                 header = (const struct common_firmware_header *)info->fw->data;
998                 adev->firmware.fw_size +=
999                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1000
1001                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1002                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1003                 info->fw = adev->gfx.rlc_fw;
1004                 header = (const struct common_firmware_header *)info->fw->data;
1005                 adev->firmware.fw_size +=
1006                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1007
1008                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1009                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1010                 info->fw = adev->gfx.mec_fw;
1011                 header = (const struct common_firmware_header *)info->fw->data;
1012                 adev->firmware.fw_size +=
1013                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1014
1015                 if (adev->gfx.mec2_fw) {
1016                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1017                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1018                         info->fw = adev->gfx.mec2_fw;
1019                         header = (const struct common_firmware_header *)info->fw->data;
1020                         adev->firmware.fw_size +=
1021                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1022                 }
1023
1024         }
1025
1026 out:
1027         if (err) {
1028                 dev_err(adev->dev,
1029                         "gfx8: Failed to load firmware \"%s\"\n",
1030                         fw_name);
1031                 release_firmware(adev->gfx.pfp_fw);
1032                 adev->gfx.pfp_fw = NULL;
1033                 release_firmware(adev->gfx.me_fw);
1034                 adev->gfx.me_fw = NULL;
1035                 release_firmware(adev->gfx.ce_fw);
1036                 adev->gfx.ce_fw = NULL;
1037                 release_firmware(adev->gfx.rlc_fw);
1038                 adev->gfx.rlc_fw = NULL;
1039                 release_firmware(adev->gfx.mec_fw);
1040                 adev->gfx.mec_fw = NULL;
1041                 release_firmware(adev->gfx.mec2_fw);
1042                 adev->gfx.mec2_fw = NULL;
1043         }
1044         return err;
1045 }
1046
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state buffer
 *
 * Emits, as little-endian PM4 packets into @buffer:
 * preamble-begin, CONTEXT_CONTROL, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG(+_1) pair,
 * preamble-end, and a final CLEAR_STATE packet.  @buffer must be at
 * least gfx_v8_0_get_csb_size(adev) dwords; no bounds checking is done
 * here.  Silently returns if cs_data or @buffer is NULL, or on the
 * first non-SECT_CONTEXT section.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy every context-register extent from the clear-state tables */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC raster config: PA_SC_RASTER_CONFIG and _CONFIG_1 */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1119
/*
 * cz_init_cp_jump_table - populate the CP jump table buffer
 *
 * Copies the jump table (jt_offset/jt_size from each gfx firmware
 * header) of the CE, PFP, ME and MEC images - plus MEC2 on Carrizo -
 * back-to-back into the kmapped cp_table buffer
 * (adev->gfx.rlc.cp_table_ptr).  Caller must have the cp table bo
 * reserved and mapped (see gfx_v8_0_rlc_init()).
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* only Carrizo has a MEC2 jump table to copy */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* append this engine's jump table after the previous one */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1184
/*
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects
 *
 * Unpins and frees the clear state buffer and (if allocated) the CP
 * jump table buffer.  Each bo must be reserved before unpin; a failed
 * reserve is only warned about and teardown proceeds.  Safe to call
 * when either bo was never created.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
1211
1212 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1213 {
1214         volatile u32 *dst_ptr;
1215         u32 dws;
1216         const struct cs_section_def *cs_data;
1217         int r;
1218
1219         adev->gfx.rlc.cs_data = vi_cs_data;
1220
1221         cs_data = adev->gfx.rlc.cs_data;
1222
1223         if (cs_data) {
1224                 /* clear state block */
1225                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1226
1227                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1228                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1229                                              AMDGPU_GEM_DOMAIN_VRAM,
1230                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1231                                              NULL, NULL,
1232                                              &adev->gfx.rlc.clear_state_obj);
1233                         if (r) {
1234                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1235                                 gfx_v8_0_rlc_fini(adev);
1236                                 return r;
1237                         }
1238                 }
1239                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1240                 if (unlikely(r != 0)) {
1241                         gfx_v8_0_rlc_fini(adev);
1242                         return r;
1243                 }
1244                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1245                                   &adev->gfx.rlc.clear_state_gpu_addr);
1246                 if (r) {
1247                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1248                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1249                         gfx_v8_0_rlc_fini(adev);
1250                         return r;
1251                 }
1252
1253                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1254                 if (r) {
1255                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1256                         gfx_v8_0_rlc_fini(adev);
1257                         return r;
1258                 }
1259                 /* set up the cs buffer */
1260                 dst_ptr = adev->gfx.rlc.cs_ptr;
1261                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1262                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1263                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1264         }
1265
1266         if ((adev->asic_type == CHIP_CARRIZO) ||
1267             (adev->asic_type == CHIP_STONEY)) {
1268                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1269                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1270                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1271                                              AMDGPU_GEM_DOMAIN_VRAM,
1272                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1273                                              NULL, NULL,
1274                                              &adev->gfx.rlc.cp_table_obj);
1275                         if (r) {
1276                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1277                                 return r;
1278                         }
1279                 }
1280
1281                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1282                 if (unlikely(r != 0)) {
1283                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1284                         return r;
1285                 }
1286                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1287                                   &adev->gfx.rlc.cp_table_gpu_addr);
1288                 if (r) {
1289                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1290                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1291                         return r;
1292                 }
1293                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1294                 if (r) {
1295                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1296                         return r;
1297                 }
1298
1299                 cz_init_cp_jump_table(adev);
1300
1301                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1302                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1303         }
1304
1305         return 0;
1306 }
1307
1308 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1309 {
1310         int r;
1311
1312         if (adev->gfx.mec.hpd_eop_obj) {
1313                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1314                 if (unlikely(r != 0))
1315                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1316                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1317                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1318                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1319                 adev->gfx.mec.hpd_eop_obj = NULL;
1320         }
1321 }
1322
1323 #define MEC_HPD_SIZE 2048
1324
1325 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1326 {
1327         int r;
1328         u32 *hpd;
1329
1330         /*
1331          * we assign only 1 pipe because all other pipes will
1332          * be handled by KFD
1333          */
1334         adev->gfx.mec.num_mec = 1;
1335         adev->gfx.mec.num_pipe = 1;
1336         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1337
1338         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1339                 r = amdgpu_bo_create(adev,
1340                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1341                                      PAGE_SIZE, true,
1342                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1343                                      &adev->gfx.mec.hpd_eop_obj);
1344                 if (r) {
1345                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1346                         return r;
1347                 }
1348         }
1349
1350         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1351         if (unlikely(r != 0)) {
1352                 gfx_v8_0_mec_fini(adev);
1353                 return r;
1354         }
1355         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1356                           &adev->gfx.mec.hpd_eop_gpu_addr);
1357         if (r) {
1358                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1359                 gfx_v8_0_mec_fini(adev);
1360                 return r;
1361         }
1362         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1363         if (r) {
1364                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1365                 gfx_v8_0_mec_fini(adev);
1366                 return r;
1367         }
1368
1369         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1370
1371         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1372         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1373
1374         return 0;
1375 }
1376
/*
 * Raw GCN (VI) machine code for the VGPR-initialisation compute shader
 * used by gfx_v8_0_do_edc_gpr_workarounds().  Each dword is shader
 * instruction data copied verbatim into the indirect buffer at
 * vgpr_offset; the name indicates it initialises the VGPR file so the
 * EDC logic sees fully written registers.  (Opcode decoding per the
 * GCN3 ISA manual - not re-derived here; do not edit by hand.)
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1413
/*
 * Raw GCN (VI) machine code for the SGPR-initialisation compute shader.
 * Copied verbatim into the indirect buffer at sgpr_offset by
 * gfx_v8_0_do_edc_gpr_workarounds() and dispatched twice (once per SE0
 * CU-mask half - see sgpr1_init_regs/sgpr2_init_regs).  Opcode decoding
 * per the GCN3 ISA manual; do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1438
/*
 * (register, value) pairs for the VGPR-init dispatch.  Consumed two at
 * a time by gfx_v8_0_do_edc_gpr_workarounds(), which emits one
 * SET_SH_REG PM4 packet per pair before launching the shader.
 * SE0 thread mgmt enables all CUs (0xffffffff); threadgroup is
 * 1024x1x1 threads.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1458
/*
 * (register, value) pairs for the first SGPR-init dispatch.  SE0 CU
 * mask 0x0f selects the low CU half; sgpr2_init_regs covers the high
 * half (0xf0) with otherwise identical state.  Emitted as SET_SH_REG
 * packets by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1478
/*
 * (register, value) pairs for the second SGPR-init dispatch.  Identical
 * to sgpr1_init_regs except the SE0 CU mask (0xf0) selects the high CU
 * half, so the two dispatches together cover all CUs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1498
/*
 * SEC/DED (single-error-correct / double-error-detect) counter
 * registers across the GC blocks (CP, GDS, SPI, SQ, TC*, TD, ...).
 * gfx_v8_0_do_edc_gpr_workarounds() reads each one back after the
 * scrub dispatches; per the comment at that call site the read-back
 * clears the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1527
/*
 * gfx_v8_0_do_edc_gpr_workarounds - scrub GPRs so EDC starts from a
 * clean state (Carrizo only).
 *
 * Builds a single indirect buffer containing three compute dispatches:
 * the VGPR-init shader once, and the SGPR-init shader twice with
 * complementary SE0 CU masks (0x0f / 0xf0).  After the IB completes,
 * GB_EDC_MODE is re-programmed (DED_MODE=2, PROP_FED=1),
 * CC_GC_EDC_CONFIG is updated, and every SEC/DED counter register is
 * read back to clear its count.
 *
 * Returns 0 on success (or when skipped because the ASIC is not CZ or
 * the compute ring is not ready), negative error code on IB get/submit
 * or fence-wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the scrub shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * Per dispatch: 3 dwords per (reg, value) pair, 4 dwords for the
	 * COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT, 2 for the CS
	 * partial flush; times 4 bytes per dword.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	/* shader code lives in the same IB, after the packets */
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* same SGPR shader again, now with the complementary CU mask */
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	/* NOTE(review): the trailing "| 1" next to DIS_EDC=0 looks odd but
	 * is preserved as-is; confirm against the CC_GC_EDC_CONFIG spec. */
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);

	return r;
}
1690
/*
 * gfx_v8_0_gpu_early_init - derive the static GFX configuration.
 *
 * Fills adev->gfx.config with per-ASIC limits (shader engines, tile
 * pipes, CUs per SH, render backends, FIFO sizes, ...) and computes the
 * final GB_ADDR_CONFIG value, fixing up its ROW_SIZE field from the
 * detected memory row size.  For Carrizo/Stoney the CU count additionally
 * depends on the PCI revision (B10/B8/B6/B4 SKU bins); for Polaris the
 * gfx topology is read from the video BIOS via
 * amdgpu_atombios_get_gfx_info().
 *
 * Returns 0 on success, or the atombios error code on Polaris parts.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* SE/pipe/CU/backend counts come from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* SE/pipe/CU/backend counts come from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the SKU bin encoded in the PCI revision */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the SKU bin encoded in the PCI revision */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but not used further in
	 * this function - confirm whether the read is still wanted. */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: row bytes = 4 * 2^(8 + NOOFCOLS), capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1947
/*
 * gfx_v8_0_sw_init - GFX IP-block "sw_init" callback for VI (gfx v8) parts.
 *
 * @handle: opaque IP-block handle; actually a struct amdgpu_device *.
 *
 * Registers the GFX interrupt sources, loads the gfx microcode, allocates
 * the RLC and MEC buffer objects, creates the gfx and compute rings and
 * reserves the gfx partition of the GDS/GWS/OA resources.
 *
 * Returns 0 on success or a negative error code.  On failure the partially
 * initialized state is expected to be torn down by the matching sw_fini
 * callback (NOTE(review): this function itself does not unwind earlier
 * steps on a later failure — confirm the IP-block teardown path covers it).
 */
1948 static int gfx_v8_0_sw_init(void *handle)
1949 {
1950         int i, r;
1951         struct amdgpu_ring *ring;
1952         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1953
             /* Interrupt source ids 181/184/185 are the VI GFX sources for
              * end-of-pipe events, privileged register accesses and
              * privileged instruction faults, respectively. */
1954         /* EOP Event */
1955         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1956         if (r)
1957                 return r;
1958
1959         /* Privileged reg */
1960         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1961         if (r)
1962                 return r;
1963
1964         /* Privileged inst */
1965         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1966         if (r)
1967                 return r;
1968
1969         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1970
             /* Scratch registers are used by the ring tests later on. */
1971         gfx_v8_0_scratch_init(adev);
1972
1973         r = gfx_v8_0_init_microcode(adev);
1974         if (r) {
1975                 DRM_ERROR("Failed to load gfx firmware!\n");
1976                 return r;
1977         }
1978
             /* RLC (RunList Controller) save/restore and clear-state BOs. */
1979         r = gfx_v8_0_rlc_init(adev);
1980         if (r) {
1981                 DRM_ERROR("Failed to init rlc BOs!\n");
1982                 return r;
1983         }
1984
             /* MEC (compute MicroEngine) HPD/EOP buffer objects. */
1985         r = gfx_v8_0_mec_init(adev);
1986         if (r) {
1987                 DRM_ERROR("Failed to init MEC BOs!\n");
1988                 return r;
1989         }
1990
1991         /* set up the gfx ring */
1992         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1993                 ring = &adev->gfx.gfx_ring[i];
1994                 ring->ring_obj = NULL;
1995                 sprintf(ring->name, "gfx");
1996                 /* no gfx doorbells on iceland */
1997                 if (adev->asic_type != CHIP_TOPAZ) {
1998                         ring->use_doorbell = true;
1999                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2000                 }
2001
                     /* 1024 is the ring size passed to amdgpu_ring_init;
                      * PACKET3_NOP with count 0x3FFF is the filler packet
                      * used to pad the ring (see amdgpu_ring_init for the
                      * exact semantics of the size/align arguments). */
2002                 r = amdgpu_ring_init(adev, ring, 1024,
2003                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2004                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2005                                      AMDGPU_RING_TYPE_GFX);
2006                 if (r)
2007                         return r;
2008         }
2009
             /* Compute rings all live on MEC1 here: the linear index i is
              * decomposed as pipe = i / 8, queue = i % 8 (8 queues/pipe). */
2010         /* set up the compute queues */
2011         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2012                 unsigned irq_type;
2013
2014                 /* max 32 queues per MEC */
2015                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2016                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2017                         break;
2018                 }
2019                 ring = &adev->gfx.compute_ring[i];
2020                 ring->ring_obj = NULL;
2021                 ring->use_doorbell = true;
2022                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2023                 ring->me = 1; /* first MEC */
2024                 ring->pipe = i / 8;
2025                 ring->queue = i % 8;
2026                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
                     /* one EOP interrupt source per MEC1 pipe */
2027                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2028                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2029                 r = amdgpu_ring_init(adev, ring, 1024,
2030                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2031                                      &adev->gfx.eop_irq, irq_type,
2032                                      AMDGPU_RING_TYPE_COMPUTE);
2033                 if (r)
2034                         return r;
2035         }
2036
             /* These three BOs pin the gfx partition of the on-chip Global
              * Data Share, Global Wave Sync and Ordered Append resources so
              * user queues cannot claim them; freed in sw_fini. */
2037         /* reserve GDS, GWS and OA resource for gfx */
2038         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2039                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2040                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2041         if (r)
2042                 return r;
2043
2044         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2045                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2046                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2047         if (r)
2048                 return r;
2049
2050         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2051                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2052                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2053         if (r)
2054                 return r;
2055
             /* 0x8000 = 32 KiB; presumably the constant-engine RAM size for
              * this family — field name suggests CE RAM, confirm vs. hw docs */
2056         adev->gfx.ce_ram_size = 0x8000;
2057
2058         r = gfx_v8_0_gpu_early_init(adev);
2059         if (r)
2060                 return r;
2061
2062         return 0;
2063 }
2064
2065 static int gfx_v8_0_sw_fini(void *handle)
2066 {
2067         int i;
2068         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2069
2070         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2071         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2072         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2073
2074         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2075                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2076         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2077                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2078
2079         gfx_v8_0_mec_fini(adev);
2080         gfx_v8_0_rlc_fini(adev);
2081         gfx_v8_0_free_microcode(adev);
2082
2083         return 0;
2084 }
2085
2086 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2087 {
2088         uint32_t *modearray, *mod2array;
2089         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2090         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2091         u32 reg_offset;
2092
2093         modearray = adev->gfx.config.tile_mode_array;
2094         mod2array = adev->gfx.config.macrotile_mode_array;
2095
2096         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2097                 modearray[reg_offset] = 0;
2098
2099         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2100                 mod2array[reg_offset] = 0;
2101
2102         switch (adev->asic_type) {
2103         case CHIP_TOPAZ:
2104                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105                                 PIPE_CONFIG(ADDR_SURF_P2) |
2106                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2108                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2109                                 PIPE_CONFIG(ADDR_SURF_P2) |
2110                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2112                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2113                                 PIPE_CONFIG(ADDR_SURF_P2) |
2114                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2116                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117                                 PIPE_CONFIG(ADDR_SURF_P2) |
2118                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2119                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2120                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121                                 PIPE_CONFIG(ADDR_SURF_P2) |
2122                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2123                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2124                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125                                 PIPE_CONFIG(ADDR_SURF_P2) |
2126                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2128                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2129                                 PIPE_CONFIG(ADDR_SURF_P2) |
2130                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2131                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2132                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2133                                 PIPE_CONFIG(ADDR_SURF_P2));
2134                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2137                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139                                  PIPE_CONFIG(ADDR_SURF_P2) |
2140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143                                  PIPE_CONFIG(ADDR_SURF_P2) |
2144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2146                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2147                                  PIPE_CONFIG(ADDR_SURF_P2) |
2148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2150                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                  PIPE_CONFIG(ADDR_SURF_P2) |
2152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2154                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2155                                  PIPE_CONFIG(ADDR_SURF_P2) |
2156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2158                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159                                  PIPE_CONFIG(ADDR_SURF_P2) |
2160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2162                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2163                                  PIPE_CONFIG(ADDR_SURF_P2) |
2164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2166                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2167                                  PIPE_CONFIG(ADDR_SURF_P2) |
2168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2170                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2171                                  PIPE_CONFIG(ADDR_SURF_P2) |
2172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2174                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2175                                  PIPE_CONFIG(ADDR_SURF_P2) |
2176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2178                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2179                                  PIPE_CONFIG(ADDR_SURF_P2) |
2180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2182                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2186                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2190                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2194                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2206
2207                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2208                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2209                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2210                                 NUM_BANKS(ADDR_SURF_8_BANK));
2211                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2212                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2213                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2214                                 NUM_BANKS(ADDR_SURF_8_BANK));
2215                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2216                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2217                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2218                                 NUM_BANKS(ADDR_SURF_8_BANK));
2219                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2220                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2221                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2222                                 NUM_BANKS(ADDR_SURF_8_BANK));
2223                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2224                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2225                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2226                                 NUM_BANKS(ADDR_SURF_8_BANK));
2227                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2228                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2229                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2230                                 NUM_BANKS(ADDR_SURF_8_BANK));
2231                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2233                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2234                                 NUM_BANKS(ADDR_SURF_8_BANK));
2235                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2236                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2237                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2238                                 NUM_BANKS(ADDR_SURF_16_BANK));
2239                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2240                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2242                                 NUM_BANKS(ADDR_SURF_16_BANK));
2243                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2244                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246                                  NUM_BANKS(ADDR_SURF_16_BANK));
2247                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2248                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2249                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2250                                  NUM_BANKS(ADDR_SURF_16_BANK));
2251                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2253                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2254                                  NUM_BANKS(ADDR_SURF_16_BANK));
2255                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2258                                  NUM_BANKS(ADDR_SURF_16_BANK));
2259                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                  NUM_BANKS(ADDR_SURF_8_BANK));
2263
2264                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2265                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2266                             reg_offset != 23)
2267                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2268
2269                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2270                         if (reg_offset != 7)
2271                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2272
2273                 break;
2274         case CHIP_FIJI:
2275                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2277                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2288                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2291                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2296                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2303                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2304                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2305                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2307                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2308                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2309                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2312                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2313                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2318                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2321                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2334                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2337                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2338                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2341                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2342                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2345                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2346                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2350                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2353                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2354                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2361                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2365                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2366                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2369                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2373                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2377                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2394                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2397
2398                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2400                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2401                                 NUM_BANKS(ADDR_SURF_8_BANK));
2402                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2404                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2405                                 NUM_BANKS(ADDR_SURF_8_BANK));
2406                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2409                                 NUM_BANKS(ADDR_SURF_8_BANK));
2410                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2413                                 NUM_BANKS(ADDR_SURF_8_BANK));
2414                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2417                                 NUM_BANKS(ADDR_SURF_8_BANK));
2418                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2420                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2421                                 NUM_BANKS(ADDR_SURF_8_BANK));
2422                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2425                                 NUM_BANKS(ADDR_SURF_8_BANK));
2426                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2428                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2429                                 NUM_BANKS(ADDR_SURF_8_BANK));
2430                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2432                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433                                 NUM_BANKS(ADDR_SURF_8_BANK));
2434                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2436                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2437                                  NUM_BANKS(ADDR_SURF_8_BANK));
2438                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2441                                  NUM_BANKS(ADDR_SURF_8_BANK));
2442                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2444                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                  NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2448                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                  NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2452                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453                                  NUM_BANKS(ADDR_SURF_4_BANK));
2454
2455                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2456                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2457
2458                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2459                         if (reg_offset != 7)
2460                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2461
2462                 break;
2463         case CHIP_TONGA:
                /*
                 * Tile-mode table for Tonga, written to GB_TILE_MODE0..n
                 * below.  Most entries use pipe config P8_32x32_16x16; the
                 * second PRT variant of each group uses P4_16x16.
                 */
2464                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2465                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2467                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2468                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2470                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2471                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2472                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2473                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2474                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2475                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2476                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2479                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2480                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2483                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2484                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2485                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2487                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2488                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2491                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2492                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2494                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2495                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2496                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2497                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2498                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2499                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2501                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2505                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2507                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2509                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2510                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2511                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2512                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2513                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2514                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2517                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2521                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2522                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2523                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2525                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2527                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2529                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2530                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2531                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2532                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2533                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2534                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2535                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2537                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2538                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2539                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2541                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2542                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2543                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2545                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2546                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2547                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2549                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2550                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2554                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2555                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2558                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2562                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2563                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2566                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2582                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2586
                /*
                 * Macrotile (bank-configuration) table for Tonga, written to
                 * GB_MACROTILE_MODE0..n below.  Index 7 is not populated;
                 * the write loop below also skips offset 7.
                 */
2587                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2589                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2590                                 NUM_BANKS(ADDR_SURF_16_BANK));
2591                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2593                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2594                                 NUM_BANKS(ADDR_SURF_16_BANK));
2595                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2597                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2598                                 NUM_BANKS(ADDR_SURF_16_BANK));
2599                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2601                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2602                                 NUM_BANKS(ADDR_SURF_16_BANK));
2603                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2605                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2606                                 NUM_BANKS(ADDR_SURF_16_BANK));
2607                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2625                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2626                                  NUM_BANKS(ADDR_SURF_16_BANK));
2627                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630                                  NUM_BANKS(ADDR_SURF_16_BANK));
2631                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                  NUM_BANKS(ADDR_SURF_8_BANK));
2635                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638                                  NUM_BANKS(ADDR_SURF_4_BANK));
2639                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642                                  NUM_BANKS(ADDR_SURF_4_BANK));
2643
                /* Program every GB_TILE_MODEn register from modearray. */
2644                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2645                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2646
                /* Program GB_MACROTILE_MODEn; offset 7 is skipped (never assigned). */
2647                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648                         if (reg_offset != 7)
2649                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2650
2651                 break;
2652         case CHIP_POLARIS11:
                /*
                 * Tile-mode table for Polaris11, written to GB_TILE_MODE0..n
                 * below.  Every entry uses pipe config P4_16x16 (fewer pipes
                 * than the Tonga/Polaris10 tables above/below).
                 */
2653                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2656                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2657                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2660                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2661                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2664                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2665                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2668                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2669                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2672                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2676                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2680                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2684                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2687                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2688                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2691                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2695                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2696                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2699                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2703                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2727                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2731                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2735                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2739                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2767                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2775
                /*
                 * Macrotile (bank-configuration) table for Polaris11, written
                 * to GB_MACROTILE_MODE0..n below.  Index 7 is not populated;
                 * the write loop below also skips offset 7.
                 */
2776                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2778                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2779                                 NUM_BANKS(ADDR_SURF_16_BANK));
2780
2781                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2783                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2784                                 NUM_BANKS(ADDR_SURF_16_BANK));
2785
2786                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2788                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2789                                 NUM_BANKS(ADDR_SURF_16_BANK));
2790
2791                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2793                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2794                                 NUM_BANKS(ADDR_SURF_16_BANK));
2795
2796                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2798                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2799                                 NUM_BANKS(ADDR_SURF_16_BANK));
2800
2801                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2804                                 NUM_BANKS(ADDR_SURF_16_BANK));
2805
2806                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2808                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2809                                 NUM_BANKS(ADDR_SURF_16_BANK));
2810
2811                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2812                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2813                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2814                                 NUM_BANKS(ADDR_SURF_16_BANK));
2815
2816                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2817                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819                                 NUM_BANKS(ADDR_SURF_16_BANK));
2820
2821                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2828                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829                                 NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2833                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2834                                 NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839                                 NUM_BANKS(ADDR_SURF_8_BANK));
2840
2841                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844                                 NUM_BANKS(ADDR_SURF_4_BANK));
2845
                /* Program every GB_TILE_MODEn register from modearray. */
2846                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2847                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2848
                /* Program GB_MACROTILE_MODEn; offset 7 is skipped (never assigned). */
2849                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2850                         if (reg_offset != 7)
2851                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2852
2853                 break;
2854         case CHIP_POLARIS10:
2855                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2858                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2859                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2862                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2863                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2867                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2871                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2889                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2890                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2892                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2896                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2898                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2900                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2901                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2904                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2905                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2920                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2925                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2929                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2933                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2941                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2946                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2977
2978                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2980                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2981                                 NUM_BANKS(ADDR_SURF_16_BANK));
2982
2983                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                 NUM_BANKS(ADDR_SURF_16_BANK));
2987
2988                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                 NUM_BANKS(ADDR_SURF_16_BANK));
2992
2993                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2995                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2996                                 NUM_BANKS(ADDR_SURF_16_BANK));
2997
2998                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2999                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3000                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3001                                 NUM_BANKS(ADDR_SURF_16_BANK));
3002
3003                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007
3008                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3011                                 NUM_BANKS(ADDR_SURF_16_BANK));
3012
3013                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3014                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3015                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016                                 NUM_BANKS(ADDR_SURF_16_BANK));
3017
3018                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3020                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021                                 NUM_BANKS(ADDR_SURF_16_BANK));
3022
3023                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3036                                 NUM_BANKS(ADDR_SURF_8_BANK));
3037
3038                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3040                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3041                                 NUM_BANKS(ADDR_SURF_4_BANK));
3042
3043                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3045                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3046                                 NUM_BANKS(ADDR_SURF_4_BANK));
3047
3048                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3049                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3050
                /*
                 * mod2array[7] is never initialized in the table above for
                 * this ASIC (only indices 0-6 and 8-14 are set), so skip
                 * index 7 and leave mmGB_MACROTILE_MODE7 at its reset value.
                 */
3051                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3052                         if (reg_offset != 7)
3053                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3054
3055                 break;
3056         case CHIP_STONEY:
3057                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3058                                 PIPE_CONFIG(ADDR_SURF_P2) |
3059                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3060                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3061                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3062                                 PIPE_CONFIG(ADDR_SURF_P2) |
3063                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3064                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3065                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P2) |
3067                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3069                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P2) |
3071                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3072                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3073                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P2) |
3079                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3086                                 PIPE_CONFIG(ADDR_SURF_P2));
3087                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3088                                 PIPE_CONFIG(ADDR_SURF_P2) |
3089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3090                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092                                  PIPE_CONFIG(ADDR_SURF_P2) |
3093                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3094                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3095                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3096                                  PIPE_CONFIG(ADDR_SURF_P2) |
3097                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3098                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3099                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3100                                  PIPE_CONFIG(ADDR_SURF_P2) |
3101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3103                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104                                  PIPE_CONFIG(ADDR_SURF_P2) |
3105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3108                                  PIPE_CONFIG(ADDR_SURF_P2) |
3109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112                                  PIPE_CONFIG(ADDR_SURF_P2) |
3113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3116                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3119                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3120                                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3123                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3127                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3131                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3159
3160                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163                                 NUM_BANKS(ADDR_SURF_8_BANK));
3164                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3166                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3167                                 NUM_BANKS(ADDR_SURF_8_BANK));
3168                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3171                                 NUM_BANKS(ADDR_SURF_8_BANK));
3172                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3174                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3175                                 NUM_BANKS(ADDR_SURF_8_BANK));
3176                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3179                                 NUM_BANKS(ADDR_SURF_8_BANK));
3180                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3183                                 NUM_BANKS(ADDR_SURF_8_BANK));
3184                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_8_BANK));
3188                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3191                                 NUM_BANKS(ADDR_SURF_16_BANK));
3192                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195                                 NUM_BANKS(ADDR_SURF_16_BANK));
3196                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3197                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3198                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                  NUM_BANKS(ADDR_SURF_16_BANK));
3200                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3201                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3202                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203                                  NUM_BANKS(ADDR_SURF_16_BANK));
3204                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3206                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                  NUM_BANKS(ADDR_SURF_16_BANK));
3208                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                  NUM_BANKS(ADDR_SURF_16_BANK));
3212                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                  NUM_BANKS(ADDR_SURF_8_BANK));
3216
3217                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        /*
                         * modearray[7], [12], [17] and [23] are never set in
                         * the Stoney table above; skip them so those
                         * GB_TILE_MODE registers keep their reset values.
                         */
3218                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3219                             reg_offset != 23)
3220                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3221
                /* mod2array[7] is likewise uninitialized; leave MACROTILE_MODE7 alone. */
3222                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3223                         if (reg_offset != 7)
3224                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3225
3226                 break;
3227         default:
3228                 dev_warn(adev->dev,
3229                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3230                          adev->asic_type);
3231
                /* fall through - use the CHIP_CARRIZO tables as a safe default */
3232         case CHIP_CARRIZO:
3233                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234                                 PIPE_CONFIG(ADDR_SURF_P2) |
3235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3238                                 PIPE_CONFIG(ADDR_SURF_P2) |
3239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3242                                 PIPE_CONFIG(ADDR_SURF_P2) |
3243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3245                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246                                 PIPE_CONFIG(ADDR_SURF_P2) |
3247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3250                                 PIPE_CONFIG(ADDR_SURF_P2) |
3251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3254                                 PIPE_CONFIG(ADDR_SURF_P2) |
3255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3257                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3258                                 PIPE_CONFIG(ADDR_SURF_P2) |
3259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3262                                 PIPE_CONFIG(ADDR_SURF_P2));
3263                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3264                                 PIPE_CONFIG(ADDR_SURF_P2) |
3265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3266                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3271                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3272                                  PIPE_CONFIG(ADDR_SURF_P2) |
3273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3275                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3283                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3291                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3327                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3335
3336                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339                                 NUM_BANKS(ADDR_SURF_8_BANK));
3340                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343                                 NUM_BANKS(ADDR_SURF_8_BANK));
3344                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3347                                 NUM_BANKS(ADDR_SURF_8_BANK));
3348                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3351                                 NUM_BANKS(ADDR_SURF_8_BANK));
3352                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355                                 NUM_BANKS(ADDR_SURF_8_BANK));
3356                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359                                 NUM_BANKS(ADDR_SURF_8_BANK));
3360                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3363                                 NUM_BANKS(ADDR_SURF_8_BANK));
3364                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                 NUM_BANKS(ADDR_SURF_16_BANK));
3368                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371                                 NUM_BANKS(ADDR_SURF_16_BANK));
3372                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3373                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3374                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375                                  NUM_BANKS(ADDR_SURF_16_BANK));
3376                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3377                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3378                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                  NUM_BANKS(ADDR_SURF_16_BANK));
3380                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3382                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383                                  NUM_BANKS(ADDR_SURF_16_BANK));
3384                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387                                  NUM_BANKS(ADDR_SURF_16_BANK));
3388                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391                                  NUM_BANKS(ADDR_SURF_8_BANK));
3392
3393                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3394                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3395                             reg_offset != 23)
3396                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3397
3398                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3399                         if (reg_offset != 7)
3400                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3401
3402                 break;
3403         }
3404 }
3405
3406 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3407                                   u32 se_num, u32 sh_num, u32 instance)
3408 {
3409         u32 data;
3410
3411         if (instance == 0xffffffff)
3412                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3413         else
3414                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3415
3416         if (se_num == 0xffffffff)
3417                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3418         else
3419                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3420
3421         if (sh_num == 0xffffffff)
3422                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3423         else
3424                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3425
3426         WREG32(mmGRBM_GFX_INDEX, data);
3427 }
3428
3429 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3430 {
3431         return (u32)((1ULL << bit_width) - 1);
3432 }
3433
3434 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3435 {
3436         u32 data, mask;
3437
3438         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3439                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3440
3441         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3442
3443         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3444                                        adev->gfx.config.max_sh_per_se);
3445
3446         return (~data) & mask;
3447 }
3448
/*
 * gfx_v8_0_raster_config - look up the per-ASIC raster configuration
 *
 * @adev: amdgpu device pointer
 * @rconf: ORed with the PA_SC_RASTER_CONFIG field values for this ASIC
 * @rconf1: ORed with the PA_SC_RASTER_CONFIG_1 field values for this ASIC
 *
 * These are fixed, per-chip mappings of render backends and packers to
 * shader engines for a fully enabled (non-harvested) part.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3487
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config around
 * harvested (fused-off) render backends
 *
 * @adev: amdgpu device pointer
 * @raster_config: ideal PA_SC_RASTER_CONFIG for a fully enabled part
 * @raster_config_1: ideal PA_SC_RASTER_CONFIG_1 for a fully enabled part
 * @rb_mask: bitmask of the render backends actually present
 * @num_rb: nominal RB count before harvesting
 *
 * The default SE_PAIR/SE/PKR/RB mappings may route work to RBs that are
 * missing on harvested parts.  For each shader engine this rewrites the
 * affected map fields so only RBs present in @rb_mask are targeted, then
 * writes a per-SE PA_SC_RASTER_CONFIG via GRBM_GFX_INDEX selection.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Split the global RB mask into one mask per shader engine. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair has no RBs, steer the pair map at the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the even index of the SE pair this SE belongs to. */
		int idx = (se / 2) * 2;

		/* One SE of the pair is empty: force the SE map at the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same remapping one level down, for the two packers of this SE. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And again for the individual RBs within each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3596
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration
 *
 * Walks every SE/SH to build a bitmap of active RBs, caches it in
 * adev->gfx.config, then writes the ideal raster config directly or, if
 * RBs are harvested, routes around them.  Caller-visible side effects:
 * backend_enable_mask and num_rbs are updated.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* grbm_idx_mutex serializes use of the shared GRBM_GFX_INDEX selector */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into its slot of the global mask */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* restore broadcast selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* all RBs present (or none detected): the ideal config can be used as-is */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	mutex_unlock(&adev->grbm_idx_mutex);
}
3638
3639 /**
3640  * gfx_v8_0_init_compute_vmid - gart enable
3641  *
3642  * @rdev: amdgpu_device pointer
3643  *
3644  * Initialize compute vmid sh_mem registers
3645  *
3646  */
3647 #define DEFAULT_SH_MEM_BASES    (0x6000)
3648 #define FIRST_COMPUTE_VMID      (8)
3649 #define LAST_COMPUTE_VMID       (16)
3650 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3651 {
3652         int i;
3653         uint32_t sh_mem_config;
3654         uint32_t sh_mem_bases;
3655
3656         /*
3657          * Configure apertures:
3658          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3659          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3660          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3661          */
3662         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3663
3664         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3665                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3666                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3667                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3668                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3669                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3670
3671         mutex_lock(&adev->srbm_mutex);
3672         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3673                 vi_srbm_select(adev, 0, 0, 0, i);
3674                 /* CP and shaders */
3675                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3676                 WREG32(mmSH_MEM_APE1_BASE, 1);
3677                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3678                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3679         }
3680         vi_srbm_select(adev, 0, 0, 0, 0);
3681         mutex_unlock(&adev->srbm_mutex);
3682 }
3683
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * Programs the address config and tiling tables, sets up render backends,
 * reads CU info, initializes the SH_MEM apertures for all 16 VMIDs (UC
 * memory types for VMID 0, NC for the rest), and sizes the PA_SC FIFOs.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel contexts): uncached default/APE1 mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* user VMIDs: non-coherent cached mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3746
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * Polls the per-SE/SH CU master busy register (up to adev->usec_timeout
 * microseconds each) and then the non-CU masters.  Timeouts are silently
 * ignored; the function always returns after at most the poll budget.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* back to broadcast before dropping the selector lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3776
3777 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3778                                                bool enable)
3779 {
3780         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3781
3782         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3783         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3784         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3785         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3786
3787         WREG32(mmCP_INT_CNTL_RING0, tmp);
3788 }
3789
/*
 * gfx_v8_0_init_csb - point the RLC at the clear-state indirect buffer
 *
 * Writes the 64-bit GPU address (low bits masked to dword alignment) and
 * size of the previously allocated clear-state buffer into the RLC CSIB
 * registers.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3800
/*
 * gfx_v8_0_parse_ind_reg_list - preprocess the RLC indirect register list
 *
 * @register_list_format: firmware register-list blob, modified in place
 * @ind_offset: index to start parsing from
 * @list_size: number of entries in the blob
 * @unique_indices: out array of distinct index register values found
 * @indices_count: in/out count of entries used in @unique_indices
 * @max_indices: capacity of @unique_indices (BUG on overflow)
 * @ind_start_offsets: out array recording where each sub-list begins
 * @offset_count: in/out count of entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets (BUG on overflow)
 *
 * Walks the blob, recording the start offset of each sub-list (sub-lists
 * are terminated by a 0xFFFFFFFF sentinel) and replacing each index
 * register value with its position in the @unique_indices table, so the
 * RLC can later look indices up via the SRM index-control registers.
 *
 * NOTE(review): the "ind_offset += 2" skip assumes each entry is three
 * words with the index register in the third — TODO confirm against the
 * RLC firmware list format.  Also, the BUG_ONs fire only after the write
 * that fills the last slot, i.e. they trip one entry before an actual
 * out-of-bounds write would occur.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* record where this sub-list starts */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current sub-list */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append to the unique-indices table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value with its table index */
		register_list_format[ind_offset] = indices;
	}
}
3850
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * Copies the firmware's register-list-format blob, rewrites its indirect
 * entries via gfx_v8_0_parse_ind_reg_list(), then programs:
 *  - the direct save/restore list into RLC ARAM,
 *  - the (rewritten) indirect list and its size into RLC GPM scratch,
 *  - the per-sub-list starting offsets,
 *  - the unique index registers into the SRM index-control registers.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy cannot be allocated.
 *
 * NOTE(review): the "& 0x3FFFF" / ">> 20" split assumes the SRM index
 * CNTL registers pack an 18-bit address and the data field above bit 20 —
 * TODO confirm against the RLC_SRM_INDEX_CNTL register layout.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites the list in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is in units of dword pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}
3912
/* Turn on the RLC save/restore machine (lists must be uploaded first). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3917
/*
 * gfx_v8_0_init_power_gating - program GFX power-gating delay parameters
 *
 * Only takes effect when static, smart or dynamic GFX power gating is
 * supported (pg_flags); otherwise a no-op.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* power up/down and command propagation delays */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3937
/* Enable/disable SMU clock slowdown during power-up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3943
/* Enable/disable SMU clock slowdown during power-down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3949
3950 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3951 {
3952         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3953 }
3954
/*
 * gfx_v8_0_init_pg - set up power gating if any PG feature is supported
 *
 * Uploads the clear-state buffer address and save/restore lists, enables
 * the SRM, then applies the APU-specific (Carrizo/Stoney) or Polaris11
 * power-gating configuration according to pg_flags.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			/* APUs additionally need the CP jump table and AO CU mask */
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
			} else {
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			}
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
			else
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
		}
	}
}
3988
/*
 * gfx_v8_0_rlc_stop - halt the RLC
 *
 * Clears the F32 enable bit, masks the GUI idle interrupts and waits for
 * the serdes masters to drain before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
3996
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset
 *
 * Asserts then deasserts GRBM SOFT_RESET_RLC with a 50us settle time on
 * each edge.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4005
/*
 * gfx_v8_0_rlc_start - start the RLC and (on dGPUs) re-enable the GUI
 * idle interrupts
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4016
4017 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4018 {
4019         const struct rlc_firmware_header_v2_0 *hdr;
4020         const __le32 *fw_data;
4021         unsigned i, fw_size;
4022
4023         if (!adev->gfx.rlc_fw)
4024                 return -EINVAL;
4025
4026         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4027         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4028
4029         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4030                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4031         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4032
4033         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4034         for (i = 0; i < fw_size; i++)
4035                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4036         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4037
4038         return 0;
4039 }
4040
/*
 * Bring the RLC back up: stop it, disable CG/PG, reset, re-init power
 * gating state, load its microcode (unless the SMU does that), then start it.
 *
 * Returns 0 on success or a negative error code if firmware loading fails.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris has an additional 3D CGCG/CGLS control register */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4084
4085 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4086 {
4087         int i;
4088         u32 tmp = RREG32(mmCP_ME_CNTL);
4089
4090         if (enable) {
4091                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4092                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4093                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4094         } else {
4095                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4096                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4097                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4098                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4099                         adev->gfx.gfx_ring[i].ready = false;
4100         }
4101         WREG32(mmCP_ME_CNTL, tmp);
4102         udelay(50);
4103 }
4104
4105 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4106 {
4107         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4108         const struct gfx_firmware_header_v1_0 *ce_hdr;
4109         const struct gfx_firmware_header_v1_0 *me_hdr;
4110         const __le32 *fw_data;
4111         unsigned i, fw_size;
4112
4113         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4114                 return -EINVAL;
4115
4116         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4117                 adev->gfx.pfp_fw->data;
4118         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4119                 adev->gfx.ce_fw->data;
4120         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4121                 adev->gfx.me_fw->data;
4122
4123         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4124         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4125         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4126
4127         gfx_v8_0_cp_gfx_enable(adev, false);
4128
4129         /* PFP */
4130         fw_data = (const __le32 *)
4131                 (adev->gfx.pfp_fw->data +
4132                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4133         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4134         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4135         for (i = 0; i < fw_size; i++)
4136                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4137         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4138
4139         /* CE */
4140         fw_data = (const __le32 *)
4141                 (adev->gfx.ce_fw->data +
4142                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4143         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4144         WREG32(mmCP_CE_UCODE_ADDR, 0);
4145         for (i = 0; i < fw_size; i++)
4146                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4147         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4148
4149         /* ME */
4150         fw_data = (const __le32 *)
4151                 (adev->gfx.me_fw->data +
4152                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4153         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4154         WREG32(mmCP_ME_RAM_WADDR, 0);
4155         for (i = 0; i < fw_size; i++)
4156                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4157         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4158
4159         return 0;
4160 }
4161
4162 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4163 {
4164         u32 count = 0;
4165         const struct cs_section_def *sect = NULL;
4166         const struct cs_extent_def *ext = NULL;
4167
4168         /* begin clear state */
4169         count += 2;
4170         /* context control state */
4171         count += 3;
4172
4173         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4174                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4175                         if (sect->id == SECT_CONTEXT)
4176                                 count += 2 + ext->reg_count;
4177                         else
4178                                 return 0;
4179                 }
4180         }
4181         /* pa_sc_raster_config/pa_sc_raster_config1 */
4182         count += 4;
4183         /* end clear state */
4184         count += 2;
4185         /* clear state */
4186         count += 2;
4187
4188         return count;
4189 }
4190
/*
 * Initialize the gfx CP and emit the clear-state (golden context) setup
 * on gfx ring 0: preamble, context control, the vi_cs_data context
 * registers, per-ASIC raster config, CLEAR_STATE and the CE partition
 * bases.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* reserve space for the whole clear-state sequence plus the
	 * final SET_BASE packet (4 dwords) */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context register extent from the golden tables */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();	/* unhandled gfx8 ASIC: raster config unknown */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4282
4283 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4284 {
4285         struct amdgpu_ring *ring;
4286         u32 tmp;
4287         u32 rb_bufsz;
4288         u64 rb_addr, rptr_addr;
4289         int r;
4290
4291         /* Set the write pointer delay */
4292         WREG32(mmCP_RB_WPTR_DELAY, 0);
4293
4294         /* set the RB to use vmid 0 */
4295         WREG32(mmCP_RB_VMID, 0);
4296
4297         /* Set ring buffer size */
4298         ring = &adev->gfx.gfx_ring[0];
4299         rb_bufsz = order_base_2(ring->ring_size / 8);
4300         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4301         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4302         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4303         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4304 #ifdef __BIG_ENDIAN
4305         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4306 #endif
4307         WREG32(mmCP_RB0_CNTL, tmp);
4308
4309         /* Initialize the ring buffer's read and write pointers */
4310         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4311         ring->wptr = 0;
4312         WREG32(mmCP_RB0_WPTR, ring->wptr);
4313
4314         /* set the wb address wether it's enabled or not */
4315         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4316         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4317         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4318
4319         mdelay(1);
4320         WREG32(mmCP_RB0_CNTL, tmp);
4321
4322         rb_addr = ring->gpu_addr >> 8;
4323         WREG32(mmCP_RB0_BASE, rb_addr);
4324         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4325
4326         /* no gfx doorbells on iceland */
4327         if (adev->asic_type != CHIP_TOPAZ) {
4328                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4329                 if (ring->use_doorbell) {
4330                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4331                                             DOORBELL_OFFSET, ring->doorbell_index);
4332                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4333                                             DOORBELL_HIT, 0);
4334                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4335                                             DOORBELL_EN, 1);
4336                 } else {
4337                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4338                                             DOORBELL_EN, 0);
4339                 }
4340                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4341
4342                 if (adev->asic_type == CHIP_TONGA) {
4343                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4344                                             DOORBELL_RANGE_LOWER,
4345                                             AMDGPU_DOORBELL_GFX_RING0);
4346                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4347
4348                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4349                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4350                 }
4351
4352         }
4353
4354         /* start the ring */
4355         gfx_v8_0_cp_gfx_start(adev);
4356         ring->ready = true;
4357         r = amdgpu_ring_test_ring(ring);
4358         if (r)
4359                 ring->ready = false;
4360
4361         return r;
4362 }
4363
4364 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4365 {
4366         int i;
4367
4368         if (enable) {
4369                 WREG32(mmCP_MEC_CNTL, 0);
4370         } else {
4371                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4372                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4373                         adev->gfx.compute_ring[i].ready = false;
4374         }
4375         udelay(50);
4376 }
4377
/*
 * Load the MEC1 firmware image (and MEC2 if a separate image was
 * fetched) into the compute microengines. The MECs are halted first.
 *
 * Returns 0 on success, -EINVAL if no MEC firmware is present.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4423
/*
 * vi_mqd - Memory Queue Descriptor for VI (gfx v8) compute queues.
 *
 * This mirrors the hardware/firmware MQD layout consumed by the CP MEC:
 * each field sits at the fixed dword position given by the "ordinal"
 * comment, so field order, names used by the HQD setup code, and sizes
 * must not change. A 256-dword scratch area for the ucode follows the
 * 256 architected ordinals.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4684
4685 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4686 {
4687         int i, r;
4688
4689         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4690                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4691
4692                 if (ring->mqd_obj) {
4693                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4694                         if (unlikely(r != 0))
4695                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4696
4697                         amdgpu_bo_unpin(ring->mqd_obj);
4698                         amdgpu_bo_unreserve(ring->mqd_obj);
4699
4700                         amdgpu_bo_unref(&ring->mqd_obj);
4701                         ring->mqd_obj = NULL;
4702                 }
4703         }
4704 }
4705
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) queues
 *
 * Programs the EOP buffers for every MEC pipe, then for each compute ring
 * allocates (if needed), pins and maps an MQD buffer object, fills the MQD
 * in CPU memory while mirroring each field into the corresponding CP_HQD_*
 * register under srbm_mutex, activates the queue, and finally runs a ring
 * test on each ring.
 *
 * Returns 0 on success or a negative error code; on BO setup failure the
 * already-created MQDs are torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on ME1, the rest on ME2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe gets its own MEC_HPD_SIZE slice of the EOP buffer;
		 * the register takes a 256-byte-aligned address (low 8 bits dropped)
		 */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* MQD BO is created lazily on first resume and reused afterwards */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on all shader engines for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* select this ring's me/pipe/queue so the CP_HQD_* registers
		 * below address the right hardware queue descriptor
		 */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* poll (up to usec_timeout us) for the dequeue to complete */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* these ASICs need the MEC doorbell aperture programmed;
			 * NOTE(review): list looks ASIC-generation specific — confirm
			 * against the ASIC init tables before extending
			 */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			/* enable the GENERIC2 interrupt on ME1 pipe 3 for these ASICs */
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* smoke-test every compute ring; a failing ring is marked not ready
	 * but does not fail the whole resume
	 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4964
4965 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4966 {
4967         int r;
4968
4969         if (!(adev->flags & AMD_IS_APU))
4970                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4971
4972         if (!adev->pp_enabled) {
4973                 if (!adev->firmware.smu_load) {
4974                         /* legacy firmware loading */
4975                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4976                         if (r)
4977                                 return r;
4978
4979                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4980                         if (r)
4981                                 return r;
4982                 } else {
4983                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4984                                                         AMDGPU_UCODE_ID_CP_CE);
4985                         if (r)
4986                                 return -EINVAL;
4987
4988                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4989                                                         AMDGPU_UCODE_ID_CP_PFP);
4990                         if (r)
4991                                 return -EINVAL;
4992
4993                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4994                                                         AMDGPU_UCODE_ID_CP_ME);
4995                         if (r)
4996                                 return -EINVAL;
4997
4998                         if (adev->asic_type == CHIP_TOPAZ) {
4999                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5000                                 if (r)
5001                                         return r;
5002                         } else {
5003                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5004                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5005                                 if (r)
5006                                         return -EINVAL;
5007                         }
5008                 }
5009         }
5010
5011         r = gfx_v8_0_cp_gfx_resume(adev);
5012         if (r)
5013                 return r;
5014
5015         r = gfx_v8_0_cp_compute_resume(adev);
5016         if (r)
5017                 return r;
5018
5019         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5020
5021         return 0;
5022 }
5023
/* Toggle both command processor front-ends (GFX first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5029
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback for GFX v8
 *
 * Applies golden register settings, configures the GFX core, then
 * starts the RLC and the command processors, in that order.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	/* CP bring-up is the last step; propagate its status directly */
	return gfx_v8_0_cp_resume(adev);
}
5046
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for GFX v8
 *
 * Tears down in reverse bring-up order: drop the privileged
 * register/instruction fault interrupts, halt both CPs, stop the RLC,
 * free the compute MQDs, and ungate GFX power. Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave power ungated so a later hw_init starts from a known state */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5062
/* Suspend is a full hardware teardown; hw_fini takes the same handle. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5069
/* Resume re-runs the full hardware init; hw_init takes the same handle. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5076
5077 static bool gfx_v8_0_is_idle(void *handle)
5078 {
5079         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5080
5081         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5082                 return false;
5083         else
5084                 return true;
5085 }
5086
5087 static int gfx_v8_0_wait_for_idle(void *handle)
5088 {
5089         unsigned i;
5090         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5091
5092         for (i = 0; i < adev->usec_timeout; i++) {
5093                 if (gfx_v8_0_is_idle(handle))
5094                         return 0;
5095
5096                 udelay(1);
5097         }
5098         return -ETIMEDOUT;
5099 }
5100
/*
 * gfx_v8_0_check_soft_reset - decide whether a GFX soft reset is needed
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for stuck-busy
 * blocks and accumulates the matching GRBM/SRBM soft-reset bits.
 * The computed masks are stashed in adev->gfx.{grbm,srbm}_soft_reset
 * for the pre/soft/post reset callbacks to consume.
 * Returns true when any reset bit was requested, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy pipeline stage implies CP+GFX reset plus a GRBM reset */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro-engine (fetcher/compute/gfx) resets all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5162
/*
 * gfx_v8_0_inactive_hqd - request a hardware queue to drain and deactivate
 *
 * Selects the ring's me/pipe/queue via SRBM and, if the HQD is active,
 * issues a dequeue request (type 2) and polls up to adev->usec_timeout
 * microseconds for the queue to go inactive.
 *
 * NOTE(review): unlike the other SRBM users in this file, this helper does
 * not restore vi_srbm_select(adev, 0, 0, 0, 0) before returning — callers
 * appear to rely on the subsequent reset path; confirm before reusing.
 * Caller is expected to hold the appropriate serialization for SRBM access.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
5182
5183 static int gfx_v8_0_pre_soft_reset(void *handle)
5184 {
5185         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5186         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5187
5188         if ((!adev->gfx.grbm_soft_reset) &&
5189             (!adev->gfx.srbm_soft_reset))
5190                 return 0;
5191
5192         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5193         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5194
5195         /* stop the rlc */
5196         gfx_v8_0_rlc_stop(adev);
5197
5198         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5199             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5200                 /* Disable GFX parsing/prefetching */
5201                 gfx_v8_0_cp_gfx_enable(adev, false);
5202
5203         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5204             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5205             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5206             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5207                 int i;
5208
5209                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5210                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5211
5212                         gfx_v8_0_inactive_hqd(adev, ring);
5213                 }
5214                 /* Disable MEC parsing/prefetching */
5215                 gfx_v8_0_cp_compute_enable(adev, false);
5216         }
5217
5218        return 0;
5219 }
5220
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits
 *
 * Uses the masks recorded by gfx_v8_0_check_soft_reset(). The sequence is:
 * stall/clear GFX via GMCON_DEBUG, assert then deassert the reset bits in
 * GRBM_SOFT_RESET and SRBM_SOFT_RESET (with read-backs to post the writes
 * and ~50 us holds), then release the GMCON stall. Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall the memory controller's GFX clients while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to post the write before the hold delay */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* read back to post the write before the hold delay */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5282
/*
 * gfx_v8_0_init_hqd - zero a ring's HQD state after reset
 *
 * Selects the ring's me/pipe/queue, clears any pending dequeue request
 * and the queue read/write pointers, then restores the default SRBM
 * selection.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5292
/*
 * gfx_v8_0_post_soft_reset - restart the GFX block after a soft reset
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): for the engines that were reset,
 * resumes the GFX ring and/or reinitializes each compute HQD and resumes
 * the compute queues, then restarts the RLC. Always returns 0 (no-op when
 * no reset was requested).
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* clear stale HQD state before re-activating the queues */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5326
5327 /**
5328  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5329  *
5330  * @adev: amdgpu_device pointer
5331  *
5332  * Fetches a GPU clock counter snapshot.
5333  * Returns the 64 bit clock counter snapshot.
5334  */
5335 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5336 {
5337         uint64_t clock;
5338
5339         mutex_lock(&adev->gfx.gpu_clock_mutex);
5340         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5341         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5342                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5343         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5344         return clock;
5345 }
5346
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA setup for a VMID
 *
 * Converts the byte-granular base/size arguments to hardware units and
 * emits four WRITE_DATA PM4 packets that program this VMID's GDS memory
 * base/size, GWS allocation, and OA mask registers on the ring.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* scale byte values down to the units each block expects */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous bit mask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5394
/* GFX helper callbacks exported through adev->gfx.funcs (wired up in early_init). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5399
/*
 * gfx_v8_0_early_init - IP block early init callback.
 *
 * Fixes the GFX/compute ring counts and installs the per-ASIC function
 * tables (gfx helpers, ring funcs, IRQ funcs, GDS sizing, RLC safe-mode
 * funcs) before any hardware is touched.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5414
/*
 * gfx_v8_0_late_init - IP block late init callback.
 *
 * Enables the privileged register/instruction fault interrupts, runs the
 * EDC GPR clear workaround (which submits IBs, hence late init), then asks
 * powerplay to gate GFX power.  Returns 0 on success or a negative errno
 * propagated from IRQ setup / the EDC workaround.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* hand GFX powergating policy over to powerplay */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5438
5439 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5440                                                        bool enable)
5441 {
5442         if (adev->asic_type == CHIP_POLARIS11)
5443                 /* Send msg to SMU via Powerplay */
5444                 amdgpu_set_powergating_state(adev,
5445                                              AMD_IP_BLOCK_TYPE_SMC,
5446                                              enable ?
5447                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5448
5449         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5450 }
5451
5452 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5453                                                         bool enable)
5454 {
5455         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5456 }
5457
5458 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5459                 bool enable)
5460 {
5461         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5462 }
5463
5464 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5465                                           bool enable)
5466 {
5467         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5468 }
5469
5470 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5471                                                 bool enable)
5472 {
5473         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5474
5475         /* Read any GFX register to wake up GFX. */
5476         if (!enable)
5477                 RREG32(mmDB_RENDER_CONTROL);
5478 }
5479
5480 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5481                                           bool enable)
5482 {
5483         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5484                 cz_enable_gfx_cg_power_gating(adev, true);
5485                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5486                         cz_enable_gfx_pipeline_power_gating(adev, true);
5487         } else {
5488                 cz_enable_gfx_cg_power_gating(adev, false);
5489                 cz_enable_gfx_pipeline_power_gating(adev, false);
5490         }
5491 }
5492
5493 static int gfx_v8_0_set_powergating_state(void *handle,
5494                                           enum amd_powergating_state state)
5495 {
5496         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5497         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5498
5499         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5500                 return 0;
5501
5502         switch (adev->asic_type) {
5503         case CHIP_CARRIZO:
5504         case CHIP_STONEY:
5505                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5506                         cz_update_gfx_cg_power_gating(adev, enable);
5507
5508                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5509                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5510                 else
5511                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5512
5513                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5514                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5515                 else
5516                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5517                 break;
5518         case CHIP_POLARIS11:
5519                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5520                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5521                 else
5522                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5523
5524                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5525                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5526                 else
5527                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5528
5529                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5530                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5531                 else
5532                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5533                 break;
5534         default:
5535                 break;
5536         }
5537
5538         return 0;
5539 }
5540
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes bus.
 * @adev:     amdgpu device
 * @reg_addr: BPM register address to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd:      BPM command/data value (SET/CLE serdes command)
 *
 * Selects all SEs/SHs, enables every CU and non-CU serdes master, then
 * programs RLC_SERDES_WR_CTRL with the command, register address and a
 * broadcast BPM address.  Stoney keeps its reset-default BPM_DATA/REG_ADDR
 * fields, so its clear mask is smaller than the other ASICs'.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every shader engine / shader array */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear the command/select fields before composing the new request */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* 0xff BPM address = broadcast to all BPMs */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5581
/* RLC safe-mode mailbox messages; CZ/ST use RLC_GPR_REG2 as the mailbox. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
/* RLC_GPR_REG2 fields: bit 0 = handshake request, bits 4:1 = message code. */
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5588
/*
 * cz_enter_rlc_safe_mode - ask the CZ/ST RLC firmware to enter safe mode.
 *
 * Safe mode parks the RLC so CG/PG related registers can be reprogrammed
 * without racing the firmware.  No-op when the RLC F32 core is not running
 * or no CG/PG feature that needs the handshake is enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/*
		 * NOTE(review): 'data' still holds the RLC_CNTL contents here
		 * and is OR'ed with the REQ/MESSAGE bits before being written
		 * to RLC_GPR_REG2 - presumably the remaining bits are
		 * don't-care for the mailbox; confirm against RLC docs.
		 */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for GFX clock and power status to report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge (clear) the request bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5624
/*
 * cz_exit_rlc_safe_mode - release the CZ/ST RLC from safe mode.
 *
 * Mirrors cz_enter_rlc_safe_mode(): posts MSG_EXIT_RLC_SAFE_MODE through
 * the RLC_GPR_REG2 mailbox, then waits for the firmware to clear the
 * request bit.  No-op when the RLC F32 core is not running.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* see NOTE in cz_enter_rlc_safe_mode about reusing 'data' */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to acknowledge (clear) the request bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5650
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE.
 *
 * Used on ASICs that implement the dedicated RLC_SAFE_MODE handshake
 * register (message 1 = enter).  Only clock-gating flags are checked here,
 * unlike the CZ variant which also checks PG flags.  No-op when the RLC
 * F32 core is not running.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD = request, MESSAGE = 1 (enter safe mode) */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power status to report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge (clear) the CMD bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5684
/*
 * iceland_exit_rlc_safe_mode - release the RLC from safe mode.
 *
 * Writes CMD with a cleared MESSAGE field (0 = exit) to RLC_SAFE_MODE,
 * but only when safe mode was previously entered, then waits for the
 * firmware to clear the CMD bit.  No-op when the RLC F32 core is off.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD = request, MESSAGE = 0 (exit safe mode) */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge (clear) the CMD bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5709
/* Safe-mode stub for ASICs needing no RLC handshake: just track the state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5714
/* Safe-mode stub for ASICs needing no RLC handshake: just track the state. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5719
/* CZ/ST: safe-mode handshake through the RLC_GPR_REG2 mailbox. */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5724
/* Iceland-style ASICs: safe-mode handshake through RLC_SAFE_MODE. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5729
/* ASICs with no safe-mode handshake: stubs that only track the flag. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5734
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS and CGTS.
 * @adev:   amdgpu device
 * @enable: true to enable medium grain clock gating, false to disable
 *
 * Executes the numbered hardware sequence under RLC safe mode: light-sleep
 * enables, the RLC_CGTT_MGCG_OVERRIDE register, serdes BPM commands and
 * the CGTS (tree shade) shader-module controls.  The step ordering and
 * the serdes-idle waits between steps are part of the required sequence.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides (APUs keep GRBM) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* only drop the LS override when CGTS light sleep is on */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5838
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS.
 * @adev:   amdgpu device
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Runs the numbered CGCG/CGLS hardware sequence under RLC safe mode:
 * GUI idle interrupts, the RLC_CGTT_MGCG_OVERRIDE CGCG/CGLS bits, serdes
 * BPM commands, and finally RLC_CGCG_CGLS_CTRL itself.  Step ordering and
 * the serdes-idle waits are part of the required sequence.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (multiple reads on purpose) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5929 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5930                                             bool enable)
5931 {
5932         if (enable) {
5933                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5934                  * ===  MGCG + MGLS + TS(CG/LS) ===
5935                  */
5936                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5937                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5938         } else {
5939                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5940                  * ===  CGCG + CGLS ===
5941                  */
5942                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5943                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5944         }
5945         return 0;
5946 }
5947
5948 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5949                                           enum amd_clockgating_state state)
5950 {
5951         uint32_t msg_id, pp_state;
5952         void *pp_handle = adev->powerplay.pp_handle;
5953
5954         if (state == AMD_CG_STATE_UNGATE)
5955                 pp_state = 0;
5956         else
5957                 pp_state = PP_STATE_CG | PP_STATE_LS;
5958
5959         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5960                         PP_BLOCK_GFX_CG,
5961                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5962                         pp_state);
5963         amd_set_clockgating_by_smu(pp_handle, msg_id);
5964
5965         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5966                         PP_BLOCK_GFX_MG,
5967                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5968                         pp_state);
5969         amd_set_clockgating_by_smu(pp_handle, msg_id);
5970
5971         return 0;
5972 }
5973
5974 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5975                                           enum amd_clockgating_state state)
5976 {
5977         uint32_t msg_id, pp_state;
5978         void *pp_handle = adev->powerplay.pp_handle;
5979
5980         if (state == AMD_CG_STATE_UNGATE)
5981                 pp_state = 0;
5982         else
5983                 pp_state = PP_STATE_CG | PP_STATE_LS;
5984
5985         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5986                         PP_BLOCK_GFX_CG,
5987                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5988                         pp_state);
5989         amd_set_clockgating_by_smu(pp_handle, msg_id);
5990
5991         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5992                         PP_BLOCK_GFX_3D,
5993                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5994                         pp_state);
5995         amd_set_clockgating_by_smu(pp_handle, msg_id);
5996
5997         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5998                         PP_BLOCK_GFX_MG,
5999                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6000                         pp_state);
6001         amd_set_clockgating_by_smu(pp_handle, msg_id);
6002
6003         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6004                         PP_BLOCK_GFX_RLC,
6005                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6006                         pp_state);
6007         amd_set_clockgating_by_smu(pp_handle, msg_id);
6008
6009         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6010                         PP_BLOCK_GFX_CP,
6011                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6012                         pp_state);
6013         amd_set_clockgating_by_smu(pp_handle, msg_id);
6014
6015         return 0;
6016 }
6017
6018 static int gfx_v8_0_set_clockgating_state(void *handle,
6019                                           enum amd_clockgating_state state)
6020 {
6021         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6022
6023         switch (adev->asic_type) {
6024         case CHIP_FIJI:
6025         case CHIP_CARRIZO:
6026         case CHIP_STONEY:
6027                 gfx_v8_0_update_gfx_clock_gating(adev,
6028                                                  state == AMD_CG_STATE_GATE ? true : false);
6029                 break;
6030         case CHIP_TONGA:
6031                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6032                 break;
6033         case CHIP_POLARIS10:
6034         case CHIP_POLARIS11:
6035                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6036                 break;
6037         default:
6038                 break;
6039         }
6040         return 0;
6041 }
6042
6043 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6044 {
6045         return ring->adev->wb.wb[ring->rptr_offs];
6046 }
6047
6048 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6049 {
6050         struct amdgpu_device *adev = ring->adev;
6051
6052         if (ring->use_doorbell)
6053                 /* XXX check if swapping is necessary on BE */
6054                 return ring->adev->wb.wb[ring->wptr_offs];
6055         else
6056                 return RREG32(mmCP_RB0_WPTR);
6057 }
6058
6059 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6060 {
6061         struct amdgpu_device *adev = ring->adev;
6062
6063         if (ring->use_doorbell) {
6064                 /* XXX check if swapping is necessary on BE */
6065                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6066                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6067         } else {
6068                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6069                 (void)RREG32(mmCP_RB0_WPTR);
6070         }
6071 }
6072
6073 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6074 {
6075         u32 ref_and_mask, reg_mem_engine;
6076
6077         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
6078                 switch (ring->me) {
6079                 case 1:
6080                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6081                         break;
6082                 case 2:
6083                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6084                         break;
6085                 default:
6086                         return;
6087                 }
6088                 reg_mem_engine = 0;
6089         } else {
6090                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6091                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6092         }
6093
6094         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6095         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6096                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6097                                  reg_mem_engine));
6098         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6099         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6100         amdgpu_ring_write(ring, ref_and_mask);
6101         amdgpu_ring_write(ring, ref_and_mask);
6102         amdgpu_ring_write(ring, 0x20); /* poll interval */
6103 }
6104
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - emit an HDP cache invalidate.
 *
 * Emits a confirmed WRITE_DATA packet storing 1 to HDP_DEBUG0 —
 * presumably the VI-era trick for invalidating the HDP read cache;
 * confirm against the HDP block documentation.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6116
6117 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6118                                       struct amdgpu_ib *ib,
6119                                       unsigned vm_id, bool ctx_switch)
6120 {
6121         u32 header, control = 0;
6122
6123         if (ib->flags & AMDGPU_IB_FLAG_CE)
6124                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6125         else
6126                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6127
6128         control |= ib->length_dw | (vm_id << 24);
6129
6130         amdgpu_ring_write(ring, header);
6131         amdgpu_ring_write(ring,
6132 #ifdef __BIG_ENDIAN
6133                           (2 << 0) |
6134 #endif
6135                           (ib->gpu_addr & 0xFFFFFFFC));
6136         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6137         amdgpu_ring_write(ring, control);
6138 }
6139
6140 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6141                                           struct amdgpu_ib *ib,
6142                                           unsigned vm_id, bool ctx_switch)
6143 {
6144         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6145
6146         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6147         amdgpu_ring_write(ring,
6148 #ifdef __BIG_ENDIAN
6149                                 (2 << 0) |
6150 #endif
6151                                 (ib->gpu_addr & 0xFFFFFFFC));
6152         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6153         amdgpu_ring_write(ring, control);
6154 }
6155
6156 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6157                                          u64 seq, unsigned flags)
6158 {
6159         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6160         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6161
6162         /* EVENT_WRITE_EOP - flush caches, send int */
6163         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6164         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6165                                  EOP_TC_ACTION_EN |
6166                                  EOP_TC_WB_ACTION_EN |
6167                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6168                                  EVENT_INDEX(5)));
6169         amdgpu_ring_write(ring, addr & 0xfffffffc);
6170         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6171                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6172         amdgpu_ring_write(ring, lower_32_bits(seq));
6173         amdgpu_ring_write(ring, upper_32_bits(seq));
6174
6175 }
6176
6177 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6178 {
6179         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6180         uint32_t seq = ring->fence_drv.sync_seq;
6181         uint64_t addr = ring->fence_drv.gpu_addr;
6182
6183         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6184         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6185                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6186                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6187         amdgpu_ring_write(ring, addr & 0xfffffffc);
6188         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6189         amdgpu_ring_write(ring, seq);
6190         amdgpu_ring_write(ring, 0xffffffff);
6191         amdgpu_ring_write(ring, 4); /* poll interval */
6192 }
6193
/* Emit the packet sequence that installs @pd_addr as the page-directory
 * base for @vm_id and flushes that VMID's TLB.  Shared by gfx and
 * compute rings; only the gfx path pads with NOPs and syncs PFP to ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
	if (usepfp)
		amdgpu_ring_insert_nop(ring, 128);

	/* point the VMID's page-table base register at the new directory */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		/* VMIDs 0-7 use the CONTEXT0..7 base-address registers */
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		/* VMIDs 8-15 live in a separate register block */
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	/* base address is stored as a 4 KB page frame number */
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6246
6247 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6248 {
6249         return ring->adev->wb.wb[ring->wptr_offs];
6250 }
6251
6252 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6253 {
6254         struct amdgpu_device *adev = ring->adev;
6255
6256         /* XXX check if swapping is necessary on BE */
6257         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6258         WDOORBELL32(ring->doorbell_index, ring->wptr);
6259 }
6260
/* Emit a compute-ring fence: RELEASE_MEM flushes the TC/TCL1 caches,
 * writes @seq to @addr and, depending on @flags, raises an interrupt.
 * Note the dword layout differs from the gfx EVENT_WRITE_EOP variant.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = 64-bit fence value, 1 = 32-bit */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6281
6282 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6283 {
6284         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6285         amdgpu_ring_write(ring, 0);
6286 }
6287
6288 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6289 {
6290         uint32_t dw2 = 0;
6291
6292         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6293         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6294                 /* set load_global_config & load_global_uconfig */
6295                 dw2 |= 0x8001;
6296                 /* set load_cs_sh_regs */
6297                 dw2 |= 0x01000000;
6298                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6299                 dw2 |= 0x10002;
6300
6301                 /* set load_ce_ram if preamble presented */
6302                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6303                         dw2 |= 0x10000000;
6304         } else {
6305                 /* still load_ce_ram if this is the first time preamble presented
6306                  * although there is no context switch happens.
6307                  */
6308                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6309                         dw2 |= 0x10000000;
6310         }
6311
6312         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6313         amdgpu_ring_write(ring, dw2);
6314         amdgpu_ring_write(ring, 0);
6315 }
6316
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
	/* gfx_v8_0_ring_emit_ib_gfx writes exactly 4 dwords */
	return 4;
}
6322
/* Worst-case dword count of the framing emitted around a gfx submission.
 * Each term must be kept in sync with the corresponding emit function.
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3; /* gfx_v8_ring_emit_cntxcntl */
}
6335
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
	/* gfx_v8_0_ring_emit_ib_compute writes exactly 4 dwords */
	return 4;
}
6341
/* Worst-case dword count of the framing emitted around a compute submission.
 * Each term must be kept in sync with the corresponding emit function.
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
6352
6353 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6354                                                  enum amdgpu_interrupt_state state)
6355 {
6356         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6357                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6358 }
6359
6360 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6361                                                      int me, int pipe,
6362                                                      enum amdgpu_interrupt_state state)
6363 {
6364         /*
6365          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6366          * handles the setting of interrupts for this specific pipe. All other
6367          * pipes' interrupts are set by amdkfd.
6368          */
6369
6370         if (me == 1) {
6371                 switch (pipe) {
6372                 case 0:
6373                         break;
6374                 default:
6375                         DRM_DEBUG("invalid pipe %d\n", pipe);
6376                         return;
6377                 }
6378         } else {
6379                 DRM_DEBUG("invalid me %d\n", me);
6380                 return;
6381         }
6382
6383         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6384                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6385 }
6386
6387 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6388                                              struct amdgpu_irq_src *source,
6389                                              unsigned type,
6390                                              enum amdgpu_interrupt_state state)
6391 {
6392         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6393                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6394
6395         return 0;
6396 }
6397
6398 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6399                                               struct amdgpu_irq_src *source,
6400                                               unsigned type,
6401                                               enum amdgpu_interrupt_state state)
6402 {
6403         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6404                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6405
6406         return 0;
6407 }
6408
/* Route an EOP interrupt-state change to the right engine: the gfx ring
 * or one of the MEC1/MEC2 compute pipes, selected by the IRQ @type.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		/* unknown types are silently ignored */
		break;
	}
	return 0;
}
6447
/* EOP interrupt handler: decode me/pipe/queue from the IV entry's
 * ring_id and run fence processing on the matching ring.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;	/* bits 3:2 */
	pipe_id = (entry->ring_id & 0x03) >> 0;	/* bits 1:0 */
	queue_id = (entry->ring_id & 0x70) >> 4;	/* bits 6:4 */

	switch (me_id) {
	case 0:
		/* ME0 is the gfx engine, which has a single ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		/* MEC1/MEC2: locate the compute ring this event belongs to */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6479
/* Privileged-register fault handler: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6488
/* Privileged-instruction fault handler: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6497
/* IP-block callbacks for the GFX8 block (init/teardown, power state,
 * soft reset); referenced by the per-ASIC IP block tables.
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6517
/* Ring callbacks for the gfx ring (EVENT_WRITE_EOP fences, CE support). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
};
6539
/* Ring callbacks for compute rings (doorbell wptr, RELEASE_MEM fences,
 * no switch-buffer/context-control).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
};
6559
6560 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6561 {
6562         int i;
6563
6564         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6565                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6566
6567         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6568                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6569 }
6570
/* EOP (fence) interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6575
/* Privileged-register fault interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6580
/* Privileged-instruction fault interrupt source callbacks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6585
6586 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6587 {
6588         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6589         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6590
6591         adev->gfx.priv_reg_irq.num_types = 1;
6592         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6593
6594         adev->gfx.priv_inst_irq.num_types = 1;
6595         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6596 }
6597
6598 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6599 {
6600         switch (adev->asic_type) {
6601         case CHIP_TOPAZ:
6602                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6603                 break;
6604         case CHIP_STONEY:
6605         case CHIP_CARRIZO:
6606                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6607                 break;
6608         default:
6609                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6610                 break;
6611         }
6612 }
6613
/* Initialize the GDS/GWS/OA totals and their gfx vs. compute (CS)
 * partition sizes; partitions depend on how much GDS the ASIC reports.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		/* 64 KB of GDS */
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		/* presumably the smaller-GDS case — TODO confirm sizes */
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6641
6642 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6643                                                  u32 bitmap)
6644 {
6645         u32 data;
6646
6647         if (!bitmap)
6648                 return;
6649
6650         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6651         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6652
6653         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6654 }
6655
6656 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6657 {
6658         u32 data, mask;
6659
6660         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6661                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6662
6663         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6664
6665         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6666 }
6667
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the AO CU mask (first two active CUs of each SH,
 * presumably "always on" per the field name — TODO confirm).
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* user-requested CU disable masks, one per (SE, SH) pair, max 4x2 */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	/* select_se_sh() retargets the GRBM index registers, so serialize
	 * against other GRBM index users
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two go into the AO bitmap */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}