GNU Linux-libre 4.19.264-gnu1 (releases.git): drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

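/*
 * Helpers for assembling GB_TILE_MODEn / GB_MACROTILE_MODEn register
 * values field by field; the __SHIFT constants come from the gca/gfx_8_0
 * register headers included above.
 */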
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

/*(DEBLOBBED)*/
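
/*
 * Per-VMID GDS register offsets, {base, size, GWS, OA} for each of the
 * 16 VMIDs (GDS = Global Data Share, GWS = Global Wave Sync,
 * OA = Ordered Append).
 */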
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

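/*
 * The "golden" register tables below are {offset, and_mask, or_mask}
 * triplets consumed by amdgpu_device_program_register_sequence(): the
 * bits in and_mask are cleared and or_mask is ORed in, except that an
 * and_mask of 0xffffffff writes or_mask to the register outright.
 */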
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};


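/*
 * Human-readable decodings of the SQ_EDC_INFO SOURCE field, used when
 * reporting SQ EDC (error detection and correction) interrupts.
 */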
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

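/*
 * Program the per-ASIC "golden" register settings (clock gating setup,
 * raster configuration, SPI resource reservations) plus a few
 * board-specific quirks at init time.
 */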
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

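/*
 * Expose eight CP scratch registers, starting at mmSCRATCH_REG0, for
 * driver use such as the ring test below.
 */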
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

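/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, emit
 * a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll the
 * register until the write lands or adev->usec_timeout expires.
 */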
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

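/*
 * Indirect buffer (IB) test: submit a small IB holding a WRITE_DATA
 * packet that stores 0xDEADBEEF to a writeback slot in GPU-visible
 * memory, wait on the resulting fence, and check the value in memory.
 */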
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned int index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r) {
                dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
                return r;
        }

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF) {
                DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("ib test on ring %d failed\n", ring->idx);
                r = -EINVAL;
        }

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}


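/*
 * Drop all CP/RLC/MEC firmware references and free the RLC register
 * list that was copied out of the RLC firmware header.
 */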
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

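/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC (plus MEC2 where it
 * exists) microcode for this ASIC, cache version/feature fields from
 * the firmware headers, and, when the SMU loads the firmware, register
 * each image in the ucode table and account its size.  Note that in
 * Linux-libre the firmware file names are deblobbed and
 * reject_firmware() takes the place of request_firmware().
 */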
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS12:
                chip_name = "polaris12";
                break;
        case CHIP_VEGAM:
                chip_name = "vegam";
                break;
        default:
                BUG();
        }

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                        err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                        err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                        err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * Support for MCBP/Virtualization in combination with chained IBs
         * was formally released with feature version 46.
         */
        if (adev->gfx.ce_feature_version >= 46 &&
            adev->gfx.pfp_feature_version >= 46) {
                adev->virt.chained_ib_support = true;
                DRM_INFO("Chained IB support enabled!\n");
        } else {
                adev->virt.chained_ib_support = false;
        }

        snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
        err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);

        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                        err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                        snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                        err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                        if (err == -ENOENT) {
                                snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                                err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                        }
                } else {
                        snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
                        err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                }
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

1138                 /* we also need to account for the CP jump table (JT) */
1139                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1140                 adev->firmware.fw_size +=
1141                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1142
1143                 if (amdgpu_sriov_vf(adev)) {
1144                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1145                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1146                         info->fw = adev->gfx.mec_fw;
1147                         adev->firmware.fw_size +=
1148                                 ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
1149                 }
1150
1151                 if (adev->gfx.mec2_fw) {
1152                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1153                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1154                         info->fw = adev->gfx.mec2_fw;
1155                         header = (const struct common_firmware_header *)info->fw->data;
1156                         adev->firmware.fw_size +=
1157                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1158                 }
1159
1160         }
1161
1162 out:
1163         if (err) {
1164                 dev_err(adev->dev,
1165                         "gfx8: Failed to load firmware \"%s\"\n",
1166                         fw_name);
1167                 release_firmware(adev->gfx.pfp_fw);
1168                 adev->gfx.pfp_fw = NULL;
1169                 release_firmware(adev->gfx.me_fw);
1170                 adev->gfx.me_fw = NULL;
1171                 release_firmware(adev->gfx.ce_fw);
1172                 adev->gfx.ce_fw = NULL;
1173                 release_firmware(adev->gfx.rlc_fw);
1174                 adev->gfx.rlc_fw = NULL;
1175                 release_firmware(adev->gfx.mec_fw);
1176                 adev->gfx.mec_fw = NULL;
1177                 release_firmware(adev->gfx.mec2_fw);
1178                 adev->gfx.mec2_fw = NULL;
1179         }
1180         return err;
1181 }
1182
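     /*
      * Build the clear state buffer (CSB): a PM4 stream the RLC replays to
      * load the "golden" per-context register state.  SET_CONTEXT_REG
      * writes taken from the cs_data section/extent tables are bracketed
      * by PREAMBLE begin/end markers, followed by the per-RB raster
      * config and a final CLEAR_STATE packet.
      */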
1183 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1184                                     volatile u32 *buffer)
1185 {
1186         u32 count = 0, i;
1187         const struct cs_section_def *sect = NULL;
1188         const struct cs_extent_def *ext = NULL;
1189
1190         if (adev->gfx.rlc.cs_data == NULL)
1191                 return;
1192         if (buffer == NULL)
1193                 return;
1194
1195         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1196         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1197
1198         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1199         buffer[count++] = cpu_to_le32(0x80000000);
1200         buffer[count++] = cpu_to_le32(0x80000000);
1201
1202         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1203                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1204                         if (sect->id == SECT_CONTEXT) {
1205                                 buffer[count++] =
1206                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1207                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1208                                                 PACKET3_SET_CONTEXT_REG_START);
1209                                 for (i = 0; i < ext->reg_count; i++)
1210                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1211                         } else {
1212                                 return;
1213                         }
1214                 }
1215         }
1216
1217         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1218         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1219                         PACKET3_SET_CONTEXT_REG_START);
1220         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1221         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1222
1223         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1224         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1225
1226         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1227         buffer[count++] = cpu_to_le32(0);
1228 }
1229
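     /*
      * Copy the CP jump table (JT) out of each microcode image (CE, PFP,
      * ME, MEC, and additionally MEC2 on CARRIZO) into the cp_table BO,
      * packed back to back; jt_offset/jt_size come from the firmware
      * headers.
      */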
1230 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1231 {
1232         const __le32 *fw_data;
1233         volatile u32 *dst_ptr;
1234         int me, i, max_me = 4;
1235         u32 bo_offset = 0;
1236         u32 table_offset, table_size;
1237
1238         if (adev->asic_type == CHIP_CARRIZO)
1239                 max_me = 5;
1240
1241         /* write the cp table buffer */
1242         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1243         for (me = 0; me < max_me; me++) {
1244                 if (me == 0) {
1245                         const struct gfx_firmware_header_v1_0 *hdr =
1246                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1247                         fw_data = (const __le32 *)
1248                                 (adev->gfx.ce_fw->data +
1249                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1250                         table_offset = le32_to_cpu(hdr->jt_offset);
1251                         table_size = le32_to_cpu(hdr->jt_size);
1252                 } else if (me == 1) {
1253                         const struct gfx_firmware_header_v1_0 *hdr =
1254                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1255                         fw_data = (const __le32 *)
1256                                 (adev->gfx.pfp_fw->data +
1257                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1258                         table_offset = le32_to_cpu(hdr->jt_offset);
1259                         table_size = le32_to_cpu(hdr->jt_size);
1260                 } else if (me == 2) {
1261                         const struct gfx_firmware_header_v1_0 *hdr =
1262                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1263                         fw_data = (const __le32 *)
1264                                 (adev->gfx.me_fw->data +
1265                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1266                         table_offset = le32_to_cpu(hdr->jt_offset);
1267                         table_size = le32_to_cpu(hdr->jt_size);
1268                 } else if (me == 3) {
1269                         const struct gfx_firmware_header_v1_0 *hdr =
1270                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1271                         fw_data = (const __le32 *)
1272                                 (adev->gfx.mec_fw->data +
1273                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1274                         table_offset = le32_to_cpu(hdr->jt_offset);
1275                         table_size = le32_to_cpu(hdr->jt_size);
1276                 } else if (me == 4) {
1277                         const struct gfx_firmware_header_v1_0 *hdr =
1278                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1279                         fw_data = (const __le32 *)
1280                                 (adev->gfx.mec2_fw->data +
1281                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1282                         table_offset = le32_to_cpu(hdr->jt_offset);
1283                         table_size = le32_to_cpu(hdr->jt_size);
1284                 }
1285
1286                 for (i = 0; i < table_size; i++) {
1287                         dst_ptr[bo_offset + i] =
1288                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1289                 }
1290
1291                 bo_offset += table_size;
1292         }
1293 }
1294
1295 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1296 {
1297         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1298         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1299 }
1300
1301 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1302 {
1303         volatile u32 *dst_ptr;
1304         u32 dws;
1305         const struct cs_section_def *cs_data;
1306         int r;
1307
1308         adev->gfx.rlc.cs_data = vi_cs_data;
1309
1310         cs_data = adev->gfx.rlc.cs_data;
1311
1312         if (cs_data) {
1313                 /* clear state block */
1314                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1315
1316                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1317                                               AMDGPU_GEM_DOMAIN_VRAM,
1318                                               &adev->gfx.rlc.clear_state_obj,
1319                                               &adev->gfx.rlc.clear_state_gpu_addr,
1320                                               (void **)&adev->gfx.rlc.cs_ptr);
1321                 if (r) {
1322                         dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
1323                         gfx_v8_0_rlc_fini(adev);
1324                         return r;
1325                 }
1326
1327                 /* set up the cs buffer */
1328                 dst_ptr = adev->gfx.rlc.cs_ptr;
1329                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1330                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1331                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1332         }
1333
1334         if ((adev->asic_type == CHIP_CARRIZO) ||
1335             (adev->asic_type == CHIP_STONEY)) {
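                     /* room for 5 jump tables (up to 96 dwords each, padded to 2KB) plus 64KB of GDS backup */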
1336                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1337                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1338                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1339                                               &adev->gfx.rlc.cp_table_obj,
1340                                               &adev->gfx.rlc.cp_table_gpu_addr,
1341                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1342                 if (r) {
1343                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1344                         return r;
1345                 }
1346
1347                 cz_init_cp_jump_table(adev);
1348
1349                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1350                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1351         }
1352
1353         return 0;
1354 }
1355
1356 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1357 {
1358         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1359 }
1360
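     /*
      * Allocate one GFX8_MEC_HPD_SIZE EOP buffer per acquired compute
      * queue in a single GTT BO and zero it; gfx_v8_0_compute_ring_init()
      * indexes into it by ring_id.
      */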
1361 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1362 {
1363         int r;
1364         u32 *hpd;
1365         size_t mec_hpd_size;
1366
1367         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1368
1369         /* take ownership of the relevant compute queues */
1370         amdgpu_gfx_compute_queue_acquire(adev);
1371
1372         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1373
1374         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1375                                       AMDGPU_GEM_DOMAIN_GTT,
1376                                       &adev->gfx.mec.hpd_eop_obj,
1377                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1378                                       (void **)&hpd);
1379         if (r) {
1380                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1381                 return r;
1382         }
1383
1384         memset(hpd, 0, mec_hpd_size);
1385
1386         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1387         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1388
1389         return 0;
1390 }
1391
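     /*
      * Pre-assembled GCN machine code for the EDC GPR workaround below:
      * the first stream touches the VGPR file, the second the SGPR file.
      * These are raw dwords shipped with the driver; the trailing pair in
      * each appears to decode as s_barrier / s_endpgm.
      */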
1392 static const u32 vgpr_init_compute_shader[] =
1393 {
1394         0x7e000209, 0x7e020208,
1395         0x7e040207, 0x7e060206,
1396         0x7e080205, 0x7e0a0204,
1397         0x7e0c0203, 0x7e0e0202,
1398         0x7e100201, 0x7e120200,
1399         0x7e140209, 0x7e160208,
1400         0x7e180207, 0x7e1a0206,
1401         0x7e1c0205, 0x7e1e0204,
1402         0x7e200203, 0x7e220202,
1403         0x7e240201, 0x7e260200,
1404         0x7e280209, 0x7e2a0208,
1405         0x7e2c0207, 0x7e2e0206,
1406         0x7e300205, 0x7e320204,
1407         0x7e340203, 0x7e360202,
1408         0x7e380201, 0x7e3a0200,
1409         0x7e3c0209, 0x7e3e0208,
1410         0x7e400207, 0x7e420206,
1411         0x7e440205, 0x7e460204,
1412         0x7e480203, 0x7e4a0202,
1413         0x7e4c0201, 0x7e4e0200,
1414         0x7e500209, 0x7e520208,
1415         0x7e540207, 0x7e560206,
1416         0x7e580205, 0x7e5a0204,
1417         0x7e5c0203, 0x7e5e0202,
1418         0x7e600201, 0x7e620200,
1419         0x7e640209, 0x7e660208,
1420         0x7e680207, 0x7e6a0206,
1421         0x7e6c0205, 0x7e6e0204,
1422         0x7e700203, 0x7e720202,
1423         0x7e740201, 0x7e760200,
1424         0x7e780209, 0x7e7a0208,
1425         0x7e7c0207, 0x7e7e0206,
1426         0xbf8a0000, 0xbf810000,
1427 };
1428
1429 static const u32 sgpr_init_compute_shader[] =
1430 {
1431         0xbe8a0100, 0xbe8c0102,
1432         0xbe8e0104, 0xbe900106,
1433         0xbe920108, 0xbe940100,
1434         0xbe960102, 0xbe980104,
1435         0xbe9a0106, 0xbe9c0108,
1436         0xbe9e0100, 0xbea00102,
1437         0xbea20104, 0xbea40106,
1438         0xbea60108, 0xbea80100,
1439         0xbeaa0102, 0xbeac0104,
1440         0xbeae0106, 0xbeb00108,
1441         0xbeb20100, 0xbeb40102,
1442         0xbeb60104, 0xbeb80106,
1443         0xbeba0108, 0xbebc0100,
1444         0xbebe0102, 0xbec00104,
1445         0xbec20106, 0xbec40108,
1446         0xbec60100, 0xbec80102,
1447         0xbee60004, 0xbee70005,
1448         0xbeea0006, 0xbeeb0007,
1449         0xbee80008, 0xbee90009,
1450         0xbefc0000, 0xbf8a0000,
1451         0xbf810000, 0x00000000,
1452 };
1453
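     /*
      * (register, value) pairs programmed via SET_SH_REG ahead of each
      * EDC init dispatch; the thread counts and PGM_RSRC1 GPR requests
      * are sized so the dispatches walk the full VGPR/SGPR files.
      */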
1454 static const u32 vgpr_init_regs[] =
1455 {
1456         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1457         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1458         mmCOMPUTE_NUM_THREAD_X, 256*4,
1459         mmCOMPUTE_NUM_THREAD_Y, 1,
1460         mmCOMPUTE_NUM_THREAD_Z, 1,
1461         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1462         mmCOMPUTE_PGM_RSRC2, 20,
1463         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1464         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1465         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1466         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1467         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1468         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1469         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1470         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1471         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1472         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1473 };
1474
1475 static const u32 sgpr1_init_regs[] =
1476 {
1477         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1478         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1479         mmCOMPUTE_NUM_THREAD_X, 256*5,
1480         mmCOMPUTE_NUM_THREAD_Y, 1,
1481         mmCOMPUTE_NUM_THREAD_Z, 1,
1482         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1483         mmCOMPUTE_PGM_RSRC2, 20,
1484         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1485         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1486         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1487         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1488         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1489         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1490         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1491         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1492         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1493         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1494 };
1495
1496 static const u32 sgpr2_init_regs[] =
1497 {
1498         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1499         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1500         mmCOMPUTE_NUM_THREAD_X, 256*5,
1501         mmCOMPUTE_NUM_THREAD_Y, 1,
1502         mmCOMPUTE_NUM_THREAD_Z, 1,
1503         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1504         mmCOMPUTE_PGM_RSRC2, 20,
1505         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1506         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1507         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1508         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1509         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1510         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1511         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1512         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1513         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1514         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1515 };
1516
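     /*
      * SEC/DED (single-error-correct / double-error-detect) EDC counter
      * registers, read back after the init dispatches to clear anything
      * accumulated while the GPRs held uninitialized data.
      */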
1517 static const u32 sec_ded_counter_registers[] =
1518 {
1519         mmCPC_EDC_ATC_CNT,
1520         mmCPC_EDC_SCRATCH_CNT,
1521         mmCPC_EDC_UCODE_CNT,
1522         mmCPF_EDC_ATC_CNT,
1523         mmCPF_EDC_ROQ_CNT,
1524         mmCPF_EDC_TAG_CNT,
1525         mmCPG_EDC_ATC_CNT,
1526         mmCPG_EDC_DMA_CNT,
1527         mmCPG_EDC_TAG_CNT,
1528         mmDC_EDC_CSINVOC_CNT,
1529         mmDC_EDC_RESTORE_CNT,
1530         mmDC_EDC_STATE_CNT,
1531         mmGDS_EDC_CNT,
1532         mmGDS_EDC_GRBM_CNT,
1533         mmGDS_EDC_OA_DED,
1534         mmSPI_EDC_CNT,
1535         mmSQC_ATC_EDC_GATCL1_CNT,
1536         mmSQC_EDC_CNT,
1537         mmSQ_EDC_DED_CNT,
1538         mmSQ_EDC_INFO,
1539         mmSQ_EDC_SEC_CNT,
1540         mmTCC_EDC_CNT,
1541         mmTCP_ATC_EDC_GATCL1_CNT,
1542         mmTCP_EDC_CNT,
1543         mmTD_EDC_CNT
1544 };
1545
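     /*
      * CARRIZO EDC workaround: dispatch three compute jobs that write
      * every VGPR and SGPR so the ECC check bits start from a known
      * state, then enable DED reporting and FED propagation and read the
      * counters back to clear them.
      */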
1546 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1547 {
1548         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1549         struct amdgpu_ib ib;
1550         struct dma_fence *f = NULL;
1551         int r, i;
1552         u32 tmp;
1553         unsigned total_size, vgpr_offset, sgpr_offset;
1554         u64 gpu_addr;
1555
1556         /* only supported on CZ */
1557         if (adev->asic_type != CHIP_CARRIZO)
1558                 return 0;
1559
1560         /* bail if the compute ring is not ready */
1561         if (!ring->ready)
1562                 return 0;
1563
1564         tmp = RREG32(mmGB_EDC_MODE);
1565         WREG32(mmGB_EDC_MODE, 0);
1566
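             /*
              * Size the IB in dwords: each (reg, value) pair costs a
              * 3-dword SET_SH_REG packet, plus 4 dwords to program
              * COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT and 2 for the CS
              * partial flush event; the final * 4 converts dwords to bytes.
              */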
1567         total_size =
1568                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1569         total_size +=
1570                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1571         total_size +=
1572                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1573         total_size = ALIGN(total_size, 256);
1574         vgpr_offset = total_size;
1575         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1576         sgpr_offset = total_size;
1577         total_size += sizeof(sgpr_init_compute_shader);
1578
1579         /* allocate an indirect buffer to put the commands in */
1580         memset(&ib, 0, sizeof(ib));
1581         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1582         if (r) {
1583                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1584                 return r;
1585         }
1586
1587         /* load the compute shaders */
1588         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1589                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1590
1591         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1592                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1593
1594         /* init the ib length to 0 */
1595         ib.length_dw = 0;
1596
1597         /* VGPR */
1598         /* write the register state for the compute dispatch */
1599         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1600                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1601                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1602                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1603         }
1604         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1605         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1606         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1607         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1608         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1609         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1610
1611         /* write dispatch packet */
1612         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1613         ib.ptr[ib.length_dw++] = 8; /* x */
1614         ib.ptr[ib.length_dw++] = 1; /* y */
1615         ib.ptr[ib.length_dw++] = 1; /* z */
1616         ib.ptr[ib.length_dw++] =
1617                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1618
1619         /* write CS partial flush packet */
1620         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1621         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1622
1623         /* SGPR1 */
1624         /* write the register state for the compute dispatch */
1625         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1626                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1627                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1628                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1629         }
1630         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1631         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1632         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1633         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1634         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1635         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1636
1637         /* write dispatch packet */
1638         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1639         ib.ptr[ib.length_dw++] = 8; /* x */
1640         ib.ptr[ib.length_dw++] = 1; /* y */
1641         ib.ptr[ib.length_dw++] = 1; /* z */
1642         ib.ptr[ib.length_dw++] =
1643                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1644
1645         /* write CS partial flush packet */
1646         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1647         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1648
1649         /* SGPR2 */
1650         /* write the register state for the compute dispatch */
1651         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1652                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1653                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1654                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1655         }
1656         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1657         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1658         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1659         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1660         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1661         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1662
1663         /* write dispatch packet */
1664         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1665         ib.ptr[ib.length_dw++] = 8; /* x */
1666         ib.ptr[ib.length_dw++] = 1; /* y */
1667         ib.ptr[ib.length_dw++] = 1; /* z */
1668         ib.ptr[ib.length_dw++] =
1669                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1670
1671         /* write CS partial flush packet */
1672         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1673         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1674
1675         /* schedule the ib on the ring */
1676         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1677         if (r) {
1678                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1679                 goto fail;
1680         }
1681
1682         /* wait for the GPU to finish processing the IB */
1683         r = dma_fence_wait(f, false);
1684         if (r) {
1685                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1686                 goto fail;
1687         }
1688
1689         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1690         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1691         WREG32(mmGB_EDC_MODE, tmp);
1692
1693         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1694         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1695         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1696
1698         /* read back registers to clear the counters */
1699         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1700                 RREG32(sec_ded_counter_registers[i]);
1701
1702 fail:
1703         amdgpu_ib_free(adev, &ib, NULL);
1704         dma_fence_put(f);
1705
1706         return r;
1707 }
1708
1709 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1710 {
1711         u32 gb_addr_config;
1712         u32 mc_shared_chmap, mc_arb_ramcfg;
1713         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1714         u32 tmp;
1715         int ret;
1716
1717         switch (adev->asic_type) {
1718         case CHIP_TOPAZ:
1719                 adev->gfx.config.max_shader_engines = 1;
1720                 adev->gfx.config.max_tile_pipes = 2;
1721                 adev->gfx.config.max_cu_per_sh = 6;
1722                 adev->gfx.config.max_sh_per_se = 1;
1723                 adev->gfx.config.max_backends_per_se = 2;
1724                 adev->gfx.config.max_texture_channel_caches = 2;
1725                 adev->gfx.config.max_gprs = 256;
1726                 adev->gfx.config.max_gs_threads = 32;
1727                 adev->gfx.config.max_hw_contexts = 8;
1728
1729                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1730                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1731                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1732                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1733                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1734                 break;
1735         case CHIP_FIJI:
1736                 adev->gfx.config.max_shader_engines = 4;
1737                 adev->gfx.config.max_tile_pipes = 16;
1738                 adev->gfx.config.max_cu_per_sh = 16;
1739                 adev->gfx.config.max_sh_per_se = 1;
1740                 adev->gfx.config.max_backends_per_se = 4;
1741                 adev->gfx.config.max_texture_channel_caches = 16;
1742                 adev->gfx.config.max_gprs = 256;
1743                 adev->gfx.config.max_gs_threads = 32;
1744                 adev->gfx.config.max_hw_contexts = 8;
1745
1746                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1747                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1748                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1749                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1750                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1751                 break;
1752         case CHIP_POLARIS11:
1753         case CHIP_POLARIS12:
1754                 ret = amdgpu_atombios_get_gfx_info(adev);
1755                 if (ret)
1756                         return ret;
1757                 adev->gfx.config.max_gprs = 256;
1758                 adev->gfx.config.max_gs_threads = 32;
1759                 adev->gfx.config.max_hw_contexts = 8;
1760
1761                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1762                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1763                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1764                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1765                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1766                 break;
1767         case CHIP_POLARIS10:
1768         case CHIP_VEGAM:
1769                 ret = amdgpu_atombios_get_gfx_info(adev);
1770                 if (ret)
1771                         return ret;
1772                 adev->gfx.config.max_gprs = 256;
1773                 adev->gfx.config.max_gs_threads = 32;
1774                 adev->gfx.config.max_hw_contexts = 8;
1775
1776                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1777                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1778                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1779                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1780                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1781                 break;
1782         case CHIP_TONGA:
1783                 adev->gfx.config.max_shader_engines = 4;
1784                 adev->gfx.config.max_tile_pipes = 8;
1785                 adev->gfx.config.max_cu_per_sh = 8;
1786                 adev->gfx.config.max_sh_per_se = 1;
1787                 adev->gfx.config.max_backends_per_se = 2;
1788                 adev->gfx.config.max_texture_channel_caches = 8;
1789                 adev->gfx.config.max_gprs = 256;
1790                 adev->gfx.config.max_gs_threads = 32;
1791                 adev->gfx.config.max_hw_contexts = 8;
1792
1793                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1794                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1795                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1796                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1797                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1798                 break;
1799         case CHIP_CARRIZO:
1800                 adev->gfx.config.max_shader_engines = 1;
1801                 adev->gfx.config.max_tile_pipes = 2;
1802                 adev->gfx.config.max_sh_per_se = 1;
1803                 adev->gfx.config.max_backends_per_se = 2;
1804                 adev->gfx.config.max_cu_per_sh = 8;
1805                 adev->gfx.config.max_texture_channel_caches = 2;
1806                 adev->gfx.config.max_gprs = 256;
1807                 adev->gfx.config.max_gs_threads = 32;
1808                 adev->gfx.config.max_hw_contexts = 8;
1809
1810                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1811                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1812                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1813                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1814                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1815                 break;
1816         case CHIP_STONEY:
1817                 adev->gfx.config.max_shader_engines = 1;
1818                 adev->gfx.config.max_tile_pipes = 2;
1819                 adev->gfx.config.max_sh_per_se = 1;
1820                 adev->gfx.config.max_backends_per_se = 1;
1821                 adev->gfx.config.max_cu_per_sh = 3;
1822                 adev->gfx.config.max_texture_channel_caches = 2;
1823                 adev->gfx.config.max_gprs = 256;
1824                 adev->gfx.config.max_gs_threads = 16;
1825                 adev->gfx.config.max_hw_contexts = 8;
1826
1827                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1828                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1829                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1830                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1831                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1832                 break;
1833         default:
1834                 adev->gfx.config.max_shader_engines = 2;
1835                 adev->gfx.config.max_tile_pipes = 4;
1836                 adev->gfx.config.max_cu_per_sh = 2;
1837                 adev->gfx.config.max_sh_per_se = 1;
1838                 adev->gfx.config.max_backends_per_se = 2;
1839                 adev->gfx.config.max_texture_channel_caches = 4;
1840                 adev->gfx.config.max_gprs = 256;
1841                 adev->gfx.config.max_gs_threads = 32;
1842                 adev->gfx.config.max_hw_contexts = 8;
1843
1844                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1845                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1846                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1847                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1848                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1849                 break;
1850         }
1851
1852         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1853         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1854         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1855
1856         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1857         adev->gfx.config.mem_max_burst_length_bytes = 256;
1858         if (adev->flags & AMD_IS_APU) {
1859                 /* Get memory bank mapping mode. */
1860                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1861                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1862                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1863
1864                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1865                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1866                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1867
1868                 /* Validate settings in case only one DIMM installed. */
1869                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1870                         dimm00_addr_map = 0;
1871                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1872                         dimm01_addr_map = 0;
1873                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1874                         dimm10_addr_map = 0;
1875                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1876                         dimm11_addr_map = 0;
1877
1878                 /* If the DIMM addr map is 8GB, the row size should be 2KB; otherwise 1KB. */
1879                 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger of the two. */
1880                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1881                         adev->gfx.config.mem_row_size_in_kb = 2;
1882                 else
1883                         adev->gfx.config.mem_row_size_in_kb = 1;
1884         } else {
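                     /*
                      * Discrete boards: row size = number of columns * 4
                      * bytes, with NOOFCOLS encoding 2^(8 + n) columns
                      * (n = 0 -> 256 columns -> 1KB rows); capped at 4KB.
                      */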
1885                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1886                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1887                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1888                         adev->gfx.config.mem_row_size_in_kb = 4;
1889         }
1890
1891         adev->gfx.config.shader_engine_tile_size = 32;
1892         adev->gfx.config.num_gpus = 1;
1893         adev->gfx.config.multi_gpu_tile_size = 64;
1894
1895         /* fix up row size */
1896         switch (adev->gfx.config.mem_row_size_in_kb) {
1897         case 1:
1898         default:
1899                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1900                 break;
1901         case 2:
1902                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1903                 break;
1904         case 4:
1905                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1906                 break;
1907         }
1908         adev->gfx.config.gb_addr_config = gb_addr_config;
1909
1910         return 0;
1911 }
1912
1913 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1914                                         int mec, int pipe, int queue)
1915 {
1916         int r;
1917         unsigned irq_type;
1918         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1919
1922         /* mec0 is me1 */
1923         ring->me = mec + 1;
1924         ring->pipe = pipe;
1925         ring->queue = queue;
1926
1927         ring->ring_obj = NULL;
1928         ring->use_doorbell = true;
1929         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1930         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1931                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1932         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1933
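             /* one EOP interrupt source per (mec, pipe); queues on a pipe share it */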
1934         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1935                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1936                 + ring->pipe;
1937
1938         /* type-2 packets are deprecated on MEC, use type-3 instead */
1939         r = amdgpu_ring_init(adev, ring, 1024,
1940                         &adev->gfx.eop_irq, irq_type);
1941         if (r)
1942                 return r;
1943
1945         return 0;
1946 }
1947
1948 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1949
1950 static int gfx_v8_0_sw_init(void *handle)
1951 {
1952         int i, j, k, r, ring_id;
1953         struct amdgpu_ring *ring;
1954         struct amdgpu_kiq *kiq;
1955         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1956
1957         switch (adev->asic_type) {
1958         case CHIP_TONGA:
1959         case CHIP_CARRIZO:
1960         case CHIP_FIJI:
1961         case CHIP_POLARIS10:
1962         case CHIP_POLARIS11:
1963         case CHIP_POLARIS12:
1964         case CHIP_VEGAM:
1965                 adev->gfx.mec.num_mec = 2;
1966                 break;
1967         case CHIP_TOPAZ:
1968         case CHIP_STONEY:
1969         default:
1970                 adev->gfx.mec.num_mec = 1;
1971                 break;
1972         }
1973
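             /* VI compute topology: 4 pipes per MEC, 8 queues per pipe */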
1974         adev->gfx.mec.num_pipe_per_mec = 4;
1975         adev->gfx.mec.num_queue_per_pipe = 8;
1976
1977         /* KIQ event */
1978         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
1979         if (r)
1980                 return r;
1981
1982         /* EOP Event */
1983         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1984         if (r)
1985                 return r;
1986
1987         /* Privileged reg */
1988         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1989                               &adev->gfx.priv_reg_irq);
1990         if (r)
1991                 return r;
1992
1993         /* Privileged inst */
1994         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1995                               &adev->gfx.priv_inst_irq);
1996         if (r)
1997                 return r;
1998
1999         /* Add CP EDC/ECC irq  */
2000         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
2001                               &adev->gfx.cp_ecc_error_irq);
2002         if (r)
2003                 return r;
2004
2005         /* SQ interrupts. */
2006         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2007                               &adev->gfx.sq_irq);
2008         if (r) {
2009                 DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
2010                 return r;
2011         }
2012
2013         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2014
2015         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2016
2017         gfx_v8_0_scratch_init(adev);
2018
2019         r = gfx_v8_0_init_microcode(adev);
2020         if (r) {
2021                 DRM_ERROR("Failed to load gfx firmware!\n");
2022                 return r;
2023         }
2024
2025         r = gfx_v8_0_rlc_init(adev);
2026         if (r) {
2027                 DRM_ERROR("Failed to init rlc BOs!\n");
2028                 return r;
2029         }
2030
2031         r = gfx_v8_0_mec_init(adev);
2032         if (r) {
2033                 DRM_ERROR("Failed to init MEC BOs!\n");
2034                 return r;
2035         }
2036
2037         /* set up the gfx ring */
2038         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2039                 ring = &adev->gfx.gfx_ring[i];
2040                 ring->ring_obj = NULL;
2041                 sprintf(ring->name, "gfx");
2042                 /* no gfx doorbells on iceland */
2043                 if (adev->asic_type != CHIP_TOPAZ) {
2044                         ring->use_doorbell = true;
2045                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2046                 }
2047
2048                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2049                                      AMDGPU_CP_IRQ_GFX_EOP);
2050                 if (r)
2051                         return r;
2052         }
2053
2055         /* set up the compute queues - allocate horizontally across pipes */
2056         ring_id = 0;
2057         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2058                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2059                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2060                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2061                                         continue;
2062
2063                                 r = gfx_v8_0_compute_ring_init(adev,
2064                                                                 ring_id,
2065                                                                 i, k, j);
2066                                 if (r)
2067                                         return r;
2068
2069                                 ring_id++;
2070                         }
2071                 }
2072         }
2073
2074         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2075         if (r) {
2076                 DRM_ERROR("Failed to init KIQ BOs!\n");
2077                 return r;
2078         }
2079
2080         kiq = &adev->gfx.kiq;
2081         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2082         if (r)
2083                 return r;
2084
2085         /* create MQDs for all compute queues, and for the KIQ in the SRIOV case */
2086         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2087         if (r)
2088                 return r;
2089
2090         /* reserve GDS, GWS and OA resource for gfx */
2091         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2092                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2093                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2094         if (r)
2095                 return r;
2096
2097         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2098                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2099                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2100         if (r)
2101                 return r;
2102
2103         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2104                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2105                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2106         if (r)
2107                 return r;
2108
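             /* constant engine (CE) indirect RAM: 0x8000 bytes (32KB) on VI */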
2109         adev->gfx.ce_ram_size = 0x8000;
2110
2111         r = gfx_v8_0_gpu_early_init(adev);
2112         if (r)
2113                 return r;
2114
2115         return 0;
2116 }
2117
2118 static int gfx_v8_0_sw_fini(void *handle)
2119 {
2120         int i;
2121         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2122
2123         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2124         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2125         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2126
2127         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2128                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2129         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2130                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2131
2132         amdgpu_gfx_compute_mqd_sw_fini(adev);
2133         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2134         amdgpu_gfx_kiq_fini(adev);
2135
2136         gfx_v8_0_mec_fini(adev);
2137         gfx_v8_0_rlc_fini(adev);
2138         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2139                                 &adev->gfx.rlc.clear_state_gpu_addr,
2140                                 (void **)&adev->gfx.rlc.cs_ptr);
2141         if ((adev->asic_type == CHIP_CARRIZO) ||
2142             (adev->asic_type == CHIP_STONEY)) {
2143                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2144                                 &adev->gfx.rlc.cp_table_gpu_addr,
2145                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2146         }
2147         gfx_v8_0_free_microcode(adev);
2148
2149         return 0;
2150 }
2151
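     /*
      * Program the per-ASIC GB_TILE_MODE0..31 and GB_MACROTILE_MODE0..15
      * tables using the field helpers defined at the top of the file.  A
      * few indices (tile modes 7, 12, 17 and 23, macrotile mode 7) are
      * skipped and keep their reset values.
      */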
2152 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2153 {
2154         uint32_t *modearray, *mod2array;
2155         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2156         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2157         u32 reg_offset;
2158
2159         modearray = adev->gfx.config.tile_mode_array;
2160         mod2array = adev->gfx.config.macrotile_mode_array;
2161
2162         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2163                 modearray[reg_offset] = 0;
2164
2165         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2166                 mod2array[reg_offset] = 0;
2167
2168         switch (adev->asic_type) {
2169         case CHIP_TOPAZ:
2170                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171                                 PIPE_CONFIG(ADDR_SURF_P2) |
2172                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2174                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2175                                 PIPE_CONFIG(ADDR_SURF_P2) |
2176                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2178                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2179                                 PIPE_CONFIG(ADDR_SURF_P2) |
2180                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2181                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2182                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183                                 PIPE_CONFIG(ADDR_SURF_P2) |
2184                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2186                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2187                                 PIPE_CONFIG(ADDR_SURF_P2) |
2188                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2189                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2190                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2191                                 PIPE_CONFIG(ADDR_SURF_P2) |
2192                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2193                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2194                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2195                                 PIPE_CONFIG(ADDR_SURF_P2) |
2196                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2197                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2198                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2199                                 PIPE_CONFIG(ADDR_SURF_P2));
2200                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2201                                 PIPE_CONFIG(ADDR_SURF_P2) |
2202                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2203                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2208                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2212                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2220                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2221                                  PIPE_CONFIG(ADDR_SURF_P2) |
2222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2224                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2225                                  PIPE_CONFIG(ADDR_SURF_P2) |
2226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2228                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2229                                  PIPE_CONFIG(ADDR_SURF_P2) |
2230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2232                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2233                                  PIPE_CONFIG(ADDR_SURF_P2) |
2234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2236                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2237                                  PIPE_CONFIG(ADDR_SURF_P2) |
2238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2240                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));
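                /*
                 * Commit the tables. GB_TILE_MODE7/12/17/23 are skipped on
                 * this 2-pipe (ADDR_SURF_P2) part and GB_MACROTILE_MODE7 is
                 * skipped as well; the matching array slots are never
                 * programmed above and still hold their zero-initialized
                 * values.
                 */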
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
                            reg_offset != 23)
                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
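        /*
         * Fiji and VegaM are 16-pipe parts (ADDR_SURF_P16_32x32_16x16); a
         * parallel set of PRT entries (7, 12, 17, 23 and 30) keeps the
         * 4-pipe ADDR_SURF_P4_16x16 layout.
         */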
        case CHIP_FIJI:
        case CHIP_VEGAM:
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

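                /*
                 * Macrotile (bank) settings: every entry here uses
                 * BANK_WIDTH_1, where the 2-pipe table above mixed bank
                 * widths of 4, 2 and 1.
                 */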
                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_4_BANK));
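                /*
                 * All num_tile_mode_states tile-mode registers are written
                 * here; any slot not assigned above (e.g. 31) is still zero
                 * from the memset at the top of this function.
                 */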
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
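        /*
         * Tonga is an 8-pipe part: the same table layout as Fiji, but with
         * PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) and mostly 16-bank
         * macrotile settings.
         */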
        case CHIP_TONGA:
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_4_BANK));
                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                 NUM_BANKS(ADDR_SURF_4_BANK));

                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
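        /*
         * Polaris11 and Polaris12 are 4-pipe parts, so the regular entries
         * and their PRT fallbacks both use ADDR_SURF_P4_16x16 (modearray[6]
         * and modearray[7] come out identical, as do the other PRT pairs).
         */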
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));

                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_4_BANK));

                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
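        /*
         * Polaris10 is an 8-pipe part and mirrors Tonga's
         * ADDR_SURF_P8_32x32_16x16 tile-mode table.
         */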
        case CHIP_POLARIS10:
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

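                /*
                 * Bank settings: these appear to match Tonga's 8-pipe
                 * values entry for entry.
                 */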
                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_8_BANK));

                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_4_BANK));
3110
3111                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3114                                 NUM_BANKS(ADDR_SURF_4_BANK));
3115
3116                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3117                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3118
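                /*
                 * Macrotile slot 7 is never set in mod2array, so the
                 * loop below skips it and the register keeps its
                 * reset value.
                 */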
3119                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3120                         if (reg_offset != 7)
3121                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3122
3123                 break;
3124         case CHIP_STONEY:
3125                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126                                 PIPE_CONFIG(ADDR_SURF_P2) |
3127                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3130                                 PIPE_CONFIG(ADDR_SURF_P2) |
3131                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134                                 PIPE_CONFIG(ADDR_SURF_P2) |
3135                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3137                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138                                 PIPE_CONFIG(ADDR_SURF_P2) |
3139                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3140                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3141                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142                                 PIPE_CONFIG(ADDR_SURF_P2) |
3143                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3144                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3145                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146                                 PIPE_CONFIG(ADDR_SURF_P2) |
3147                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3148                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3150                                 PIPE_CONFIG(ADDR_SURF_P2) |
3151                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3152                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3153                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3154                                 PIPE_CONFIG(ADDR_SURF_P2));
3155                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3156                                 PIPE_CONFIG(ADDR_SURF_P2) |
3157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3158                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3179                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3183                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3192                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3199                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3200                                  PIPE_CONFIG(ADDR_SURF_P2) |
3201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3203                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3204                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3207                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3208                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3211                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3212                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3215                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3216                                  PIPE_CONFIG(ADDR_SURF_P2) |
3217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3219                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3220                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224                                  PIPE_CONFIG(ADDR_SURF_P2) |
3225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3227
3228                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                 NUM_BANKS(ADDR_SURF_8_BANK));
3232                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                 NUM_BANKS(ADDR_SURF_8_BANK));
3236                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3239                                 NUM_BANKS(ADDR_SURF_8_BANK));
3240                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3243                                 NUM_BANKS(ADDR_SURF_8_BANK));
3244                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247                                 NUM_BANKS(ADDR_SURF_8_BANK));
3248                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3249                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3250                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3251                                 NUM_BANKS(ADDR_SURF_8_BANK));
3252                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3254                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3255                                 NUM_BANKS(ADDR_SURF_8_BANK));
3256                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3257                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3258                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259                                 NUM_BANKS(ADDR_SURF_16_BANK));
3260                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3261                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3262                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263                                 NUM_BANKS(ADDR_SURF_16_BANK));
3264                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3265                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3266                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267                                  NUM_BANKS(ADDR_SURF_16_BANK));
3268                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3269                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3270                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271                                  NUM_BANKS(ADDR_SURF_16_BANK));
3272                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3274                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3275                                  NUM_BANKS(ADDR_SURF_16_BANK));
3276                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279                                  NUM_BANKS(ADDR_SURF_16_BANK));
3280                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283                                  NUM_BANKS(ADDR_SURF_8_BANK));
3284
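                /*
                 * Tile mode slots 7, 12, 17 and 23, and macrotile slot
                 * 7, have no entries in the arrays above, so the write
                 * loops below leave those registers untouched.
                 */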
3285                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3286                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3287                             reg_offset != 23)
3288                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3289
3290                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3291                         if (reg_offset != 7)
3292                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3293
3294                 break;
3295         default:
3296                 dev_warn(adev->dev,
3297                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3298                          adev->asic_type);
3299                 /* fall through */
3300         case CHIP_CARRIZO:
3301                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3302                                 PIPE_CONFIG(ADDR_SURF_P2) |
3303                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3306                                 PIPE_CONFIG(ADDR_SURF_P2) |
3307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3310                                 PIPE_CONFIG(ADDR_SURF_P2) |
3311                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3313                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3314                                 PIPE_CONFIG(ADDR_SURF_P2) |
3315                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3316                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3317                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3318                                 PIPE_CONFIG(ADDR_SURF_P2) |
3319                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3320                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3321                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3322                                 PIPE_CONFIG(ADDR_SURF_P2) |
3323                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3324                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3325                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3326                                 PIPE_CONFIG(ADDR_SURF_P2) |
3327                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3329                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3330                                 PIPE_CONFIG(ADDR_SURF_P2));
3331                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3332                                 PIPE_CONFIG(ADDR_SURF_P2) |
3333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3334                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3347                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3351                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3355                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3359                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3368                                  PIPE_CONFIG(ADDR_SURF_P2) |
3369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3372                                  PIPE_CONFIG(ADDR_SURF_P2) |
3373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3375                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3376                                  PIPE_CONFIG(ADDR_SURF_P2) |
3377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3379                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3380                                  PIPE_CONFIG(ADDR_SURF_P2) |
3381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3383                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3384                                  PIPE_CONFIG(ADDR_SURF_P2) |
3385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3387                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3388                                  PIPE_CONFIG(ADDR_SURF_P2) |
3389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3391                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3392                                  PIPE_CONFIG(ADDR_SURF_P2) |
3393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3395                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3396                                  PIPE_CONFIG(ADDR_SURF_P2) |
3397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3399                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400                                  PIPE_CONFIG(ADDR_SURF_P2) |
3401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3403
3404                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407                                 NUM_BANKS(ADDR_SURF_8_BANK));
3408                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411                                 NUM_BANKS(ADDR_SURF_8_BANK));
3412                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415                                 NUM_BANKS(ADDR_SURF_8_BANK));
3416                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3419                                 NUM_BANKS(ADDR_SURF_8_BANK));
3420                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423                                 NUM_BANKS(ADDR_SURF_8_BANK));
3424                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3425                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3426                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3427                                 NUM_BANKS(ADDR_SURF_8_BANK));
3428                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3429                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3430                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3431                                 NUM_BANKS(ADDR_SURF_8_BANK));
3432                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3433                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3434                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435                                 NUM_BANKS(ADDR_SURF_16_BANK));
3436                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3437                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3438                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439                                 NUM_BANKS(ADDR_SURF_16_BANK));
3440                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3441                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3442                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443                                  NUM_BANKS(ADDR_SURF_16_BANK));
3444                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3445                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3446                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3447                                  NUM_BANKS(ADDR_SURF_16_BANK));
3448                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3450                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3451                                  NUM_BANKS(ADDR_SURF_16_BANK));
3452                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3455                                  NUM_BANKS(ADDR_SURF_16_BANK));
3456                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459                                  NUM_BANKS(ADDR_SURF_8_BANK));
3460
3461                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3462                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3463                             reg_offset != 23)
3464                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3465
3466                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3467                         if (reg_offset != 7)
3468                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3469
3470                 break;
3471         }
3472 }
3473
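/*
 * Steer subsequent indexed register accesses to a single shader engine
 * (se_num), shader array (sh_num) and instance through GRBM_GFX_INDEX;
 * passing 0xffffffff for an argument selects broadcast writes at that
 * level instead.
 */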
3474 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3475                                   u32 se_num, u32 sh_num, u32 instance)
3476 {
3477         u32 data;
3478
3479         if (instance == 0xffffffff)
3480                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3481         else
3482                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3483
3484         if (se_num == 0xffffffff)
3485                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3486         else
3487                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3488
3489         if (sh_num == 0xffffffff)
3490                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3491         else
3492                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3493
3494         WREG32(mmGRBM_GFX_INDEX, data);
3495 }
3496
3497 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3498                                   u32 me, u32 pipe, u32 q)
3499 {
3500         vi_srbm_select(adev, me, pipe, q, 0);
3501 }
3502
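/*
 * CC_RB_BACKEND_DISABLE and GC_USER_RB_BACKEND_DISABLE hold *disable*
 * bits for the render backends of the currently selected SE/SH, so the
 * active bitmap is the inverse of their union, masked to the number of
 * backends one shader array can have.
 */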
3503 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3504 {
3505         u32 data, mask;
3506
3507         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3508                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3509
3510         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3511
3512         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3513                                          adev->gfx.config.max_sh_per_se);
3514
3515         return (~data) & mask;
3516 }
3517
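/*
 * Golden PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values for a fully
 * enabled part of each ASIC; harvested parts are reprogrammed per SE by
 * gfx_v8_0_write_harvested_raster_configs() instead.
 */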
3518 static void
3519 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3520 {
3521         switch (adev->asic_type) {
3522         case CHIP_FIJI:
3523         case CHIP_VEGAM:
3524                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3525                           RB_XSEL2(1) | PKR_MAP(2) |
3526                           PKR_XSEL(1) | PKR_YSEL(1) |
3527                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3528                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3529                            SE_PAIR_YSEL(2);
3530                 break;
3531         case CHIP_TONGA:
3532         case CHIP_POLARIS10:
3533                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3534                           SE_XSEL(1) | SE_YSEL(1);
3535                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3536                            SE_PAIR_YSEL(2);
3537                 break;
3538         case CHIP_TOPAZ:
3539         case CHIP_CARRIZO:
3540                 *rconf |= RB_MAP_PKR0(2);
3541                 *rconf1 |= 0x0;
3542                 break;
3543         case CHIP_POLARIS11:
3544         case CHIP_POLARIS12:
3545                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3546                           SE_XSEL(1) | SE_YSEL(1);
3547                 *rconf1 |= 0x0;
3548                 break;
3549         case CHIP_STONEY:
3550                 *rconf |= 0x0;
3551                 *rconf1 |= 0x0;
3552                 break;
3553         default:
3554                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3555                 break;
3556         }
3557 }
3558
3559 static void
3560 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3561                                         u32 raster_config, u32 raster_config_1,
3562                                         unsigned rb_mask, unsigned num_rb)
3563 {
3564         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3565         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3566         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3567         unsigned rb_per_se = num_rb / num_se;
3568         unsigned se_mask[4];
3569         unsigned se;
3570
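        /*
         * Slice the global RB mask into per-SE fields of rb_per_se
         * contiguous bits, one field per possible shader engine.
         */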
3571         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3572         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3573         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3574         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3575
3576         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3577         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3578         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3579
3580         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3581                              (!se_mask[2] && !se_mask[3]))) {
3582                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3583
3584                 if (!se_mask[0] && !se_mask[1]) {
3585                         raster_config_1 |=
3586                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3587                 } else {
3588                         raster_config_1 |=
3589                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3590                 }
3591         }
3592
3593         for (se = 0; se < num_se; se++) {
3594                 unsigned raster_config_se = raster_config;
3595                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3596                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3597                 int idx = (se / 2) * 2;
3598
3599                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3600                         raster_config_se &= ~SE_MAP_MASK;
3601
3602                         if (!se_mask[idx]) {
3603                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3604                         } else {
3605                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3606                         }
3607                 }
3608
3609                 pkr0_mask &= rb_mask;
3610                 pkr1_mask &= rb_mask;
3611                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3612                         raster_config_se &= ~PKR_MAP_MASK;
3613
3614                         if (!pkr0_mask) {
3615                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3616                         } else {
3617                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3618                         }
3619                 }
3620
3621                 if (rb_per_se >= 2) {
3622                         unsigned rb0_mask = 1 << (se * rb_per_se);
3623                         unsigned rb1_mask = rb0_mask << 1;
3624
3625                         rb0_mask &= rb_mask;
3626                         rb1_mask &= rb_mask;
3627                         if (!rb0_mask || !rb1_mask) {
3628                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3629
3630                                 if (!rb0_mask) {
3631                                         raster_config_se |=
3632                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3633                                 } else {
3634                                         raster_config_se |=
3635                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3636                                 }
3637                         }
3638
3639                         if (rb_per_se > 2) {
3640                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3641                                 rb1_mask = rb0_mask << 1;
3642                                 rb0_mask &= rb_mask;
3643                                 rb1_mask &= rb_mask;
3644                                 if (!rb0_mask || !rb1_mask) {
3645                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3646
3647                                         if (!rb0_mask) {
3648                                                 raster_config_se |=
3649                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3650                                         } else {
3651                                                 raster_config_se |=
3652                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3653                                         }
3654                                 }
3655                         }
3656                 }
3657
3658                 /* GRBM_GFX_INDEX has a different offset on VI */
3659                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3660                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3661                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3662         }
3663
3664         /* GRBM_GFX_INDEX has a different offset on VI */
3665         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3666 }
3667
3668 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3669 {
3670         int i, j;
3671         u32 data;
3672         u32 raster_config = 0, raster_config_1 = 0;
3673         u32 active_rbs = 0;
3674         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3675                                         adev->gfx.config.max_sh_per_se;
3676         unsigned num_rb_pipes;
3677
3678         mutex_lock(&adev->grbm_idx_mutex);
3679         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3680                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3681                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3682                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3683                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3684                                                rb_bitmap_width_per_sh);
3685                 }
3686         }
3687         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3688
3689         adev->gfx.config.backend_enable_mask = active_rbs;
3690         adev->gfx.config.num_rbs = hweight32(active_rbs);
3691
3692         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3693                              adev->gfx.config.max_shader_engines, 16);
3694
3695         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3696
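        /*
         * If there is no harvesting information, or at least as many
         * RBs are active as there are pipes, the golden config can be
         * broadcast as-is; otherwise derive per-SE configs that route
         * around the disabled RBs.
         */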
3697         if (!adev->gfx.config.backend_enable_mask ||
3698                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3699                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3700                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3701         } else {
3702                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3703                                                         adev->gfx.config.backend_enable_mask,
3704                                                         num_rb_pipes);
3705         }
3706
3707         /* cache the values for userspace */
3708         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3709                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3710                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3711                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3712                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3713                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3714                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3715                         adev->gfx.config.rb_config[i][j].raster_config =
3716                                 RREG32(mmPA_SC_RASTER_CONFIG);
3717                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3718                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3719                 }
3720         }
3721         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3722         mutex_unlock(&adev->grbm_idx_mutex);
3723 }
3724
3725 /**
3726  * gfx_v8_0_init_compute_vmid - init compute vmid
3727  *
3728  * @adev: amdgpu_device pointer
3729  *
3730  * Initialize the SH_MEM registers for the compute VMIDs.
3731  */
3733 #define DEFAULT_SH_MEM_BASES    (0x6000)
3734 #define FIRST_COMPUTE_VMID      (8)
3735 #define LAST_COMPUTE_VMID       (16)
3736 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3737 {
3738         int i;
3739         uint32_t sh_mem_config;
3740         uint32_t sh_mem_bases;
3741
3742         /*
3743          * Configure apertures:
3744          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3745          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3746          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3747          */
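        /*
         * SH_MEM_BASES packs PRIVATE_BASE into bits 15:0 and
         * SHARED_BASE into bits 31:16; each 16-bit value supplies bits
         * 63:48 of the 64-bit aperture base, so 0x6000 yields the
         * 0x60000000'00000000 bases listed above.
         */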
3748         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3749
3750         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3751                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3752                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3753                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3754                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3755                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3756
3757         mutex_lock(&adev->srbm_mutex);
3758         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3759                 vi_srbm_select(adev, 0, 0, 0, i);
3760                 /* CP and shaders */
3761                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
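                /* base > limit presumably leaves the APE1 aperture unused */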
3762                 WREG32(mmSH_MEM_APE1_BASE, 1);
3763                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3764                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3765         }
3766         vi_srbm_select(adev, 0, 0, 0, 0);
3767         mutex_unlock(&adev->srbm_mutex);
3768 }
3769
3770 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3771 {
3772         switch (adev->asic_type) {
3773         default:
3774                 adev->gfx.config.double_offchip_lds_buf = 1;
3775                 break;
3776         case CHIP_CARRIZO:
3777         case CHIP_STONEY:
3778                 adev->gfx.config.double_offchip_lds_buf = 0;
3779                 break;
3780         }
3781 }
3782
3783 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3784 {
3785         u32 tmp, sh_static_mem_cfg;
3786         int i;
3787
3788         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3789         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3790         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3791         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3792
3793         gfx_v8_0_tiling_mode_table_init(adev);
3794         gfx_v8_0_setup_rb(adev);
3795         gfx_v8_0_get_cu_info(adev);
3796         gfx_v8_0_config_init(adev);
3797
3798         /* XXX SH_MEM regs */
3799         /* where to put LDS, scratch, GPUVM in FSA64 space */
3800         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3801                                    SWIZZLE_ENABLE, 1);
3802         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3803                                    ELEMENT_SIZE, 1);
3804         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3805                                    INDEX_STRIDE, 3);
3806         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3807
3808         mutex_lock(&adev->srbm_mutex);
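        /*
         * VMID 0 looks to be reserved for the kernel: it gets an
         * uncached default mtype and a zero base, while the other
         * VMIDs default to NC and point SH_MEM_BASES at the shared
         * aperture.
         */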
3809         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3810                 vi_srbm_select(adev, 0, 0, 0, i);
3811                 /* CP and shaders */
3812                 if (i == 0) {
3813                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3814                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3815                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3816                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3817                         WREG32(mmSH_MEM_CONFIG, tmp);
3818                         WREG32(mmSH_MEM_BASES, 0);
3819                 } else {
3820                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3821                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3822                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3823                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3824                         WREG32(mmSH_MEM_CONFIG, tmp);
3825                         tmp = adev->gmc.shared_aperture_start >> 48;
3826                         WREG32(mmSH_MEM_BASES, tmp);
3827                 }
3828
3829                 WREG32(mmSH_MEM_APE1_BASE, 1);
3830                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3831         }
3832         vi_srbm_select(adev, 0, 0, 0, 0);
3833         mutex_unlock(&adev->srbm_mutex);
3834
3835         gfx_v8_0_init_compute_vmid(adev);
3836
3837         mutex_lock(&adev->grbm_idx_mutex);
3838         /*
3839          * make sure that the following register writes are broadcast
3840          * to all the shader engines and arrays
3841          */
3842         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3843
3844         WREG32(mmPA_SC_FIFO_SIZE,
3845                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3846                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3847                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3848                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3849                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3850                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3851                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3852                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3853
3854         tmp = RREG32(mmSPI_ARB_PRIORITY);
3855         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3856         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3857         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3858         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3859         WREG32(mmSPI_ARB_PRIORITY, tmp);
3860
3861         mutex_unlock(&adev->grbm_idx_mutex);
3863 }
3864
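/*
 * Poll each SE/SH, and then the non-CU masters, until the RLC serdes
 * report idle or adev->usec_timeout microseconds elapse.
 */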
3865 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3866 {
3867         u32 i, j, k;
3868         u32 mask;
3869
3870         mutex_lock(&adev->grbm_idx_mutex);
3871         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3872                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3873                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3874                         for (k = 0; k < adev->usec_timeout; k++) {
3875                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3876                                         break;
3877                                 udelay(1);
3878                         }
3879                         if (k == adev->usec_timeout) {
3880                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3881                                                       0xffffffff, 0xffffffff);
3882                                 mutex_unlock(&adev->grbm_idx_mutex);
3883                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
3884                                          i, j);
3885                                 return;
3886                         }
3887                 }
3888         }
3889         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3890         mutex_unlock(&adev->grbm_idx_mutex);
3891
3892         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3893                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3894                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3895                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3896         for (k = 0; k < adev->usec_timeout; k++) {
3897                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3898                         break;
3899                 udelay(1);
3900         }
3901 }
3902
3903 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3904                                                bool enable)
3905 {
3906         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3907
3908         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3909         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3910         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3911         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3912
3913         WREG32(mmCP_INT_CNTL_RING0, tmp);
3914 }
3915
3916 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3917 {
3918         /* csib */
3919         WREG32(mmRLC_CSIB_ADDR_HI,
3920                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3921         WREG32(mmRLC_CSIB_ADDR_LO,
3922                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3923         WREG32(mmRLC_CSIB_LENGTH,
3924                         adev->gfx.rlc.clear_state_size);
3925 }
3926
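/*
 * Walk the RLC indirect register list: entries are dword triples with
 * the index in the third dword, and each block ends with a 0xFFFFFFFF
 * marker.  Record where every block starts, collect the unique
 * indices, and rewrite each index in place with its slot in the
 * unique list.
 */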
3927 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3928                                 int ind_offset,
3929                                 int list_size,
3930                                 int *unique_indices,
3931                                 int *indices_count,
3932                                 int max_indices,
3933                                 int *ind_start_offsets,
3934                                 int *offset_count,
3935                                 int max_offset)
3936 {
3937         int indices;
3938         bool new_entry = true;
3939
3940         for (; ind_offset < list_size; ind_offset++) {
3942                 if (new_entry) {
3943                         new_entry = false;
3944                         ind_start_offsets[*offset_count] = ind_offset;
3945                         *offset_count = *offset_count + 1;
3946                         BUG_ON(*offset_count >= max_offset);
3947                 }
3948
3949                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3950                         new_entry = true;
3951                         continue;
3952                 }
3953
3954                 ind_offset += 2;
3955
3956                 /* look for a matching index */
3957                 for (indices = 0;
3958                         indices < *indices_count;
3959                         indices++) {
3960                         if (unique_indices[indices] ==
3961                                 register_list_format[ind_offset])
3962                                 break;
3963                 }
3964
3965                 if (indices >= *indices_count) {
3966                         unique_indices[*indices_count] =
3967                                 register_list_format[ind_offset];
3968                         indices = *indices_count;
3969                         *indices_count = *indices_count + 1;
3970                         BUG_ON(*indices_count >= max_indices);
3971                 }
3972
3973                 register_list_format[ind_offset] = indices;
3974         }
3975 }
3976
3977 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3978 {
3979         int i, temp, data;
3980         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3981         int indices_count = 0;
3982         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3983         int offset_count = 0;
3984
3985         int list_size;
3986         unsigned int *register_list_format =
3987                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3988         if (!register_list_format)
3989                 return -ENOMEM;
3990         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3991                         adev->gfx.rlc.reg_list_format_size_bytes);
3992
3993         gfx_v8_0_parse_ind_reg_list(register_list_format,
3994                                 RLC_FormatDirectRegListLength,
3995                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3996                                 unique_indices,
3997                                 &indices_count,
3998                                 ARRAY_SIZE(unique_indices),
3999                                 indirect_start_offsets,
4000                                 &offset_count,
4001                                 ARRAY_SIZE(indirect_start_offsets));
4002
4003         /* save and restore list */
4004         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4005
4006         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4007         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4008                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4009
4010         /* indirect list */
4011         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4012         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4013                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4014
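        /* the restore list appears to hold (register, value) pairs, so
         * halving the dword count gives the number of entries */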
4015         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4016         list_size = list_size >> 1;
4017         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4018         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4019
4020         /* write the start offset of each indirect list block */
4021         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4022                 adev->gfx.rlc.starting_offsets_start);
4023         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4024                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4025                                 indirect_start_offsets[i]);
4026
4027         /* unique indices */
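        /*
         * Each entry is split across an INDEX_CNTL pair: bits 17:0 go
         * to the ADDR register and bits 31:20 to the DATA register
         * (bits 19:18 look unused).
         */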
4028         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4029         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4030         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4031                 if (unique_indices[i] != 0) {
4032                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4033                         WREG32(data + i, unique_indices[i] >> 20);
4034                 }
4035         }
4036         kfree(register_list_format);
4037
4038         return 0;
4039 }
4040
4041 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4042 {
4043         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4044 }
4045
4046 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4047 {
4048         uint32_t data;
4049
4050         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4051
4052         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4053         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4054         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4055         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4056         WREG32(mmRLC_PG_DELAY, data);
4057
4058         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4059         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4060
4061 }
4062
4063 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4064                                                 bool enable)
4065 {
4066         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4067 }
4068
4069 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4070                                                   bool enable)
4071 {
4072         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4073 }
4074
4075 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4076 {
4077         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4078 }
4079
4080 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4081 {
4082         if ((adev->asic_type == CHIP_CARRIZO) ||
4083             (adev->asic_type == CHIP_STONEY)) {
4084                 gfx_v8_0_init_csb(adev);
4085                 gfx_v8_0_init_save_restore_list(adev);
4086                 gfx_v8_0_enable_save_restore_machine(adev);
4087                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4088                 gfx_v8_0_init_power_gating(adev);
4089                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4090         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4091                    (adev->asic_type == CHIP_POLARIS12) ||
4092                    (adev->asic_type == CHIP_VEGAM)) {
4093                 gfx_v8_0_init_csb(adev);
4094                 gfx_v8_0_init_save_restore_list(adev);
4095                 gfx_v8_0_enable_save_restore_machine(adev);
4096                 gfx_v8_0_init_power_gating(adev);
4097         }
4098
4099 }
4100
4101 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4102 {
4103         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4104
4105         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4106         gfx_v8_0_wait_for_rlc_serdes(adev);
4107 }
4108
4109 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4110 {
4111         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4112         udelay(50);
4113
4114         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4115         udelay(50);
4116 }
4117
4118 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4119 {
4120         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4121
4122         /* Carrizo enables the CP interrupt only after the CP has been initialized */
4123         if (!(adev->flags & AMD_IS_APU))
4124                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4125
4126         udelay(50);
4127 }
4128
4129 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4130 {
4131         const struct rlc_firmware_header_v2_0 *hdr;
4132         const __le32 *fw_data;
4133         unsigned i, fw_size;
4134
4135         if (!adev->gfx.rlc_fw)
4136                 return -EINVAL;
4137
4138         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4139         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4140
4141         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4142                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4143         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4144
4145         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4146         for (i = 0; i < fw_size; i++)
4147                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4148         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4149
4150         return 0;
4151 }
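
/*
 * Hedged sketch of the ucode upload idiom shared by the RLC and CP
 * loaders in this file (illustrative only, not called by the driver):
 * reset the write address, stream the little-endian image through the
 * DATA port, then write the firmware version to the ADDR register as
 * the final step, mirroring gfx_v8_0_rlc_load_microcode() above.
 */
static void __maybe_unused gfx_v8_0_upload_ucode(struct amdgpu_device *adev,
						 u32 addr_reg, u32 data_reg,
						 const __le32 *fw, u32 ndw,
						 u32 fw_version)
{
	u32 i;

	WREG32(addr_reg, 0);
	for (i = 0; i < ndw; i++)
		WREG32(data_reg, le32_to_cpup(fw++));
	WREG32(addr_reg, fw_version);
}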
4152
4153 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4154 {
4155         int r;
4156         u32 tmp;
4157
4158         gfx_v8_0_rlc_stop(adev);
4159
4160         /* disable CG */
4161         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4162         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4163                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4164         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4165         if (adev->asic_type == CHIP_POLARIS11 ||
4166             adev->asic_type == CHIP_POLARIS10 ||
4167             adev->asic_type == CHIP_POLARIS12 ||
4168             adev->asic_type == CHIP_VEGAM) {
4169                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4170                 tmp &= ~0x3;
4171                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4172         }
4173
4174         /* disable PG */
4175         WREG32(mmRLC_PG_CNTL, 0);
4176
4177         gfx_v8_0_rlc_reset(adev);
4178         gfx_v8_0_init_pg(adev);
4179
4181         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4182                 /* legacy rlc firmware loading */
4183                 r = gfx_v8_0_rlc_load_microcode(adev);
4184                 if (r)
4185                         return r;
4186         }
4187
4188         gfx_v8_0_rlc_start(adev);
4189
4190         return 0;
4191 }
4192
4193 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4194 {
4195         int i;
4196         u32 tmp = RREG32(mmCP_ME_CNTL);
4197
4198         if (enable) {
4199                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4200                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4201                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4202         } else {
4203                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4204                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4205                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4206                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4207                         adev->gfx.gfx_ring[i].ready = false;
4208         }
4209         WREG32(mmCP_ME_CNTL, tmp);
4210         udelay(50);
4211 }
4212
4213 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4214 {
4215         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4216         const struct gfx_firmware_header_v1_0 *ce_hdr;
4217         const struct gfx_firmware_header_v1_0 *me_hdr;
4218         const __le32 *fw_data;
4219         unsigned i, fw_size;
4220
4221         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4222                 return -EINVAL;
4223
4224         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4225                 adev->gfx.pfp_fw->data;
4226         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4227                 adev->gfx.ce_fw->data;
4228         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4229                 adev->gfx.me_fw->data;
4230
4231         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4232         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4233         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4234
4235         gfx_v8_0_cp_gfx_enable(adev, false);
4236
4237         /* PFP */
4238         fw_data = (const __le32 *)
4239                 (adev->gfx.pfp_fw->data +
4240                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4241         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4242         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4243         for (i = 0; i < fw_size; i++)
4244                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4245         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4246
4247         /* CE */
4248         fw_data = (const __le32 *)
4249                 (adev->gfx.ce_fw->data +
4250                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4251         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4252         WREG32(mmCP_CE_UCODE_ADDR, 0);
4253         for (i = 0; i < fw_size; i++)
4254                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4255         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4256
4257         /* ME */
4258         fw_data = (const __le32 *)
4259                 (adev->gfx.me_fw->data +
4260                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4261         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4262         WREG32(mmCP_ME_RAM_WADDR, 0);
4263         for (i = 0; i < fw_size; i++)
4264                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4265         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4266
4267         return 0;
4268 }
4269
4270 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4271 {
4272         u32 count = 0;
4273         const struct cs_section_def *sect = NULL;
4274         const struct cs_extent_def *ext = NULL;
4275
4276         /* begin clear state */
4277         count += 2;
4278         /* context control state */
4279         count += 3;
4280
4281         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4282                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4283                         if (sect->id == SECT_CONTEXT)
4284                                 count += 2 + ext->reg_count;
4285                         else
4286                                 return 0;
4287                 }
4288         }
4289         /* pa_sc_raster_config/pa_sc_raster_config1 */
4290         count += 4;
4291         /* end clear state */
4292         count += 2;
4293         /* clear state */
4294         count += 2;
4295
4296         return count;
4297 }
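
/*
 * Worked example for the count above (illustrative): with a single
 * SECT_CONTEXT extent of N registers, the clear-state buffer needs
 * 2 + 3 + (2 + N) + 4 + 2 + 2 = 13 + N dwords, which is exactly the
 * packet sequence gfx_v8_0_cp_gfx_start() emits below.
 */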
4298
4299 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4300 {
4301         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4302         const struct cs_section_def *sect = NULL;
4303         const struct cs_extent_def *ext = NULL;
4304         int r, i;
4305
4306         /* init the CP */
4307         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4308         WREG32(mmCP_ENDIAN_SWAP, 0);
4309         WREG32(mmCP_DEVICE_ID, 1);
4310
4311         gfx_v8_0_cp_gfx_enable(adev, true);
4312
4313         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4314         if (r) {
4315                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4316                 return r;
4317         }
4318
4319         /* clear state buffer */
4320         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4321         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4322
4323         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4324         amdgpu_ring_write(ring, 0x80000000);
4325         amdgpu_ring_write(ring, 0x80000000);
4326
4327         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4328                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4329                         if (sect->id == SECT_CONTEXT) {
4330                                 amdgpu_ring_write(ring,
4331                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4332                                                ext->reg_count));
4333                                 amdgpu_ring_write(ring,
4334                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4335                                 for (i = 0; i < ext->reg_count; i++)
4336                                         amdgpu_ring_write(ring, ext->extent[i]);
4337                         }
4338                 }
4339         }
4340
4341         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4342         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4343         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4344         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4345
4346         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4347         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4348
4349         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4350         amdgpu_ring_write(ring, 0);
4351
4352         /* init the CE partitions */
4353         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4354         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4355         amdgpu_ring_write(ring, 0x8000);
4356         amdgpu_ring_write(ring, 0x8000);
4357
4358         amdgpu_ring_commit(ring);
4359
4360         return 0;
4361 }

4362 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4363 {
4364         u32 tmp;
4365         /* no gfx doorbells on iceland */
4366         if (adev->asic_type == CHIP_TOPAZ)
4367                 return;
4368
4369         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4370
4371         if (ring->use_doorbell) {
4372                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4373                                 DOORBELL_OFFSET, ring->doorbell_index);
4374                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4375                                                 DOORBELL_HIT, 0);
4376                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4377                                             DOORBELL_EN, 1);
4378         } else {
4379                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4380         }
4381
4382         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4383
4384         if (adev->flags & AMD_IS_APU)
4385                 return;
4386
4387         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4388                                         DOORBELL_RANGE_LOWER,
4389                                         AMDGPU_DOORBELL_GFX_RING0);
4390         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4391
4392         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4393                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4394 }
4395
4396 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4397 {
4398         struct amdgpu_ring *ring;
4399         u32 tmp;
4400         u32 rb_bufsz;
4401         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4402         int r;
4403
4404         /* Set the write pointer delay */
4405         WREG32(mmCP_RB_WPTR_DELAY, 0);
4406
4407         /* set the RB to use vmid 0 */
4408         WREG32(mmCP_RB_VMID, 0);
4409
4410         /* Set ring buffer size */
4411         ring = &adev->gfx.gfx_ring[0];
4412         rb_bufsz = order_base_2(ring->ring_size / 8);
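	/*
	 * RB_BUFSZ encodes log2 of the ring size in 8-byte units; e.g. a
	 * 64 KiB ring (an illustrative size, not a driver default) gives
	 * order_base_2(65536 / 8) = 13.
	 */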
4413         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4414         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4415         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4416         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4417 #ifdef __BIG_ENDIAN
4418         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4419 #endif
4420         WREG32(mmCP_RB0_CNTL, tmp);
4421
4422         /* Initialize the ring buffer's read and write pointers */
4423         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4424         ring->wptr = 0;
4425         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4426
4427         /* set the wb address whether it's enabled or not */
4428         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4429         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4430         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4431
4432         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4433         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4434         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4435         mdelay(1);
4436         WREG32(mmCP_RB0_CNTL, tmp);
4437
4438         rb_addr = ring->gpu_addr >> 8;
4439         WREG32(mmCP_RB0_BASE, rb_addr);
4440         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4441
4442         gfx_v8_0_set_cpg_door_bell(adev, ring);
4443         /* start the ring */
4444         amdgpu_ring_clear_ring(ring);
4445         gfx_v8_0_cp_gfx_start(adev);
4446         ring->ready = true;
4447         r = amdgpu_ring_test_ring(ring);
4448         if (r)
4449                 ring->ready = false;
4450
4451         return r;
4452 }
4453
4454 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4455 {
4456         int i;
4457
4458         if (enable) {
4459                 WREG32(mmCP_MEC_CNTL, 0);
4460         } else {
4461                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4462                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4463                         adev->gfx.compute_ring[i].ready = false;
4464                 adev->gfx.kiq.ring.ready = false;
4465         }
4466         udelay(50);
4467 }
4468
4469 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4470 {
4471         const struct gfx_firmware_header_v1_0 *mec_hdr;
4472         const __le32 *fw_data;
4473         unsigned i, fw_size;
4474
4475         if (!adev->gfx.mec_fw)
4476                 return -EINVAL;
4477
4478         gfx_v8_0_cp_compute_enable(adev, false);
4479
4480         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4481         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4482
4483         fw_data = (const __le32 *)
4484                 (adev->gfx.mec_fw->data +
4485                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4486         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4487
4488         /* MEC1 */
4489         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4490         for (i = 0; i < fw_size; i++)
4491                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4492         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4493
4494         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4495         if (adev->gfx.mec2_fw) {
4496                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4497
4498                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4499                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4500
4501                 fw_data = (const __le32 *)
4502                         (adev->gfx.mec2_fw->data +
4503                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4504                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4505
4506                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4507                 for (i = 0; i < fw_size; i++)
4508                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4509                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4510         }
4511
4512         return 0;
4513 }
4514
4515 /* KIQ functions */
4516 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4517 {
4518         uint32_t tmp;
4519         struct amdgpu_device *adev = ring->adev;
4520
4521         /* tell RLC which is KIQ queue */
4522         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4523         tmp &= 0xffffff00;
4524         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4525         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4526         tmp |= 0x80;
4527         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4528 }
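
/*
 * Note on gfx_v8_0_kiq_setting() above: the low byte of
 * RLC_CP_SCHEDULERS selects the KIQ by me/pipe/queue; the second write
 * additionally sets bit 7, presumably a "queue valid" flag (an
 * assumption -- the register layout is not spelled out here), which is
 * why the register is written twice.
 */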
4529
4530 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4531 {
4532         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4533         uint32_t scratch, tmp = 0;
4534         uint64_t queue_mask = 0;
4535         int r, i;
4536
4537         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4538                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4539                         continue;
4540
4541                 /* This situation may be hit in the future if a new HW
4542                  * generation exposes more than 64 queues. If so, the
4543                  * definition of queue_mask needs updating. */
4544                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4545                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4546                         break;
4547                 }
4548
4549                 queue_mask |= (1ull << i);
4550         }
4551
4552         r = amdgpu_gfx_scratch_get(adev, &scratch);
4553         if (r) {
4554                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4555                 return r;
4556         }
4557         WREG32(scratch, 0xCAFEDEAD);
4558
4559         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4560         if (r) {
4561                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4562                 amdgpu_gfx_scratch_free(adev, scratch);
4563                 return r;
4564         }
4565         /* set resources */
4566         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4567         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4568         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4569         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4570         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4571         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4572         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4573         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4574         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4575                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4576                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4577                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4578
4579                 /* map queues */
4580                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4581                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4582                 amdgpu_ring_write(kiq_ring,
4583                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4584                 amdgpu_ring_write(kiq_ring,
4585                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4586                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4587                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4588                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4589                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4590                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4591                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4592                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4593         }
4594         /* write to scratch for completion */
4595         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4596         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4597         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4598         amdgpu_ring_commit(kiq_ring);
4599
4600         for (i = 0; i < adev->usec_timeout; i++) {
4601                 tmp = RREG32(scratch);
4602                 if (tmp == 0xDEADBEEF)
4603                         break;
4604                 DRM_UDELAY(1);
4605         }
4606         if (i >= adev->usec_timeout) {
4607                 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4608                           scratch, tmp);
4609                 r = -EINVAL;
4610         }
4611         amdgpu_gfx_scratch_free(adev, scratch);
4612
4613         return r;
4614 }
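
/*
 * Illustrative sketch of the scratch-register handshake used by
 * gfx_v8_0_kiq_kcq_enable() above and gfx_v8_0_kcq_disable() below (not
 * called by the driver): the caller seeds the scratch register with
 * 0xCAFEDEAD, appends a SET_UCONFIG_REG packet that rewrites it to
 * 0xDEADBEEF, then polls until the CP has consumed the ring.
 */
static int __maybe_unused gfx_v8_0_poll_scratch(struct amdgpu_device *adev,
						uint32_t scratch)
{
	int i;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (RREG32(scratch) == 0xDEADBEEF)
			return 0;
		DRM_UDELAY(1);
	}
	return -ETIMEDOUT;
}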
4615
4616 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4617 {
4618         int i, r = 0;
4619
4620         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4621                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4622                 for (i = 0; i < adev->usec_timeout; i++) {
4623                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4624                                 break;
4625                         udelay(1);
4626                 }
4627                 if (i == adev->usec_timeout)
4628                         r = -ETIMEDOUT;
4629         }
4630         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4631         WREG32(mmCP_HQD_PQ_RPTR, 0);
4632         WREG32(mmCP_HQD_PQ_WPTR, 0);
4633
4634         return r;
4635 }
4636
4637 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4638 {
4639         struct amdgpu_device *adev = ring->adev;
4640         struct vi_mqd *mqd = ring->mqd_ptr;
4641         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4642         uint32_t tmp;
4643
4644         mqd->header = 0xC0310800;
4645         mqd->compute_pipelinestat_enable = 0x00000001;
4646         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4647         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4648         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4649         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4650         mqd->compute_misc_reserved = 0x00000003;
4651         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4652                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4653         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4654                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4655         eop_base_addr = ring->eop_gpu_addr >> 8;
4656         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4657         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4658
4659         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4660         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4661         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4662                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
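	/*
	 * Worked value: GFX8_MEC_HPD_SIZE is 2048 bytes = 512 dwords, so
	 * EOP_SIZE = order_base_2(512) - 1 = 8 and the hardware decodes
	 * 2^(8 + 1) = 512 dwords, matching the allocation.
	 */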
4663
4664         mqd->cp_hqd_eop_control = tmp;
4665
4666         /* enable doorbell? */
4667         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4668                             CP_HQD_PQ_DOORBELL_CONTROL,
4669                             DOORBELL_EN,
4670                             ring->use_doorbell ? 1 : 0);
4671
4672         mqd->cp_hqd_pq_doorbell_control = tmp;
4673
4674         /* set the pointer to the MQD */
4675         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4676         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4677
4678         /* set MQD vmid to 0 */
4679         tmp = RREG32(mmCP_MQD_CONTROL);
4680         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4681         mqd->cp_mqd_control = tmp;
4682
4683         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4684         hqd_gpu_addr = ring->gpu_addr >> 8;
4685         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4686         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4687
4688         /* set up the HQD, this is similar to CP_RB0_CNTL */
4689         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4690         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4691                             (order_base_2(ring->ring_size / 4) - 1));
4692         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4693                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
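	/* assuming the usual 4 KiB GPU page size, the value above is
	 * (order_base_2(4096 / 4) - 1) = 9 */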
4694 #ifdef __BIG_ENDIAN
4695         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4696 #endif
4697         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4698         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4699         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4700         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4701         mqd->cp_hqd_pq_control = tmp;
4702
4703         /* set the wb address whether it's enabled or not */
4704         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4705         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4706         mqd->cp_hqd_pq_rptr_report_addr_hi =
4707                 upper_32_bits(wb_gpu_addr) & 0xffff;
4708
4709         /* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4710         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4711         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4712         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4713
4714         tmp = 0;
4715         /* enable the doorbell if requested */
4716         if (ring->use_doorbell) {
4717                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4718                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4719                                 DOORBELL_OFFSET, ring->doorbell_index);
4720
4721                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4722                                          DOORBELL_EN, 1);
4723                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4724                                          DOORBELL_SOURCE, 0);
4725                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4726                                          DOORBELL_HIT, 0);
4727         }
4728
4729         mqd->cp_hqd_pq_doorbell_control = tmp;
4730
4731         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4732         ring->wptr = 0;
4733         mqd->cp_hqd_pq_wptr = ring->wptr;
4734         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4735
4736         /* set the vmid for the queue */
4737         mqd->cp_hqd_vmid = 0;
4738
4739         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4740         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4741         mqd->cp_hqd_persistent_state = tmp;
4742
4743         /* set MTYPE */
4744         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4745         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4746         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4747         mqd->cp_hqd_ib_control = tmp;
4748
4749         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4750         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4751         mqd->cp_hqd_iq_timer = tmp;
4752
4753         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4754         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4755         mqd->cp_hqd_ctx_save_control = tmp;
4756
4757         /* defaults */
4758         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4759         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4760         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4761         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4762         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4763         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4764         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4765         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4766         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4767         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4768         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4769         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4770         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4771         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4772         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4773
4774         /* activate the queue */
4775         mqd->cp_hqd_active = 1;
4776
4777         return 0;
4778 }
4779
4780 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4781                         struct vi_mqd *mqd)
4782 {
4783         uint32_t mqd_reg;
4784         uint32_t *mqd_data;
4785
4786         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4787         mqd_data = &mqd->cp_mqd_base_addr_lo;
4788
4789         /* disable wptr polling */
4790         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4791
4792         /* program all HQD registers */
4793         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4794                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4795
4796         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4797          * This is safe since EOP RPTR==WPTR for any inactive HQD
4798          * on ASICs that do not support context-save.
4799          * EOP writes/reads can start anywhere in the ring.
4800          */
4801         if (adev->asic_type != CHIP_TONGA) {
4802                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4803                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4804                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4805         }
4806
4807         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4808                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4809
4810         /* activate the HQD */
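	/* CP_HQD_ACTIVE is the last register in this range, so the queue
	 * only goes live once every other HQD register already holds its
	 * final value */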
4811         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4812                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4813
4814         return 0;
4815 }
4816
4817 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4818 {
4819         struct amdgpu_device *adev = ring->adev;
4820         struct vi_mqd *mqd = ring->mqd_ptr;
4821         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4822
4823         gfx_v8_0_kiq_setting(ring);
4824
4825         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4826                 /* reset MQD to a clean status */
4827                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4828                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4829
4830                 /* reset ring buffer */
4831                 ring->wptr = 0;
4832                 amdgpu_ring_clear_ring(ring);
4833                 mutex_lock(&adev->srbm_mutex);
4834                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4835                 gfx_v8_0_mqd_commit(adev, mqd);
4836                 vi_srbm_select(adev, 0, 0, 0, 0);
4837                 mutex_unlock(&adev->srbm_mutex);
4838         } else {
4839                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4840                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4841                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4842                 mutex_lock(&adev->srbm_mutex);
4843                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4844                 gfx_v8_0_mqd_init(ring);
4845                 gfx_v8_0_mqd_commit(adev, mqd);
4846                 vi_srbm_select(adev, 0, 0, 0, 0);
4847                 mutex_unlock(&adev->srbm_mutex);
4848
4849                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4850                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4851         }
4852
4853         return 0;
4854 }
4855
4856 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4857 {
4858         struct amdgpu_device *adev = ring->adev;
4859         struct vi_mqd *mqd = ring->mqd_ptr;
4860         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4861
4862         if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4863                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4864                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4865                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4866                 mutex_lock(&adev->srbm_mutex);
4867                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4868                 gfx_v8_0_mqd_init(ring);
4869                 vi_srbm_select(adev, 0, 0, 0, 0);
4870                 mutex_unlock(&adev->srbm_mutex);
4871
4872                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4873                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4874         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4875                 /* reset MQD to a clean status */
4876                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4877                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4878                 /* reset ring buffer */
4879                 ring->wptr = 0;
4880                 amdgpu_ring_clear_ring(ring);
4881         } else {
4882                 amdgpu_ring_clear_ring(ring);
4883         }
4884         return 0;
4885 }
4886
4887 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4888 {
4889         if (adev->asic_type > CHIP_TONGA) {
4890                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4891                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4892         }
4893         /* enable doorbells */
4894         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4895 }
4896
4897 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4898 {
4899         struct amdgpu_ring *ring = NULL;
4900         int r = 0, i;
4901
4902         gfx_v8_0_cp_compute_enable(adev, true);
4903
4904         ring = &adev->gfx.kiq.ring;
4905
4906         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4907         if (unlikely(r != 0))
4908                 goto done;
4909
4910         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4911         if (!r) {
4912                 r = gfx_v8_0_kiq_init_queue(ring);
4913                 amdgpu_bo_kunmap(ring->mqd_obj);
4914                 ring->mqd_ptr = NULL;
4915         }
4916         amdgpu_bo_unreserve(ring->mqd_obj);
4917         if (r)
4918                 goto done;
4919
4920         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4921                 ring = &adev->gfx.compute_ring[i];
4922
4923                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4924                 if (unlikely(r != 0))
4925                         goto done;
4926                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4927                 if (!r) {
4928                         r = gfx_v8_0_kcq_init_queue(ring);
4929                         amdgpu_bo_kunmap(ring->mqd_obj);
4930                         ring->mqd_ptr = NULL;
4931                 }
4932                 amdgpu_bo_unreserve(ring->mqd_obj);
4933                 if (r)
4934                         goto done;
4935         }
4936
4937         gfx_v8_0_set_mec_doorbell_range(adev);
4938
4939         r = gfx_v8_0_kiq_kcq_enable(adev);
4940         if (r)
4941                 goto done;
4942
4943         /* Test KIQ */
4944         ring = &adev->gfx.kiq.ring;
4945         ring->ready = true;
4946         r = amdgpu_ring_test_ring(ring);
4947         if (r) {
4948                 ring->ready = false;
4949                 goto done;
4950         }
4951
4952         /* Test KCQs */
4953         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4954                 ring = &adev->gfx.compute_ring[i];
4955                 ring->ready = true;
4956                 r = amdgpu_ring_test_ring(ring);
4957                 if (r)
4958                         ring->ready = false;
4959         }
4960
4961 done:
4962         return r;
4963 }
4964
4965 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4966 {
4967         int r;
4968
4969         if (!(adev->flags & AMD_IS_APU))
4970                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4971
4972         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4973                 /* legacy firmware loading */
4974                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4975                 if (r)
4976                         return r;
4977
4978                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4979                 if (r)
4980                         return r;
4981         }
4982
4983         r = gfx_v8_0_cp_gfx_resume(adev);
4984         if (r)
4985                 return r;
4986
4987         r = gfx_v8_0_kiq_resume(adev);
4988         if (r)
4989                 return r;
4990
4991         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4992
4993         return 0;
4994 }
4995
4996 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4997 {
4998         gfx_v8_0_cp_gfx_enable(adev, enable);
4999         gfx_v8_0_cp_compute_enable(adev, enable);
5000 }
5001
5002 static int gfx_v8_0_hw_init(void *handle)
5003 {
5004         int r;
5005         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5006
5007         gfx_v8_0_init_golden_registers(adev);
5008         gfx_v8_0_gpu_init(adev);
5009
5010         r = gfx_v8_0_rlc_resume(adev);
5011         if (r)
5012                 return r;
5013
5014         r = gfx_v8_0_cp_resume(adev);
5015
5016         return r;
5017 }
5018
5019 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
5020 {
5021         struct amdgpu_device *adev = kiq_ring->adev;
5022         uint32_t scratch, tmp = 0;
5023         int r, i;
5024
5025         r = amdgpu_gfx_scratch_get(adev, &scratch);
5026         if (r) {
5027                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5028                 return r;
5029         }
5030         WREG32(scratch, 0xCAFEDEAD);
5031
5032         r = amdgpu_ring_alloc(kiq_ring, 10);
5033         if (r) {
5034                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5035                 amdgpu_gfx_scratch_free(adev, scratch);
5036                 return r;
5037         }
5038
5039         /* unmap queues */
5040         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5041         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5042                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5043                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5044                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5045                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5046         amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5047         amdgpu_ring_write(kiq_ring, 0);
5048         amdgpu_ring_write(kiq_ring, 0);
5049         amdgpu_ring_write(kiq_ring, 0);
5050         /* write to scratch for completion */
5051         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5052         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5053         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5054         amdgpu_ring_commit(kiq_ring);
5055
5056         for (i = 0; i < adev->usec_timeout; i++) {
5057                 tmp = RREG32(scratch);
5058                 if (tmp == 0xDEADBEEF)
5059                         break;
5060                 DRM_UDELAY(1);
5061         }
5062         if (i >= adev->usec_timeout) {
5063                 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5064                 r = -EINVAL;
5065         }
5066         amdgpu_gfx_scratch_free(adev, scratch);
5067         return r;
5068 }
5069
5070 static int gfx_v8_0_hw_fini(void *handle)
5071 {
5072         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5073         int i;
5074
5075         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5076         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5077
5078         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5079
5080         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5081
5082         /* disable the KCQs so the CPC stops touching memory that is about to become invalid */
5083         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5084                 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5085
5086         if (amdgpu_sriov_vf(adev)) {
5087                 pr_debug("SR-IOV client: skipping CP/RLC teardown.\n");
5088                 return 0;
5089         }
5090         gfx_v8_0_cp_enable(adev, false);
5091         gfx_v8_0_rlc_stop(adev);
5092
5093         amdgpu_device_ip_set_powergating_state(adev,
5094                                                AMD_IP_BLOCK_TYPE_GFX,
5095                                                AMD_PG_STATE_UNGATE);
5096
5097         return 0;
5098 }
5099
5100 static int gfx_v8_0_suspend(void *handle)
5101 {
5102         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5103         adev->gfx.in_suspend = true;
5104         return gfx_v8_0_hw_fini(adev);
5105 }
5106
5107 static int gfx_v8_0_resume(void *handle)
5108 {
5109         int r;
5110         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5111
5112         r = gfx_v8_0_hw_init(adev);
5113         adev->gfx.in_suspend = false;
5114         return r;
5115 }
5116
5117 static bool gfx_v8_0_is_idle(void *handle)
5118 {
5119         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5120
5121         return !REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE);
5125 }
5126
5127 static int gfx_v8_0_wait_for_idle(void *handle)
5128 {
5129         unsigned i;
5130         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5131
5132         for (i = 0; i < adev->usec_timeout; i++) {
5133                 if (gfx_v8_0_is_idle(handle))
5134                         return 0;
5135
5136                 udelay(1);
5137         }
5138         return -ETIMEDOUT;
5139 }
5140
5141 static bool gfx_v8_0_check_soft_reset(void *handle)
5142 {
5143         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5144         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5145         u32 tmp;
5146
5147         /* GRBM_STATUS */
5148         tmp = RREG32(mmGRBM_STATUS);
5149         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5150                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5151                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5152                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5153                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5154                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5155                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5156                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5157                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5158                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5159                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5160                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5161                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5162         }
5163
5164         /* GRBM_STATUS2 */
5165         tmp = RREG32(mmGRBM_STATUS2);
5166         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5167                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5168                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5169
5170         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5171             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5172             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5173                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5174                                                 SOFT_RESET_CPF, 1);
5175                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5176                                                 SOFT_RESET_CPC, 1);
5177                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5178                                                 SOFT_RESET_CPG, 1);
5179                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5180                                                 SOFT_RESET_GRBM, 1);
5181         }
5182
5183         /* SRBM_STATUS */
5184         tmp = RREG32(mmSRBM_STATUS);
5185         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5186                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5187                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5188         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5189                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5190                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5191
5192         if (grbm_soft_reset || srbm_soft_reset) {
5193                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5194                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5195                 return true;
5196         } else {
5197                 adev->gfx.grbm_soft_reset = 0;
5198                 adev->gfx.srbm_soft_reset = 0;
5199                 return false;
5200         }
5201 }
5202
5203 static int gfx_v8_0_pre_soft_reset(void *handle)
5204 {
5205         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5206         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5207
5208         if ((!adev->gfx.grbm_soft_reset) &&
5209             (!adev->gfx.srbm_soft_reset))
5210                 return 0;
5211
5212         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5213         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5214
5215         /* stop the rlc */
5216         gfx_v8_0_rlc_stop(adev);
5217
5218         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5219             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5220                 /* Disable GFX parsing/prefetching */
5221                 gfx_v8_0_cp_gfx_enable(adev, false);
5222
5223         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5224             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5225             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5226             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5227                 int i;
5228
5229                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5230                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5231
5232                         mutex_lock(&adev->srbm_mutex);
5233                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5234                         gfx_v8_0_deactivate_hqd(adev, 2);
5235                         vi_srbm_select(adev, 0, 0, 0, 0);
5236                         mutex_unlock(&adev->srbm_mutex);
5237                 }
5238                 /* Disable MEC parsing/prefetching */
5239                 gfx_v8_0_cp_compute_enable(adev, false);
5240         }
5241
5242         return 0;
5243 }
5244
5245 static int gfx_v8_0_soft_reset(void *handle)
5246 {
5247         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5248         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5249         u32 tmp;
5250
5251         if ((!adev->gfx.grbm_soft_reset) &&
5252             (!adev->gfx.srbm_soft_reset))
5253                 return 0;
5254
5255         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5256         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5257
5258         if (grbm_soft_reset || srbm_soft_reset) {
5259                 tmp = RREG32(mmGMCON_DEBUG);
5260                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5261                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5262                 WREG32(mmGMCON_DEBUG, tmp);
5263                 udelay(50);
5264         }
5265
5266         if (grbm_soft_reset) {
5267                 tmp = RREG32(mmGRBM_SOFT_RESET);
5268                 tmp |= grbm_soft_reset;
5269                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5270                 WREG32(mmGRBM_SOFT_RESET, tmp);
5271                 tmp = RREG32(mmGRBM_SOFT_RESET);
5272
5273                 udelay(50);
5274
5275                 tmp &= ~grbm_soft_reset;
5276                 WREG32(mmGRBM_SOFT_RESET, tmp);
5277                 tmp = RREG32(mmGRBM_SOFT_RESET);
5278         }
5279
5280         if (srbm_soft_reset) {
5281                 tmp = RREG32(mmSRBM_SOFT_RESET);
5282                 tmp |= srbm_soft_reset;
5283                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5284                 WREG32(mmSRBM_SOFT_RESET, tmp);
5285                 tmp = RREG32(mmSRBM_SOFT_RESET);
5286
5287                 udelay(50);
5288
5289                 tmp &= ~srbm_soft_reset;
5290                 WREG32(mmSRBM_SOFT_RESET, tmp);
5291                 tmp = RREG32(mmSRBM_SOFT_RESET);
5292         }
5293
5294         if (grbm_soft_reset || srbm_soft_reset) {
5295                 tmp = RREG32(mmGMCON_DEBUG);
5296                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5297                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5298                 WREG32(mmGMCON_DEBUG, tmp);
5299         }
5300
5301         /* Wait a little for things to settle down */
5302         udelay(50);
5303
5304         return 0;
5305 }
5306
5307 static int gfx_v8_0_post_soft_reset(void *handle)
5308 {
5309         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5310         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5311
5312         if ((!adev->gfx.grbm_soft_reset) &&
5313             (!adev->gfx.srbm_soft_reset))
5314                 return 0;
5315
5316         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5317         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5318
5319         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5320             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5321                 gfx_v8_0_cp_gfx_resume(adev);
5322
5323         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5324             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5325             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5326             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5327                 int i;
5328
5329                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5330                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5331
5332                         mutex_lock(&adev->srbm_mutex);
5333                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5334                         gfx_v8_0_deactivate_hqd(adev, 2);
5335                         vi_srbm_select(adev, 0, 0, 0, 0);
5336                         mutex_unlock(&adev->srbm_mutex);
5337                 }
5338                 gfx_v8_0_kiq_resume(adev);
5339         }
5340         gfx_v8_0_rlc_start(adev);
5341
5342         return 0;
5343 }
5344
5345 /**
5346  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5347  *
5348  * @adev: amdgpu_device pointer
5349  *
5350  * Fetches a snapshot of the GPU clock counter.
5351  * Returns the 64-bit counter value.
5352  */
5353 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5354 {
5355         uint64_t clock;
5356
5357         mutex_lock(&adev->gfx.gpu_clock_mutex);
5358         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
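	/* the write above latches a coherent 64-bit snapshot; the mutex
	 * keeps another caller from re-latching between the two halves */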
5359         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5360                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5361         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5362         return clock;
5363 }
5364
5365 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5366                                           uint32_t vmid,
5367                                           uint32_t gds_base, uint32_t gds_size,
5368                                           uint32_t gws_base, uint32_t gws_size,
5369                                           uint32_t oa_base, uint32_t oa_size)
5370 {
5371         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5372         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5373
5374         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5375         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5376
5377         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5378         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5379
5380         /* GDS Base */
5381         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5382         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5383                                 WRITE_DATA_DST_SEL(0)));
5384         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5385         amdgpu_ring_write(ring, 0);
5386         amdgpu_ring_write(ring, gds_base);
5387
5388         /* GDS Size */
5389         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5390         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5391                                 WRITE_DATA_DST_SEL(0)));
5392         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5393         amdgpu_ring_write(ring, 0);
5394         amdgpu_ring_write(ring, gds_size);
5395
5396         /* GWS */
5397         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5398         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5399                                 WRITE_DATA_DST_SEL(0)));
5400         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5401         amdgpu_ring_write(ring, 0);
5402         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5403
5404         /* OA */
5405         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5406         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5407                                 WRITE_DATA_DST_SEL(0)));
5408         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5409         amdgpu_ring_write(ring, 0);
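             /* OA space is allocated as a contiguous bitmask covering
              * bits [oa_base, oa_base + oa_size). */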
5410         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5411 }
5412
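/* Per-wave state is exposed through an indirect register pair: select the
 * SIMD/wave/register in SQ_IND_INDEX, then read the value from SQ_IND_DATA.
 */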
5413 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5414 {
5415         WREG32(mmSQ_IND_INDEX,
5416                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5417                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5418                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5419                 (SQ_IND_INDEX__FORCE_READ_MASK));
5420         return RREG32(mmSQ_IND_DATA);
5421 }
5422
5423 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5424                            uint32_t wave, uint32_t thread,
5425                            uint32_t regno, uint32_t num, uint32_t *out)
5426 {
5427         WREG32(mmSQ_IND_INDEX,
5428                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5429                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5430                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5431                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5432                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5433                 (SQ_IND_INDEX__AUTO_INCR_MASK));
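             /* AUTO_INCR advances the index after every SQ_IND_DATA read, so a
              * run of consecutive registers streams out in one loop. */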
5434         while (num--)
5435                 *(out++) = RREG32(mmSQ_IND_DATA);
5436 }
5437
5438 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5439 {
5440         /* type 0 wave data */
5441         dst[(*no_fields)++] = 0;
5442         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5443         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5444         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5445         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5446         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5447         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5448         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5449         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5450         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5451         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5452         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5453         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5454         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5455         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5456         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5457         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5458         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5459         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5460 }
5461
5462 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5463                                      uint32_t wave, uint32_t start,
5464                                      uint32_t size, uint32_t *dst)
5465 {
5466         wave_read_regs(
5467                 adev, simd, wave, 0,
5468                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5469 }
5470
5471
5472 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5473         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5474         .select_se_sh = &gfx_v8_0_select_se_sh,
5475         .read_wave_data = &gfx_v8_0_read_wave_data,
5476         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5477         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5478 };
5479
5480 static int gfx_v8_0_early_init(void *handle)
5481 {
5482         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5483
5484         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5485         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5486         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5487         gfx_v8_0_set_ring_funcs(adev);
5488         gfx_v8_0_set_irq_funcs(adev);
5489         gfx_v8_0_set_gds_init(adev);
5490         gfx_v8_0_set_rlc_funcs(adev);
5491
5492         return 0;
5493 }
5494
5495 static int gfx_v8_0_late_init(void *handle)
5496 {
5497         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5498         int r;
5499
5500         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5501         if (r)
5502                 return r;
5503
5504         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5505         if (r)
5506                 return r;
5507
5508         /* requires IBs so do in late init after IB pool is initialized */
5509         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5510         if (r)
5511                 return r;
5512
5513         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5514         if (r) {
5515                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5516                 return r;
5517         }
5518
5519         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5520         if (r) {
5521                 DRM_ERROR(
5522                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5523                         r);
5524                 return r;
5525         }
5526
5527         return 0;
5528 }
5529
5530 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5531                                                        bool enable)
5532 {
5533         if (((adev->asic_type == CHIP_POLARIS11) ||
5534             (adev->asic_type == CHIP_POLARIS12) ||
5535             (adev->asic_type == CHIP_VEGAM)) &&
5536             adev->powerplay.pp_funcs->set_powergating_by_smu)
5537                 /* Send msg to SMU via Powerplay */
5538                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5539
5540         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5541 }
5542
5543 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5544                                                         bool enable)
5545 {
5546         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5547 }
5548
5549 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5550                 bool enable)
5551 {
5552         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5553 }
5554
5555 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5556                                           bool enable)
5557 {
5558         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5559 }
5560
5561 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5562                                                 bool enable)
5563 {
5564         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5565
5566         /* Read any GFX register to wake up GFX. */
5567         if (!enable)
5568                 RREG32(mmDB_RENDER_CONTROL);
5569 }
5570
5571 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5572                                           bool enable)
5573 {
5574         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5575                 cz_enable_gfx_cg_power_gating(adev, true);
5576                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5577                         cz_enable_gfx_pipeline_power_gating(adev, true);
5578         } else {
5579                 cz_enable_gfx_cg_power_gating(adev, false);
5580                 cz_enable_gfx_pipeline_power_gating(adev, false);
5581         }
5582 }
5583
5584 static int gfx_v8_0_set_powergating_state(void *handle,
5585                                           enum amd_powergating_state state)
5586 {
5587         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5588         bool enable = (state == AMD_PG_STATE_GATE);
5589
5590         if (amdgpu_sriov_vf(adev))
5591                 return 0;
5592
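             /* Keep the RLC in safe mode while the power-gating controls are
              * reprogrammed; it is released again after the switch below. */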
5593         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5594                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5595                                 AMD_PG_SUPPORT_CP |
5596                                 AMD_PG_SUPPORT_GFX_DMG))
5597                 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5598         switch (adev->asic_type) {
5599         case CHIP_CARRIZO:
5600         case CHIP_STONEY:
5601
5602                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5603                         cz_enable_sck_slow_down_on_power_up(adev, true);
5604                         cz_enable_sck_slow_down_on_power_down(adev, true);
5605                 } else {
5606                         cz_enable_sck_slow_down_on_power_up(adev, false);
5607                         cz_enable_sck_slow_down_on_power_down(adev, false);
5608                 }
5609                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5610                         cz_enable_cp_power_gating(adev, true);
5611                 else
5612                         cz_enable_cp_power_gating(adev, false);
5613
5614                 cz_update_gfx_cg_power_gating(adev, enable);
5615
5616                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5617                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5618                 else
5619                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5620
5621                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5622                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5623                 else
5624                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5625                 break;
5626         case CHIP_POLARIS11:
5627         case CHIP_POLARIS12:
5628         case CHIP_VEGAM:
5629                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5630                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5631                 else
5632                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5633
5634                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5635                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5636                 else
5637                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5638
5639                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5640                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5641                 else
5642                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5643                 break;
5644         default:
5645                 break;
5646         }
5647         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5648                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5649                                 AMD_PG_SUPPORT_CP |
5650                                 AMD_PG_SUPPORT_GFX_DMG))
5651                 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5652         return 0;
5653 }
5654
5655 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5656 {
5657         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5658         int data;
5659
5660         if (amdgpu_sriov_vf(adev))
5661                 *flags = 0;
5662
5663         /* AMD_CG_SUPPORT_GFX_MGCG */
5664         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5665         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5666                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5667
5668         /* AMD_CG_SUPPORT_GFX_CGCG */
5669         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5670         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5671                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5672
5673         /* AMD_CG_SUPPORT_GFX_CGLS */
5674         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5675                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5676
5677         /* AMD_CG_SUPPORT_GFX_CGTS */
5678         data = RREG32(mmCGTS_SM_CTRL_REG);
5679         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5680                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5681
5682         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5683         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5684                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5685
5686         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5687         data = RREG32(mmRLC_MEM_SLP_CNTL);
5688         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5689                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5690
5691         /* AMD_CG_SUPPORT_GFX_CP_LS */
5692         data = RREG32(mmCP_MEM_SLP_CNTL);
5693         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5694                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5695 }
5696
5697 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5698                                      uint32_t reg_addr, uint32_t cmd)
5699 {
5700         uint32_t data;
5701
5702         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5703
5704         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5705         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5706
5707         data = RREG32(mmRLC_SERDES_WR_CTRL);
5708         if (adev->asic_type == CHIP_STONEY)
5709                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5710                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5711                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5712                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5713                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5714                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5715                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5716                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5717                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5718         else
5719                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5720                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5721                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5722                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5723                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5724                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5725                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5726                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5727                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5728                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5729                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5730         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5731                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5732                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5733                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5734
5735         WREG32(mmRLC_SERDES_WR_CTRL, data);
5736 }
5737
5738 #define MSG_ENTER_RLC_SAFE_MODE     1
5739 #define MSG_EXIT_RLC_SAFE_MODE      0
5740 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5741 #define RLC_GPR_REG2__REQ__SHIFT 0
5742 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5743 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5744
5745 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5746 {
5747         u32 data;
5748         unsigned i;
5749
5750         data = RREG32(mmRLC_CNTL);
5751         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5752                 return;
5753
5754         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
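                     /* Request safe mode: raise CMD with MESSAGE=1, wait for
                      * the GFX clock/power status to settle, then wait for the
                      * RLC to acknowledge by clearing CMD. */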
5755                 data |= RLC_SAFE_MODE__CMD_MASK;
5756                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5757                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5758                 WREG32(mmRLC_SAFE_MODE, data);
5759
5760                 for (i = 0; i < adev->usec_timeout; i++) {
5761                         if ((RREG32(mmRLC_GPM_STAT) &
5762                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5763                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5764                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5765                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5766                                 break;
5767                         udelay(1);
5768                 }
5769
5770                 for (i = 0; i < adev->usec_timeout; i++) {
5771                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5772                                 break;
5773                         udelay(1);
5774                 }
5775                 adev->gfx.rlc.in_safe_mode = true;
5776         }
5777 }
5778
5779 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5780 {
5781         u32 data = 0;
5782         unsigned i;
5783
5784         data = RREG32(mmRLC_CNTL);
5785         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5786                 return;
5787
5788         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5789                 if (adev->gfx.rlc.in_safe_mode) {
5790                         data |= RLC_SAFE_MODE__CMD_MASK;
5791                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5792                         WREG32(mmRLC_SAFE_MODE, data);
5793                         adev->gfx.rlc.in_safe_mode = false;
5794                 }
5795         }
5796
5797         for (i = 0; i < adev->usec_timeout; i++) {
5798                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5799                         break;
5800                 udelay(1);
5801         }
5802 }
5803
5804 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5805         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5806         .exit_safe_mode = iceland_exit_rlc_safe_mode
5807 };
5808
5809 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5810                                                       bool enable)
5811 {
5812         uint32_t temp, data;
5813
5814         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5815
5816         /* MGCG is disabled by HW by default */
5817         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5818                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5819                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5820                                 /* 1 - RLC memory Light sleep */
5821                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5822
5823                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5824                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5825                 }
5826
5827                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5828                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5829                 if (adev->flags & AMD_IS_APU)
5830                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5831                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5832                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5833                 else
5834                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5835                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5836                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5837                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5838
5839                 if (temp != data)
5840                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5841
5842                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5843                 gfx_v8_0_wait_for_rlc_serdes(adev);
5844
5845                 /* 5 - clear mgcg override */
5846                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5847
5848                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5849                         /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5850                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5851                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5852                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5853                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5854                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5855                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5856                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5857                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5858                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5859                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5860                         if (temp != data)
5861                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5862                 }
5863                 udelay(50);
5864
5865                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5866                 gfx_v8_0_wait_for_rlc_serdes(adev);
5867         } else {
5868                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5869                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5870                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5871                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5872                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5873                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5874                 if (temp != data)
5875                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5876
5877                 /* 2 - disable MGLS in RLC */
5878                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5879                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5880                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5881                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5882                 }
5883
5884                 /* 3 - disable MGLS in CP */
5885                 data = RREG32(mmCP_MEM_SLP_CNTL);
5886                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5887                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5888                         WREG32(mmCP_MEM_SLP_CNTL, data);
5889                 }
5890
5891                 /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
5892                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5893                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5894                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5895                 if (temp != data)
5896                         WREG32(mmCGTS_SM_CTRL_REG, data);
5897
5898                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5899                 gfx_v8_0_wait_for_rlc_serdes(adev);
5900
5901                 /* 6 - set mgcg override */
5902                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5903
5904                 udelay(50);
5905
5906                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5907                 gfx_v8_0_wait_for_rlc_serdes(adev);
5908         }
5909
5910         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5911 }
5912
5913 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5914                                                       bool enable)
5915 {
5916         uint32_t temp, temp1, data, data1;
5917
5918         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5919
5920         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5921
5922         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5923                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5924                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5925                 if (temp1 != data1)
5926                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5927
5928                 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5929                 gfx_v8_0_wait_for_rlc_serdes(adev);
5930
5931                 /* 2 - clear cgcg override */
5932                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5933
5934                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5935                 gfx_v8_0_wait_for_rlc_serdes(adev);
5936
5937                 /* 3 - write cmd to set CGLS */
5938                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5939
5940                 /* 4 - enable cgcg */
5941                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5942
5943                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5944                         /* enable cgls*/
5945                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5946
5947                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5948                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5949
5950                         if (temp1 != data1)
5951                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5952                 } else {
5953                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5954                 }
5955
5956                 if (temp != data)
5957                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5958
5959                 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5960                  * Cmp_busy/GFX_Idle interrupts
5961                  */
5962                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5963         } else {
5964                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5965                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5966
5967                 /* TEST CGCG */
5968                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5969                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5970                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5971                 if (temp1 != data1)
5972                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5973
5974                 /* read gfx register to wake up cgcg */
5975                 RREG32(mmCB_CGTT_SCLK_CTRL);
5976                 RREG32(mmCB_CGTT_SCLK_CTRL);
5977                 RREG32(mmCB_CGTT_SCLK_CTRL);
5978                 RREG32(mmCB_CGTT_SCLK_CTRL);
5979
5980                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5981                 gfx_v8_0_wait_for_rlc_serdes(adev);
5982
5983                 /* write cmd to set CGCG override */
5984                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5985
5986                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5987                 gfx_v8_0_wait_for_rlc_serdes(adev);
5988
5989                 /* write cmd to Clear CGLS */
5990                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5991
5992                 /* disable cgcg, cgls should be disabled too. */
5993                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5994                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5995                 if (temp != data)
5996                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5997                 /* enable interrupts again for PG */
5998                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5999         }
6000
6001         gfx_v8_0_wait_for_rlc_serdes(adev);
6002
6003         adev->gfx.rlc.funcs->exit_safe_mode(adev);
6004 }
6005 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6006                                             bool enable)
6007 {
6008         if (enable) {
6009                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6010                  * ===  MGCG + MGLS + TS(CG/LS) ===
6011                  */
6012                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6013                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6014         } else {
6015                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6016                  * ===  CGCG + CGLS ===
6017                  */
6018                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6019                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6020         }
6021         return 0;
6022 }
6023
6024 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6025                                           enum amd_clockgating_state state)
6026 {
6027         uint32_t msg_id, pp_state = 0;
6028         uint32_t pp_support_state = 0;
6029
6030         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6031                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6032                         pp_support_state = PP_STATE_SUPPORT_LS;
6033                         pp_state = PP_STATE_LS;
6034                 }
6035                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6036                         pp_support_state |= PP_STATE_SUPPORT_CG;
6037                         pp_state |= PP_STATE_CG;
6038                 }
6039                 if (state == AMD_CG_STATE_UNGATE)
6040                         pp_state = 0;
6041
6042                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6043                                 PP_BLOCK_GFX_CG,
6044                                 pp_support_state,
6045                                 pp_state);
6046                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6047                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6048         }
6049
6050         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6051                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6052                         pp_support_state = PP_STATE_SUPPORT_LS;
6053                         pp_state = PP_STATE_LS;
6054                 }
6055
6056                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6057                         pp_support_state |= PP_STATE_SUPPORT_CG;
6058                         pp_state |= PP_STATE_CG;
6059                 }
6060
6061                 if (state == AMD_CG_STATE_UNGATE)
6062                         pp_state = 0;
6063
6064                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6065                                 PP_BLOCK_GFX_MG,
6066                                 pp_support_state,
6067                                 pp_state);
6068                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6069                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6070         }
6071
6072         return 0;
6073 }
6074
6075 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6076                                           enum amd_clockgating_state state)
6077 {
6078
6079         uint32_t msg_id, pp_state = 0;
6080         uint32_t pp_support_state = 0;
6081
6082         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6083                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6084                         pp_support_state = PP_STATE_SUPPORT_LS;
6085                         pp_state = PP_STATE_LS;
6086                 }
6087                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6088                         pp_support_state |= PP_STATE_SUPPORT_CG;
6089                         pp_state |= PP_STATE_CG;
6090                 }
6091                 if (state == AMD_CG_STATE_UNGATE)
6092                         pp_state = 0;
6093
6094                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6095                                 PP_BLOCK_GFX_CG,
6096                                 pp_support_state,
6097                                 pp_state);
6098                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6099                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6100         }
6101
6102         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6103                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6104                         pp_support_state = PP_STATE_SUPPORT_LS;
6105                         pp_state = PP_STATE_LS;
6106                 }
6107                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6108                         pp_support_state |= PP_STATE_SUPPORT_CG;
6109                         pp_state |= PP_STATE_CG;
6110                 }
6111                 if (state == AMD_CG_STATE_UNGATE)
6112                         pp_state = 0;
6113
6114                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6115                                 PP_BLOCK_GFX_3D,
6116                                 pp_support_state,
6117                                 pp_state);
6118                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6119                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6120         }
6121
6122         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6123                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6124                         pp_support_state = PP_STATE_SUPPORT_LS;
6125                         pp_state = PP_STATE_LS;
6126                 }
6127
6128                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6129                         pp_support_state |= PP_STATE_SUPPORT_CG;
6130                         pp_state |= PP_STATE_CG;
6131                 }
6132
6133                 if (state == AMD_CG_STATE_UNGATE)
6134                         pp_state = 0;
6135
6136                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6137                                 PP_BLOCK_GFX_MG,
6138                                 pp_support_state,
6139                                 pp_state);
6140                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6141                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6142         }
6143
6144         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6145                 pp_support_state = PP_STATE_SUPPORT_LS;
6146
6147                 if (state == AMD_CG_STATE_UNGATE)
6148                         pp_state = 0;
6149                 else
6150                         pp_state = PP_STATE_LS;
6151
6152                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6153                                 PP_BLOCK_GFX_RLC,
6154                                 pp_support_state,
6155                                 pp_state);
6156                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6157                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6158         }
6159
6160         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6161                 pp_support_state = PP_STATE_SUPPORT_LS;
6162
6163                 if (state == AMD_CG_STATE_UNGATE)
6164                         pp_state = 0;
6165                 else
6166                         pp_state = PP_STATE_LS;
6167                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6168                         PP_BLOCK_GFX_CP,
6169                         pp_support_state,
6170                         pp_state);
6171                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6172                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6173         }
6174
6175         return 0;
6176 }
6177
6178 static int gfx_v8_0_set_clockgating_state(void *handle,
6179                                           enum amd_clockgating_state state)
6180 {
6181         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6182
6183         if (amdgpu_sriov_vf(adev))
6184                 return 0;
6185
6186         switch (adev->asic_type) {
6187         case CHIP_FIJI:
6188         case CHIP_CARRIZO:
6189         case CHIP_STONEY:
6190                 gfx_v8_0_update_gfx_clock_gating(adev,
6191                                                  state == AMD_CG_STATE_GATE);
6192                 break;
6193         case CHIP_TONGA:
6194                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6195                 break;
6196         case CHIP_POLARIS10:
6197         case CHIP_POLARIS11:
6198         case CHIP_POLARIS12:
6199         case CHIP_VEGAM:
6200                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6201                 break;
6202         default:
6203                 break;
6204         }
6205         return 0;
6206 }
6207
6208 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6209 {
6210         return ring->adev->wb.wb[ring->rptr_offs];
6211 }
6212
6213 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6214 {
6215         struct amdgpu_device *adev = ring->adev;
6216
6217         if (ring->use_doorbell)
6218                 /* XXX check if swapping is necessary on BE */
6219                 return ring->adev->wb.wb[ring->wptr_offs];
6220         else
6221                 return RREG32(mmCP_RB0_WPTR);
6222 }
6223
6224 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6225 {
6226         struct amdgpu_device *adev = ring->adev;
6227
6228         if (ring->use_doorbell) {
6229                 /* XXX check if swapping is necessary on BE */
6230                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6231                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6232         } else {
6233                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6234                 (void)RREG32(mmCP_RB0_WPTR);
6235         }
6236 }
6237
6238 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6239 {
6240         u32 ref_and_mask, reg_mem_engine;
6241
6242         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6243             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
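                     /* Each compute pipe polls its own HDP-flush-done bit; the
                      * shift assumes the per-pipe CPn bits are adjacent, with
                      * ME1 pipes starting at CP2 and ME2 pipes at CP6. */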
6244                 switch (ring->me) {
6245                 case 1:
6246                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6247                         break;
6248                 case 2:
6249                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6250                         break;
6251                 default:
6252                         return;
6253                 }
6254                 reg_mem_engine = 0;
6255         } else {
6256                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6257                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6258         }
6259
6260         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6261         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6262                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6263                                  reg_mem_engine));
6264         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6265         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6266         amdgpu_ring_write(ring, ref_and_mask);
6267         amdgpu_ring_write(ring, ref_and_mask);
6268         amdgpu_ring_write(ring, 0x20); /* poll interval */
6269 }
6270
6271 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6272 {
6273         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6274         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6275                 EVENT_INDEX(4));
6276
6277         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6278         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6279                 EVENT_INDEX(0));
6280 }
6281
6282 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6283                                       struct amdgpu_ib *ib,
6284                                       unsigned vmid, bool ctx_switch)
6285 {
6286         u32 header, control = 0;
6287
6288         if (ib->flags & AMDGPU_IB_FLAG_CE)
6289                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6290         else
6291                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6292
6293         control |= ib->length_dw | (vmid << 24);
6294
6295         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6296                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6297
6298                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6299                         gfx_v8_0_ring_emit_de_meta(ring);
6300         }
6301
6302         amdgpu_ring_write(ring, header);
6303         amdgpu_ring_write(ring,
6304 #ifdef __BIG_ENDIAN
6305                           (2 << 0) |
6306 #endif
6307                           (ib->gpu_addr & 0xFFFFFFFC));
6308         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6309         amdgpu_ring_write(ring, control);
6310 }
6311
6312 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6313                                           struct amdgpu_ib *ib,
6314                                           unsigned vmid, bool ctx_switch)
6315 {
6316         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6317
6318         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6319         amdgpu_ring_write(ring,
6320 #ifdef __BIG_ENDIAN
6321                                 (2 << 0) |
6322 #endif
6323                                 (ib->gpu_addr & 0xFFFFFFFC));
6324         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6325         amdgpu_ring_write(ring, control);
6326 }
6327
6328 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6329                                          u64 seq, unsigned flags)
6330 {
6331         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6332         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6333
6334         /* Workaround for cache flush problems: first send a dummy EOP
6335          * event down the pipe with a sequence number one below the real fence.
6336          */
6337         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6338         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6339                                  EOP_TC_ACTION_EN |
6340                                  EOP_TC_WB_ACTION_EN |
6341                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6342                                  EVENT_INDEX(5)));
6343         amdgpu_ring_write(ring, addr & 0xfffffffc);
6344         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6345                                 DATA_SEL(1) | INT_SEL(0));
6346         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6347         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6348
6349         /* Then send the real EOP event down the pipe:
6350          * EVENT_WRITE_EOP - flush caches, send int */
6351         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6352         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6353                                  EOP_TC_ACTION_EN |
6354                                  EOP_TC_WB_ACTION_EN |
6355                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6356                                  EVENT_INDEX(5)));
6357         amdgpu_ring_write(ring, addr & 0xfffffffc);
6358         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6359                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6360         amdgpu_ring_write(ring, lower_32_bits(seq));
6361         amdgpu_ring_write(ring, upper_32_bits(seq));
6362
6363 }
6364
6365 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6366 {
6367         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6368         uint32_t seq = ring->fence_drv.sync_seq;
6369         uint64_t addr = ring->fence_drv.gpu_addr;
6370
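             /* Stall this ring (PFP for GFX, ME for compute) until the fence
              * memory location reaches sync_seq. */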
6371         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6372         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6373                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6374                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6375         amdgpu_ring_write(ring, addr & 0xfffffffc);
6376         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6377         amdgpu_ring_write(ring, seq);
6378         amdgpu_ring_write(ring, 0xffffffff);
6379         amdgpu_ring_write(ring, 4); /* poll interval */
6380 }
6381
6382 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6383                                         unsigned vmid, uint64_t pd_addr)
6384 {
6385         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6386
6387         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6388
6389         /* wait for the invalidate to complete */
6390         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6391         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6392                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6393                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6394         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6395         amdgpu_ring_write(ring, 0);
6396         amdgpu_ring_write(ring, 0); /* ref */
6397         amdgpu_ring_write(ring, 0); /* mask */
6398         amdgpu_ring_write(ring, 0x20); /* poll interval */
6399
6400         /* compute doesn't have PFP */
6401         if (usepfp) {
6402                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6403                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6404                 amdgpu_ring_write(ring, 0x0);
6405         }
6406 }
6407
6408 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6409 {
6410         return ring->adev->wb.wb[ring->wptr_offs];
6411 }
6412
6413 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6414 {
6415         struct amdgpu_device *adev = ring->adev;
6416
6417         /* XXX check if swapping is necessary on BE */
6418         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6419         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6420 }
6421
6422 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6423                                            bool acquire)
6424 {
6425         struct amdgpu_device *adev = ring->adev;
6426         int pipe_num, tmp, reg;
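             /* On acquire, grant the pipe its maximum wave-launch budget (the
              * full VALUE field); otherwise throttle it down to the minimum. */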
6427         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6428
6429         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6430
6431         /* the first ME has only 2 entries, GFX and HP3D */
6432         if (ring->me > 0)
6433                 pipe_num -= 2;
6434
6435         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6436         tmp = RREG32(reg);
6437         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6438         WREG32(reg, tmp);
6439 }
6440
6441 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6442                                             struct amdgpu_ring *ring,
6443                                             bool acquire)
6444 {
6445         int i, pipe;
6446         bool reserve;
6447         struct amdgpu_ring *iring;
6448
6449         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6450         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6451         if (acquire)
6452                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6453         else
6454                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6455
6456         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6457                 /* Clear all reservations - everyone reacquires all resources */
6458                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6459                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6460                                                        true);
6461
6462                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6463                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6464                                                        true);
6465         } else {
6466                 /* Lower all pipes without a current reservation */
6467                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6468                         iring = &adev->gfx.gfx_ring[i];
6469                         pipe = amdgpu_gfx_queue_to_bit(adev,
6470                                                        iring->me,
6471                                                        iring->pipe,
6472                                                        0);
6473                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6474                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6475                 }
6476
6477                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6478                         iring = &adev->gfx.compute_ring[i];
6479                         pipe = amdgpu_gfx_queue_to_bit(adev,
6480                                                        iring->me,
6481                                                        iring->pipe,
6482                                                        0);
6483                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6484                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6485                 }
6486         }
6487
6488         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6489 }
6490
6491 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6492                                       struct amdgpu_ring *ring,
6493                                       bool acquire)
6494 {
6495         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6496         uint32_t queue_priority = acquire ? 0xf : 0x0;
6497
6498         mutex_lock(&adev->srbm_mutex);
6499         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6500
6501         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6502         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6503
6504         vi_srbm_select(adev, 0, 0, 0, 0);
6505         mutex_unlock(&adev->srbm_mutex);
6506 }
6507 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6508                                                enum drm_sched_priority priority)
6509 {
6510         struct amdgpu_device *adev = ring->adev;
6511         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6512
6513         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6514                 return;
6515
6516         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6517         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6518 }
6519
6520 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6521                                              u64 addr, u64 seq,
6522                                              unsigned flags)
6523 {
6524         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6525         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6526
6527         /* RELEASE_MEM - flush caches, send int */
6528         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6529         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6530                                  EOP_TC_ACTION_EN |
6531                                  EOP_TC_WB_ACTION_EN |
6532                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6533                                  EVENT_INDEX(5)));
6534         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6535         amdgpu_ring_write(ring, addr & 0xfffffffc);
6536         amdgpu_ring_write(ring, upper_32_bits(addr));
6537         amdgpu_ring_write(ring, lower_32_bits(seq));
6538         amdgpu_ring_write(ring, upper_32_bits(seq));
6539 }
6540
6541 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6542                                          u64 seq, unsigned int flags)
6543 {
6544         /* we only allocate 32 bits for each fence seq writeback address */
6545         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6546
6547         /* write fence seq to the "addr" */
6548         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6549         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6550                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6551         amdgpu_ring_write(ring, lower_32_bits(addr));
6552         amdgpu_ring_write(ring, upper_32_bits(addr));
6553         amdgpu_ring_write(ring, lower_32_bits(seq));
6554
6555         if (flags & AMDGPU_FENCE_FLAG_INT) {
6556                 /* set register to trigger INT */
6557                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6558                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6559                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6560                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6561                 amdgpu_ring_write(ring, 0);
6562                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6563         }
6564 }
6565
6566 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6567 {
6568         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6569         amdgpu_ring_write(ring, 0);
6570 }
6571
6572 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6573 {
6574         uint32_t dw2 = 0;
6575
6576         if (amdgpu_sriov_vf(ring->adev))
6577                 gfx_v8_0_ring_emit_ce_meta(ring);
6578
6579         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6580         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6581                 gfx_v8_0_ring_emit_vgt_flush(ring);
6582                 /* set load_global_config & load_global_uconfig */
6583                 dw2 |= 0x8001;
6584                 /* set load_cs_sh_regs */
6585                 dw2 |= 0x01000000;
6586                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6587                 dw2 |= 0x10002;
6588
6589                 /* set load_ce_ram if a preamble is present */
6590                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6591                         dw2 |= 0x10000000;
6592         } else {
6593                 /* still load_ce_ram the first time a preamble is presented,
6594                  * even though no context switch happens.
6595                  */
6596                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6597                         dw2 |= 0x10000000;
6598         }
6599
6600         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6601         amdgpu_ring_write(ring, dw2);
6602         amdgpu_ring_write(ring, 0);
6603 }
6604
6605 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6606 {
6607         unsigned ret;
6608
6609         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6610         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6611         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6612         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6613         ret = ring->wptr & ring->buf_mask;
6614         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6615         return ret;
6616 }
6617
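/*
 * Replace the 0x55aa55aa placeholder with the number of DWs actually
 * emitted since init_cond_exec; the else branch accounts for a write
 * pointer that has wrapped around the ring buffer in the meantime.
 */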
6618 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6619 {
6620         unsigned cur;
6621
6622         BUG_ON(offset > ring->buf_mask);
6623         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6624
6625         cur = (ring->wptr & ring->buf_mask) - 1;
6626         if (likely(cur > offset))
6627                 ring->ring[offset] = cur - offset;
6628         else
6629                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6630 }
6631
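/*
 * Read a register through the ring (only wired up for the KIQ, see
 * gfx_v8_0_ring_funcs_kiq below): COPY_DATA moves the register value into
 * the writeback slot at reg_val_offs, presumably for the SR-IOV
 * virtualization code to consume.
 */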
6632 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6633 {
6634         struct amdgpu_device *adev = ring->adev;
6635
6636         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6637         amdgpu_ring_write(ring, 0 |     /* src: register */
6638                                 (5 << 8) |      /* dst: memory */
6639                                 (1 << 20));     /* write confirm */
6640         amdgpu_ring_write(ring, reg);
6641         amdgpu_ring_write(ring, 0);
6642         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6643                                 adev->virt.reg_val_offs * 4));
6644         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6645                                 adev->virt.reg_val_offs * 4));
6646 }
6647
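/*
 * Write a register through the ring via WRITE_DATA. The control word varies
 * by ring type: GFX selects engine 1 with write confirmation, the KIQ
 * requests the no-auto-increment addressing mode, and compute rings only
 * ask for write confirmation.
 */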
6648 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6649                                   uint32_t val)
6650 {
6651         uint32_t cmd;
6652
6653         switch (ring->funcs->type) {
6654         case AMDGPU_RING_TYPE_GFX:
6655                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6656                 break;
6657         case AMDGPU_RING_TYPE_KIQ:
6658                 cmd = 1 << 16; /* no inc addr */
6659                 break;
6660         default:
6661                 cmd = WR_CONFIRM;
6662                 break;
6663         }
6664
6665         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6666         amdgpu_ring_write(ring, cmd);
6667         amdgpu_ring_write(ring, reg);
6668         amdgpu_ring_write(ring, 0);
6669         amdgpu_ring_write(ring, val);
6670 }
6671
6672 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6673                                                  enum amdgpu_interrupt_state state)
6674 {
6675         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6676                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6677 }
6678
6679 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6680                                                      int me, int pipe,
6681                                                      enum amdgpu_interrupt_state state)
6682 {
6683         u32 mec_int_cntl, mec_int_cntl_reg;
6684
6685         /*
6686          * amdgpu controls only the first MEC. That's why this function only
6687          * handles the setting of interrupts for this specific MEC. All other
6688          * pipes' interrupts are set by amdkfd.
6689          */
6690
6691         if (me == 1) {
6692                 switch (pipe) {
6693                 case 0:
6694                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6695                         break;
6696                 case 1:
6697                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6698                         break;
6699                 case 2:
6700                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6701                         break;
6702                 case 3:
6703                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6704                         break;
6705                 default:
6706                         DRM_DEBUG("invalid pipe %d\n", pipe);
6707                         return;
6708                 }
6709         } else {
6710                 DRM_DEBUG("invalid me %d\n", me);
6711                 return;
6712         }
6713
6714         switch (state) {
6715         case AMDGPU_IRQ_STATE_DISABLE:
6716                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6717                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6718                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6719                 break;
6720         case AMDGPU_IRQ_STATE_ENABLE:
6721                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6722                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6723                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6724                 break;
6725         default:
6726                 break;
6727         }
6728 }
6729
6730 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6731                                              struct amdgpu_irq_src *source,
6732                                              unsigned type,
6733                                              enum amdgpu_interrupt_state state)
6734 {
6735         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6736                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6737
6738         return 0;
6739 }
6740
6741 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6742                                               struct amdgpu_irq_src *source,
6743                                               unsigned type,
6744                                               enum amdgpu_interrupt_state state)
6745 {
6746         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6747                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6748
6749         return 0;
6750 }
6751
6752 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6753                                             struct amdgpu_irq_src *src,
6754                                             unsigned type,
6755                                             enum amdgpu_interrupt_state state)
6756 {
6757         switch (type) {
6758         case AMDGPU_CP_IRQ_GFX_EOP:
6759                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6760                 break;
6761         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6762                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6763                 break;
6764         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6765                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6766                 break;
6767         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6768                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6769                 break;
6770         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6771                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6772                 break;
6773         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6774                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6775                 break;
6776         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6777                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6778                 break;
6779         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6780                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6781                 break;
6782         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6783                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6784                 break;
6785         default:
6786                 break;
6787         }
6788         return 0;
6789 }
6790
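/*
 * Toggle CP_ECC_ERROR_INT_ENABLE everywhere it exists: the global CP and
 * CPC controls, the three CP ring controls and every MEC pipe, so ECC
 * error reporting is switched on or off as a single unit.
 */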
6791 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6792                                          struct amdgpu_irq_src *source,
6793                                          unsigned int type,
6794                                          enum amdgpu_interrupt_state state)
6795 {
6796         int enable_flag;
6797
6798         switch (state) {
6799         case AMDGPU_IRQ_STATE_DISABLE:
6800                 enable_flag = 0;
6801                 break;
6802
6803         case AMDGPU_IRQ_STATE_ENABLE:
6804                 enable_flag = 1;
6805                 break;
6806
6807         default:
6808                 return -EINVAL;
6809         }
6810
6811         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6812         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6813         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6814         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6815         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6816         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6817                      enable_flag);
6818         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6819                      enable_flag);
6820         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6821                      enable_flag);
6822         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6823                      enable_flag);
6824         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6825                      enable_flag);
6826         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6827                      enable_flag);
6828         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6829                      enable_flag);
6830         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6831                      enable_flag);
6832
6833         return 0;
6834 }
6835
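/*
 * Note the inversion below: "enabling" the SQ interrupt source clears the
 * STALL bit in SQ_INTERRUPT_MSG_CTRL so interrupt messages flow, while
 * disabling it sets STALL to hold them back.
 */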
6836 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6837                                      struct amdgpu_irq_src *source,
6838                                      unsigned int type,
6839                                      enum amdgpu_interrupt_state state)
6840 {
6841         int enable_flag;
6842
6843         switch (state) {
6844         case AMDGPU_IRQ_STATE_DISABLE:
6845                 enable_flag = 1;
6846                 break;
6847
6848         case AMDGPU_IRQ_STATE_ENABLE:
6849                 enable_flag = 0;
6850                 break;
6851
6852         default:
6853                 return -EINVAL;
6854         }
6855
6856         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6857                      enable_flag);
6858
6859         return 0;
6860 }
6861
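/*
 * EOP interrupt handler. The IV ring_id encodes the signalling queue:
 *
 *   ring_id[1:0] = pipe, ring_id[3:2] = ME (0 gfx, 1/2 compute MECs),
 *   ring_id[6:4] = queue
 *
 * and the matching amdgpu_ring gets its fences processed.
 */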
6862 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6863                             struct amdgpu_irq_src *source,
6864                             struct amdgpu_iv_entry *entry)
6865 {
6866         int i;
6867         u8 me_id, pipe_id, queue_id;
6868         struct amdgpu_ring *ring;
6869
6870         DRM_DEBUG("IH: CP EOP\n");
6871         me_id = (entry->ring_id & 0x0c) >> 2;
6872         pipe_id = (entry->ring_id & 0x03) >> 0;
6873         queue_id = (entry->ring_id & 0x70) >> 4;
6874
6875         switch (me_id) {
6876         case 0:
6877                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6878                 break;
6879         case 1:
6880         case 2:
6881                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6882                         ring = &adev->gfx.compute_ring[i];
6883                         /* Per-queue interrupt is supported for MEC starting from VI.
6884                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6885                          */
6886                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6887                                 amdgpu_fence_process(ring);
6888                 }
6889                 break;
6890         }
6891         return 0;
6892 }
6893
6894 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6895                                  struct amdgpu_irq_src *source,
6896                                  struct amdgpu_iv_entry *entry)
6897 {
6898         DRM_ERROR("Illegal register access in command stream\n");
6899         schedule_work(&adev->reset_work);
6900         return 0;
6901 }
6902
6903 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6904                                   struct amdgpu_irq_src *source,
6905                                   struct amdgpu_iv_entry *entry)
6906 {
6907         DRM_ERROR("Illegal instruction in command stream\n");
6908         schedule_work(&adev->reset_work);
6909         return 0;
6910 }
6911
6912 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6913                                      struct amdgpu_irq_src *source,
6914                                      struct amdgpu_iv_entry *entry)
6915 {
6916         DRM_ERROR("CP EDC/ECC error detected.\n");
6917         return 0;
6918 }
6919
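/*
 * Decode an SQ interrupt word. ENCODING 0 is the general-purpose (auto)
 * format; 1 and 2 are per-wave formats for instruction traps and EDC/ECC
 * errors respectively, carrying the CU coordinates of the offending wave.
 */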
6920 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6921 {
6922         u32 enc, se_id, sh_id, cu_id;
6923         char type[20];
6924         int sq_edc_source = -1;
6925
6926         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6927         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6928
6929         switch (enc) {
6930         case 0:
6931                 DRM_INFO("SQ general purpose intr detected: "
6932                          "se_id %d, immed_overflow %d, host_reg_overflow %d, "
6933                          "host_cmd_overflow %d, cmd_timestamp %d, "
6934                          "reg_timestamp %d, thread_trace_buff_full %d, "
6935                          "wlt %d, thread_trace %d.\n",
6936                          se_id,
6937                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6938                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6939                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6940                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6941                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6942                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6943                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6944                          REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6945                          );
6946                 break;
6947         case 1:
6948         case 2:
6949
6950                 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6951                 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6952
6953                 /*
6954                  * This function can be called either directly from the ISR or
6955                  * from the BH worker; only in the BH (task) context can the
6956                  * SQ_EDC_INFO instance register be read safely.
6957                  */
6958                 if (in_task()) {
6959                         mutex_lock(&adev->grbm_idx_mutex);
6960                         gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6961
6962                         sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6963
6964                         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6965                         mutex_unlock(&adev->grbm_idx_mutex);
6966                 }
6967
6968                 if (enc == 1)
6969                         sprintf(type, "instruction intr");
6970                 else
6971                         sprintf(type, "EDC/ECC error");
6972
6973                 DRM_INFO(
6974                         "SQ %s detected: "
6975                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
6976                         "trap %s, sq_edc_info.source %s.\n",
6977                         type, se_id, sh_id, cu_id,
6978                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6979                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6980                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6981                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6982                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6983                         );
6984                 break;
6985         default:
6986                 DRM_ERROR("SQ invalid encoding type.\n");
6987         }
6988 }
6989
6990 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6991 {
6992         struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
6993                                                   gfx.sq_work.work);
6994         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6995
6996         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6997 }
6998
6999 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7000                            struct amdgpu_irq_src *source,
7001                            struct amdgpu_iv_entry *entry)
7002 {
7003         unsigned ih_data = entry->src_data[0];
7004
7005         /*
7006          * Try to submit work so SQ_EDC_INFO can be accessed from
7007          * BH. If previous work submission hasn't finished yet
7008          * just print whatever info is possible directly from the ISR.
7009          */
7010         if (work_pending(&adev->gfx.sq_work.work)) {
7011                 gfx_v8_0_parse_sq_irq(adev, ih_data);
7012         } else {
7013                 adev->gfx.sq_work.ih_data = ih_data;
7014                 schedule_work(&adev->gfx.sq_work.work);
7015         }
7016
7017         return 0;
7018 }
7019
7020 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7021                                             struct amdgpu_irq_src *src,
7022                                             unsigned int type,
7023                                             enum amdgpu_interrupt_state state)
7024 {
7025         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7026
7027         switch (type) {
7028         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7029                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7030                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7031                 if (ring->me == 1)
7032                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7033                                      ring->pipe,
7034                                      GENERIC2_INT_ENABLE,
7035                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7036                 else
7037                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7038                                      ring->pipe,
7039                                      GENERIC2_INT_ENABLE,
7040                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7041                 break;
7042         default:
7043                 BUG(); /* kiq only supports GENERIC2_INT now */
7044                 break;
7045         }
7046         return 0;
7047 }
7048
7049 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7050                             struct amdgpu_irq_src *source,
7051                             struct amdgpu_iv_entry *entry)
7052 {
7053         u8 me_id, pipe_id, queue_id;
7054         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7055
7056         me_id = (entry->ring_id & 0x0c) >> 2;
7057         pipe_id = (entry->ring_id & 0x03) >> 0;
7058         queue_id = (entry->ring_id & 0x70) >> 4;
7059         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7060                    me_id, pipe_id, queue_id);
7061
7062         amdgpu_fence_process(ring);
7063         return 0;
7064 }
7065
7066 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
7067         .name = "gfx_v8_0",
7068         .early_init = gfx_v8_0_early_init,
7069         .late_init = gfx_v8_0_late_init,
7070         .sw_init = gfx_v8_0_sw_init,
7071         .sw_fini = gfx_v8_0_sw_fini,
7072         .hw_init = gfx_v8_0_hw_init,
7073         .hw_fini = gfx_v8_0_hw_fini,
7074         .suspend = gfx_v8_0_suspend,
7075         .resume = gfx_v8_0_resume,
7076         .is_idle = gfx_v8_0_is_idle,
7077         .wait_for_idle = gfx_v8_0_wait_for_idle,
7078         .check_soft_reset = gfx_v8_0_check_soft_reset,
7079         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
7080         .soft_reset = gfx_v8_0_soft_reset,
7081         .post_soft_reset = gfx_v8_0_post_soft_reset,
7082         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7083         .set_powergating_state = gfx_v8_0_set_powergating_state,
7084         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
7085 };
7086
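/*
 * Ring function tables. emit_frame_size is the worst-case DW count a single
 * frame may emit, so space can be reserved in the ring up front; the
 * per-packet breakdown is annotated on each term.
 */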
7087 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7088         .type = AMDGPU_RING_TYPE_GFX,
7089         .align_mask = 0xff,
7090         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7091         .support_64bit_ptrs = false,
7092         .get_rptr = gfx_v8_0_ring_get_rptr,
7093         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7094         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7095         .emit_frame_size = /* maximum of 215 DWs if counting 16 IBs in */
7096                 5 +  /* COND_EXEC */
7097                 7 +  /* PIPELINE_SYNC */
7098                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
7099                 12 + /* FENCE for VM_FLUSH */
7100                 20 + /* GDS switch */
7101                 4 +  /* double SWITCH_BUFFER;
7102                       * the first COND_EXEC jumps to the place just
7103                       * prior to this double SWITCH_BUFFER */
7104                 5 +  /* COND_EXEC */
7105                 7 +  /* HDP_flush */
7106                 4 +  /* VGT_flush */
7107                 14 + /* CE_META */
7108                 31 + /* DE_META */
7109                 3 +  /* CNTX_CTRL */
7110                 5 +  /* HDP_INVL */
7111                 12 + 12 + /* FENCE x2 */
7112                 2, /* SWITCH_BUFFER */
7113         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
7114         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7115         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7116         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7117         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7118         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7119         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7120         .test_ring = gfx_v8_0_ring_test_ring,
7121         .test_ib = gfx_v8_0_ring_test_ib,
7122         .insert_nop = amdgpu_ring_insert_nop,
7123         .pad_ib = amdgpu_ring_generic_pad_ib,
7124         .emit_switch_buffer = gfx_v8_ring_emit_sb,
7125         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7126         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7127         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
7128         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7129 };
7130
7131 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7132         .type = AMDGPU_RING_TYPE_COMPUTE,
7133         .align_mask = 0xff,
7134         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7135         .support_64bit_ptrs = false,
7136         .get_rptr = gfx_v8_0_ring_get_rptr,
7137         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7138         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7139         .emit_frame_size =
7140                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7141                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7142                 5 + /* hdp_invalidate */
7143                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7144                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7145                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7146         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7147         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7148         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7149         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7150         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7151         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7152         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7153         .test_ring = gfx_v8_0_ring_test_ring,
7154         .test_ib = gfx_v8_0_ring_test_ib,
7155         .insert_nop = amdgpu_ring_insert_nop,
7156         .pad_ib = amdgpu_ring_generic_pad_ib,
7157         .set_priority = gfx_v8_0_ring_set_priority_compute,
7158         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7159 };
7160
7161 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7162         .type = AMDGPU_RING_TYPE_KIQ,
7163         .align_mask = 0xff,
7164         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7165         .support_64bit_ptrs = false,
7166         .get_rptr = gfx_v8_0_ring_get_rptr,
7167         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7168         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7169         .emit_frame_size =
7170                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7171                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7172                 5 + /* hdp_invalidate */
7173                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7174                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7175                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7176         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7177         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7178         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7179         .test_ring = gfx_v8_0_ring_test_ring,
7180         .test_ib = gfx_v8_0_ring_test_ib,
7181         .insert_nop = amdgpu_ring_insert_nop,
7182         .pad_ib = amdgpu_ring_generic_pad_ib,
7183         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7184         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7185 };
7186
7187 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7188 {
7189         int i;
7190
7191         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7192
7193         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7194                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7195
7196         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7197                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7198 }
7199
7200 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7201         .set = gfx_v8_0_set_eop_interrupt_state,
7202         .process = gfx_v8_0_eop_irq,
7203 };
7204
7205 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7206         .set = gfx_v8_0_set_priv_reg_fault_state,
7207         .process = gfx_v8_0_priv_reg_irq,
7208 };
7209
7210 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7211         .set = gfx_v8_0_set_priv_inst_fault_state,
7212         .process = gfx_v8_0_priv_inst_irq,
7213 };
7214
7215 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7216         .set = gfx_v8_0_kiq_set_interrupt_state,
7217         .process = gfx_v8_0_kiq_irq,
7218 };
7219
7220 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7221         .set = gfx_v8_0_set_cp_ecc_int_state,
7222         .process = gfx_v8_0_cp_ecc_error_irq,
7223 };
7224
7225 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7226         .set = gfx_v8_0_set_sq_int_state,
7227         .process = gfx_v8_0_sq_irq,
7228 };
7229
7230 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7231 {
7232         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7233         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7234
7235         adev->gfx.priv_reg_irq.num_types = 1;
7236         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7237
7238         adev->gfx.priv_inst_irq.num_types = 1;
7239         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7240
7241         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7242         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7243
7244         adev->gfx.cp_ecc_error_irq.num_types = 1;
7245         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7246
7247         adev->gfx.sq_irq.num_types = 1;
7248         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7249 }
7250
7251 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7252 {
7253         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7254 }
7255
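/*
 * Split the on-chip GDS resources (memory, GWS and OA) into fixed gfx and
 * compute (CS) partitions, sized by the total GDS memory the ASIC reports
 * in GDS_VMID0_SIZE.
 */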
7256 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7257 {
7258         /* init ASIC GDS info */
7259         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7260         adev->gds.gws.total_size = 64;
7261         adev->gds.oa.total_size = 16;
7262
7263         if (adev->gds.mem.total_size == 64 * 1024) {
7264                 adev->gds.mem.gfx_partition_size = 4096;
7265                 adev->gds.mem.cs_partition_size = 4096;
7266
7267                 adev->gds.gws.gfx_partition_size = 4;
7268                 adev->gds.gws.cs_partition_size = 4;
7269
7270                 adev->gds.oa.gfx_partition_size = 4;
7271                 adev->gds.oa.cs_partition_size = 1;
7272         } else {
7273                 adev->gds.mem.gfx_partition_size = 1024;
7274                 adev->gds.mem.cs_partition_size = 1024;
7275
7276                 adev->gds.gws.gfx_partition_size = 16;
7277                 adev->gds.gws.cs_partition_size = 16;
7278
7279                 adev->gds.oa.gfx_partition_size = 4;
7280                 adev->gds.oa.cs_partition_size = 4;
7281         }
7282 }
7283
7284 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7285                                                  u32 bitmap)
7286 {
7287         u32 data;
7288
7289         if (!bitmap)
7290                 return;
7291
7292         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7293         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7294
7295         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7296 }
7297
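/*
 * Return the active-CU bitmap for the currently selected SE/SH: the
 * hardware (CC_...) and user (GC_USER_...) inactive-CU masks are OR'ed
 * together, inverted and clipped to max_cu_per_sh.
 */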
7298 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7299 {
7300         u32 data, mask;
7301
7302         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7303                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7304
7305         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7306
7307         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7308 }
7309
7310 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7311 {
7312         int i, j, k, counter, active_cu_number = 0;
7313         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7314         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7315         unsigned disable_masks[4 * 2];
7316         u32 ao_cu_num;
7317
7318         memset(cu_info, 0, sizeof(*cu_info));
7319
7320         if (adev->flags & AMD_IS_APU)
7321                 ao_cu_num = 2;
7322         else
7323                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7324
7325         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7326
7327         mutex_lock(&adev->grbm_idx_mutex);
7328         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7329                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7330                         mask = 1;
7331                         ao_bitmap = 0;
7332                         counter = 0;
7333                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7334                         if (i < 4 && j < 2)
7335                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7336                                         adev, disable_masks[i * 2 + j]);
7337                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7338                         cu_info->bitmap[i][j] = bitmap;
7339
7340                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7341                                 if (bitmap & mask) {
7342                                         if (counter < ao_cu_num)
7343                                                 ao_bitmap |= mask;
7344                                         counter++;
7345                                 }
7346                                 mask <<= 1;
7347                         }
7348                         active_cu_number += counter;
7349                         if (i < 2 && j < 2)
7350                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7351                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7352                 }
7353         }
7354         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7355         mutex_unlock(&adev->grbm_idx_mutex);
7356
7357         cu_info->number = active_cu_number;
7358         cu_info->ao_cu_mask = ao_cu_mask;
7359         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7360         cu_info->max_waves_per_simd = 10;
7361         cu_info->max_scratch_slots_per_cu = 32;
7362         cu_info->wave_front_size = 64;
7363         cu_info->lds_size = 64;
7364 }
7365
7366 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7367 {
7368         .type = AMD_IP_BLOCK_TYPE_GFX,
7369         .major = 8,
7370         .minor = 0,
7371         .rev = 0,
7372         .funcs = &gfx_v8_0_ip_funcs,
7373 };
7374
7375 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7376 {
7377         .type = AMD_IP_BLOCK_TYPE_GFX,
7378         .major = 8,
7379         .minor = 1,
7380         .rev = 0,
7381         .funcs = &gfx_v8_0_ip_funcs,
7382 };
7383
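/*
 * Under SR-IOV, write the CE metadata payload into the per-context save
 * area (CSA) using a CE-engine WRITE_DATA; the payload layout (and thus
 * the packet length) depends on chained-IB support.
 */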
7384 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7385 {
7386         uint64_t ce_payload_addr;
7387         int cnt_ce;
7388         union {
7389                 struct vi_ce_ib_state regular;
7390                 struct vi_ce_ib_state_chained_ib chained;
7391         } ce_payload = {};
7392
7393         if (ring->adev->virt.chained_ib_support) {
7394                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7395                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7396                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7397         } else {
7398                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7399                         offsetof(struct vi_gfx_meta_data, ce_payload);
7400                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7401         }
7402
7403         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7404         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7405                                 WRITE_DATA_DST_SEL(8) |
7406                                 WR_CONFIRM) |
7407                                 WRITE_DATA_CACHE_POLICY(0));
7408         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7409         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7410         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7411 }
7412
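/*
 * Companion to gfx_v8_0_ring_emit_ce_meta(): write the DE metadata,
 * including the GDS backup address placed 4K past the CSA base, using a
 * DE-engine WRITE_DATA.
 */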
7413 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7414 {
7415         uint64_t de_payload_addr, gds_addr, csa_addr;
7416         int cnt_de;
7417         union {
7418                 struct vi_de_ib_state regular;
7419                 struct vi_de_ib_state_chained_ib chained;
7420         } de_payload = {};
7421
7422         csa_addr = amdgpu_csa_vaddr(ring->adev);
7423         gds_addr = csa_addr + 4096;
7424         if (ring->adev->virt.chained_ib_support) {
7425                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7426                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7427                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7428                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7429         } else {
7430                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7431                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7432                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7433                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7434         }
7435
7436         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7437         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7438                                 WRITE_DATA_DST_SEL(8) |
7439                                 WR_CONFIRM) |
7440                                 WRITE_DATA_CACHE_POLICY(0));
7441         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7442         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7443         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7444 }