2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
51 #include "smu/smu_7_1_3_d.h"
53 #define GFX8_NUM_GFX_RINGS 1
54 #define GFX8_NUM_COMPUTE_RINGS 8
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
79 #define SET_BPM_SERDES_CMD 1
80 #define CLE_BPM_SERDES_CMD 0
82 /* BPM Register Address*/
84 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
85 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
86 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
87 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
88 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
92 #define RLC_FormatDirectRegListLength 14
/*
 * Per-VMID GDS register offsets: one {base, size, gws, oa} tuple for each
 * of the 16 VMIDs, used to program Global Data Share apertures per VM.
 */
96 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
98 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
99 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
100 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
101 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
102 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
103 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
104 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
105 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
106 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
107 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
108 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
109 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
110 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
111 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
112 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
113 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
/*
 * Tonga A11 golden settings. Flat triples of {register, mask, value},
 * presumably applied as read-modify-write by
 * amdgpu_program_register_sequence() -- confirm triple semantics there.
 */
116 static const u32 golden_settings_tonga_a11[] =
118 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
119 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
120 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
121 mmGB_GPU_ID, 0x0000000f, 0x00000000,
122 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
123 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
124 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
125 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
126 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
127 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
128 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
129 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
130 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
131 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
132 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Tonga common golden settings: raster config, GB_ADDR_CONFIG, SPI CU reserves. */
135 static const u32 tonga_golden_common_all[] =
137 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
138 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
139 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
140 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
141 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
142 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
143 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
144 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
/*
 * Tonga medium-grain / coarse-grain clockgating init sequence: CGTT clock
 * control registers, then per-CU CGTS control, then SM/CGCG enables.
 * GRBM_GFX_INDEX 0xe0000000 broadcasts writes to all SEs/SHs/instances.
 */
147 static const u32 tonga_mgcg_cgcg_init[] =
149 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
150 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
151 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
152 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
153 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
154 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
155 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
156 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
157 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
158 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
159 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
160 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
161 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
163 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
165 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
167 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
168 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
169 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
170 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
171 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
172 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
174 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
175 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
176 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
177 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
178 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
179 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
/* Per-CU (CU0..CU7) CGTS clockgating control values. */
180 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
181 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
182 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
183 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
184 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
185 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
186 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
187 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
188 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
189 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
190 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
191 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
192 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
193 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
194 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
195 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
196 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
197 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
198 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
199 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
200 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
201 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
202 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
203 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
204 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
205 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
206 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
207 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
208 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
209 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
210 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
211 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
212 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
213 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
214 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
215 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
216 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
217 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
218 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
219 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
220 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
221 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
222 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
223 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Polaris11 A11 golden settings ({register, mask, value} triples). */
226 static const u32 golden_settings_polaris11_a11[] =
228 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
229 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
230 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
231 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
232 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
233 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
234 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
235 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
236 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
237 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
238 mmSQ_CONFIG, 0x07f80000, 0x01180000,
239 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
240 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
241 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
242 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
243 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
244 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris11 common golden settings (note GB_ADDR_CONFIG matches POLARIS11_GB_ADDR_CONFIG_GOLDEN). */
247 static const u32 polaris11_golden_common_all[] =
249 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
250 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
251 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
252 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
253 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
254 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
/* Polaris10 A11 golden settings ({register, mask, value} triples). */
257 static const u32 golden_settings_polaris10_a11[] =
259 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
260 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
261 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
262 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
263 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
264 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
265 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
266 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
267 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
268 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
269 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
270 mmSQ_CONFIG, 0x07f80000, 0x07180000,
271 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
272 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
273 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
274 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
275 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris10 common golden settings. */
278 static const u32 polaris10_golden_common_all[] =
280 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
281 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
282 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
283 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
284 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
285 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
286 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
287 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
/* Fiji common golden settings. */
290 static const u32 fiji_golden_common_all[] =
292 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
293 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
294 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
295 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
296 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
301 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
/* Fiji A10 golden settings ({register, mask, value} triples). */
304 static const u32 golden_settings_fiji_a10[] =
306 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
311 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
312 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
313 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
314 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
315 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
316 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/*
 * Fiji MGCG/CGCG clockgating init sequence (no per-CU CGTS entries,
 * unlike the Tonga table).
 */
319 static const u32 fiji_mgcg_cgcg_init[] =
321 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
322 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
323 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
324 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
325 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
326 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
327 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
328 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
329 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
330 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
331 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
332 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
333 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
334 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
335 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
336 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
337 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
338 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
339 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
340 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
341 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
342 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
343 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
344 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
345 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
346 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
347 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
348 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
349 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
350 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
351 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
352 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
353 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
354 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
355 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Iceland (Topaz) A11 golden settings ({register, mask, value} triples). */
358 static const u32 golden_settings_iceland_a11[] =
360 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
361 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
362 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
363 mmGB_GPU_ID, 0x0000000f, 0x00000000,
364 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
365 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
366 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
367 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
368 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
369 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
370 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
371 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
372 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
373 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
374 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
375 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
/* Iceland common golden settings (GB_ADDR_CONFIG matches TOPAZ_GB_ADDR_CONFIG_GOLDEN). */
378 static const u32 iceland_golden_common_all[] =
380 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
381 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
382 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
383 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
384 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
385 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
386 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
387 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
/*
 * Iceland MGCG/CGCG clockgating init sequence. Per-CU CGTS entries cover
 * CU0..CU5 only (Iceland has fewer CUs than Tonga/Carrizo tables).
 */
390 static const u32 iceland_mgcg_cgcg_init[] =
392 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
393 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
396 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
397 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
398 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
399 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
400 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
401 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
402 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
403 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
404 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
405 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
406 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
407 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
408 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
409 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
410 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
411 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
412 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
413 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
414 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
415 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
416 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
417 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
418 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
419 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
420 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
421 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
422 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
423 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
424 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
425 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
426 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
427 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
428 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
429 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
430 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
431 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
432 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
433 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
434 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
435 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
436 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
437 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
438 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
439 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
440 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
441 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
442 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
443 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
444 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
445 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
446 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
447 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
448 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
449 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
450 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
451 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
452 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
453 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
454 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
455 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
/* Carrizo A11 golden settings ({register, mask, value} triples). */
458 static const u32 cz_golden_settings_a11[] =
460 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
461 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
462 mmGB_GPU_ID, 0x0000000f, 0x00000000,
463 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
464 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
465 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
466 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
467 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
468 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
469 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
470 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
471 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
/* Carrizo common golden settings (GB_ADDR_CONFIG matches CARRIZO_GB_ADDR_CONFIG_GOLDEN). */
474 static const u32 cz_golden_common_all[] =
476 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
477 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
478 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
479 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
480 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
481 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
482 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
483 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
/*
 * Carrizo MGCG/CGCG clockgating init sequence: CGTT clock controls, then
 * per-CU (CU0..CU7) CGTS values, then SM/CGCG enables. Note the final
 * RLC_CGCG_CGLS_CTRL value 0x0020003f differs from Tonga's 0x0020003c.
 */
486 static const u32 cz_mgcg_cgcg_init[] =
488 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
489 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
491 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
492 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
493 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
494 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
495 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
496 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
497 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
498 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
499 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
500 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
501 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
502 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
503 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
504 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
505 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
506 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
507 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
508 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
509 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
510 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
511 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
512 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
513 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
514 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
515 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
516 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
517 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
518 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
519 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
520 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
521 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
522 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
523 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
524 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
525 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
526 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
527 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
528 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
529 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
530 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
531 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
532 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
533 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
534 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
535 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
536 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
537 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
538 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
539 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
540 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
541 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
542 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
543 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
544 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
545 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
546 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
547 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
548 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
549 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
550 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
551 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
552 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
553 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
554 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
555 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
556 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
557 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
558 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
559 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
560 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
561 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
562 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Stoney A11 golden settings ({register, mask, value} triples). */
565 static const u32 stoney_golden_settings_a11[] =
567 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
568 mmGB_GPU_ID, 0x0000000f, 0x00000000,
569 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
570 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
571 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
572 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
573 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
574 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
575 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
576 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
/* Stoney common golden settings. */
579 static const u32 stoney_golden_common_all[] =
581 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
582 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
583 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
584 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
585 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
586 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
587 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
588 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
/* Stoney MGCG/CGCG init: minimal sequence (CGCG/CGLS, CP/RLC memory sleep, SM control). */
591 static const u32 stoney_mgcg_cgcg_init[] =
593 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
594 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
595 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
596 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
597 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
600 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
601 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
602 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
603 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
604 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
605 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
/*
 * gfx_v8_0_init_golden_registers - apply the per-ASIC "golden" register
 * tables above (clockgating init, chip-specific golden settings, common
 * settings), selected by adev->asic_type.
 *
 * NOTE(review): this extract is missing the switch case labels, braces and
 * break statements; each group of amdgpu_program_register_sequence() calls
 * below corresponds to one ASIC, identifiable from the table names -- confirm
 * against the full file.
 */
607 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
609 switch (adev->asic_type) {
/* Topaz/Iceland group. */
611 amdgpu_program_register_sequence(adev,
612 iceland_mgcg_cgcg_init,
613 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
614 amdgpu_program_register_sequence(adev,
615 golden_settings_iceland_a11,
616 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
617 amdgpu_program_register_sequence(adev,
618 iceland_golden_common_all,
619 (const u32)ARRAY_SIZE(iceland_golden_common_all));
/* Fiji group (the table-name argument line is missing from this extract). */
622 amdgpu_program_register_sequence(adev,
624 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
625 amdgpu_program_register_sequence(adev,
626 golden_settings_fiji_a10,
627 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
628 amdgpu_program_register_sequence(adev,
629 fiji_golden_common_all,
630 (const u32)ARRAY_SIZE(fiji_golden_common_all));
/* Tonga group. */
634 amdgpu_program_register_sequence(adev,
635 tonga_mgcg_cgcg_init,
636 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
637 amdgpu_program_register_sequence(adev,
638 golden_settings_tonga_a11,
639 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
640 amdgpu_program_register_sequence(adev,
641 tonga_golden_common_all,
642 (const u32)ARRAY_SIZE(tonga_golden_common_all));
/* Polaris11 group. */
645 amdgpu_program_register_sequence(adev,
646 golden_settings_polaris11_a11,
647 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
648 amdgpu_program_register_sequence(adev,
649 polaris11_golden_common_all,
650 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
/* Polaris10 group, plus an SMC clock write and a board-specific I2C quirk. */
653 amdgpu_program_register_sequence(adev,
654 golden_settings_polaris10_a11,
655 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
656 amdgpu_program_register_sequence(adev,
657 polaris10_golden_common_all,
658 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
659 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
/* Quirk keyed on PCI revision/subsystem IDs of specific Polaris10 boards. */
660 if (adev->pdev->revision == 0xc7 &&
661 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
662 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
663 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
664 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
665 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
/* Carrizo group (the cz_mgcg_cgcg_init argument line is missing from this extract). */
669 amdgpu_program_register_sequence(adev,
671 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
672 amdgpu_program_register_sequence(adev,
673 cz_golden_settings_a11,
674 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
675 amdgpu_program_register_sequence(adev,
676 cz_golden_common_all,
677 (const u32)ARRAY_SIZE(cz_golden_common_all));
/* Stoney group. */
680 amdgpu_program_register_sequence(adev,
681 stoney_mgcg_cgcg_init,
682 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
683 amdgpu_program_register_sequence(adev,
684 stoney_golden_settings_a11,
685 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
686 amdgpu_program_register_sequence(adev,
687 stoney_golden_common_all,
688 (const u32)ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - set up the CP scratch register pool: 7 registers
 * starting at mmSCRATCH_REG0, all initially marked free for
 * amdgpu_gfx_scratch_get()/amdgpu_gfx_scratch_free().
 */
695 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
699 adev->gfx.scratch.num_reg = 7;
700 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
701 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
702 adev->gfx.scratch.free[i] = true;
703 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
/*
 * gfx_v8_0_ring_test_ring - basic ring aliveness test: seed a scratch
 * register with 0xCAFEDEAD, submit a 3-dword SET_UCONFIG_REG packet that
 * writes 0xDEADBEEF to it, then poll (up to adev->usec_timeout) until the
 * CP has executed the write. Returns 0 on success, negative errno on
 * failure (error-return lines are missing from this extract -- confirm
 * against the full file).
 */
707 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
709 struct amdgpu_device *adev = ring->adev;
715 r = amdgpu_gfx_scratch_get(adev, &scratch);
717 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
/* Seed the scratch register so we can detect the CP's write. */
720 WREG32(scratch, 0xCAFEDEAD);
721 r = amdgpu_ring_alloc(ring, 3);
723 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
/* Ring allocation failed: give the scratch register back. */
725 amdgpu_gfx_scratch_free(adev, scratch);
/* SET_UCONFIG_REG takes a register offset relative to the UCONFIG base. */
728 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
729 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
730 amdgpu_ring_write(ring, 0xDEADBEEF);
731 amdgpu_ring_commit(ring);
/* Busy-poll the scratch register until the CP write lands or we time out. */
733 for (i = 0; i < adev->usec_timeout; i++) {
734 tmp = RREG32(scratch);
735 if (tmp == 0xDEADBEEF)
739 if (i < adev->usec_timeout) {
740 DRM_INFO("ring test on %d succeeded in %d usecs\n",
743 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
744 ring->idx, scratch, tmp);
747 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_ring_test_ib - indirect-buffer test: same scratch-register
 * round-trip as the ring test, but the SET_UCONFIG_REG write is placed in
 * an IB, scheduled on the ring, and completion is awaited via its fence
 * (fence_wait_timeout with the caller-supplied timeout). Returns 0 on
 * success; error-handling/cleanup lines are missing from this extract --
 * confirm against the full file.
 */
751 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
753 struct amdgpu_device *adev = ring->adev;
755 struct fence *f = NULL;
760 r = amdgpu_gfx_scratch_get(adev, &scratch);
762 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
/* Seed the scratch register, then build a 3-dword IB that overwrites it. */
765 WREG32(scratch, 0xCAFEDEAD);
766 memset(&ib, 0, sizeof(ib));
767 r = amdgpu_ib_get(adev, NULL, 256, &ib);
769 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
772 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
773 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
774 ib.ptr[2] = 0xDEADBEEF;
777 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
/* fence_wait_timeout: 0 means timeout, negative means error. */
781 r = fence_wait_timeout(f, false, timeout);
783 DRM_ERROR("amdgpu: IB test timed out.\n");
787 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
790 tmp = RREG32(scratch);
791 if (tmp == 0xDEADBEEF) {
792 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
795 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
/* Cleanup: release the IB and scratch register on all paths. */
800 amdgpu_ib_free(adev, &ib, NULL);
803 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_free_microcode - release all GFX firmware images (PFP, ME, CE,
 * RLC, MEC, and MEC2 where present) and free the saved RLC register-list
 * format buffer. release_firmware(NULL) is a no-op, so the unconditional
 * calls are safe even if a load failed earlier.
 */
808 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
809 release_firmware(adev->gfx.pfp_fw);
810 adev->gfx.pfp_fw = NULL;
811 release_firmware(adev->gfx.me_fw);
812 adev->gfx.me_fw = NULL;
813 release_firmware(adev->gfx.ce_fw);
814 adev->gfx.ce_fw = NULL;
815 release_firmware(adev->gfx.rlc_fw);
816 adev->gfx.rlc_fw = NULL;
817 release_firmware(adev->gfx.mec_fw);
818 adev->gfx.mec_fw = NULL;
/* Stoney and Topaz have no MEC2 firmware; only the release is conditional. */
819 if ((adev->asic_type != CHIP_STONEY) &&
820 (adev->asic_type != CHIP_TOPAZ))
821 release_firmware(adev->gfx.mec2_fw);
/* Intentionally unconditional: the pointer is cleared on every ASIC. */
822 adev->gfx.mec2_fw = NULL;
824 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and validate all GFX firmware.
 *
 * Loads the PFP, ME, CE, RLC, MEC and (where supported) MEC2 firmware
 * images for the detected ASIC, caches version/feature numbers from the
 * firmware headers, copies the RLC register-restore tables into kernel
 * memory, and registers each image with the SMU loader when
 * adev->firmware.smu_load is set.
 *
 * The firmware path strings appear as "/(DEBLOBBED)/" because this tree
 * has been processed by the linux-libre deblob scripts; reject_firmware()
 * is the deblobbed replacement for request_firmware().
 *
 * NOTE(review): source shown with gaps — the err checks/gotos between
 * visible lines are not reproduced here.
 */
827 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
829 const char *chip_name;
832 struct amdgpu_firmware_info *info = NULL;
833 const struct common_firmware_header *header = NULL;
834 const struct gfx_firmware_header_v1_0 *cp_hdr;
835 const struct rlc_firmware_header_v2_0 *rlc_hdr;
836 unsigned int *tmp = NULL, i;
/* Pick the firmware name prefix from the ASIC type. */
840 switch (adev->asic_type) {
848 chip_name = "carrizo";
854 chip_name = "polaris11";
857 chip_name = "polaris10";
860 chip_name = "stoney";
/* PFP (pre-fetch parser) firmware. */
866 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
867 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
870 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
873 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
874 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
875 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME (micro engine) firmware. */
877 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
878 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
881 err = amdgpu_ucode_validate(adev->gfx.me_fw);
884 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
885 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
886 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE (constant engine) firmware. */
888 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
889 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
892 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
895 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
896 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
897 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* RLC (run list controller) firmware, v2.0 header layout. */
899 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
900 err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
903 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
904 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
905 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
906 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* Cache the RLC save/restore layout advertised by the firmware header. */
908 adev->gfx.rlc.save_and_restore_offset =
909 le32_to_cpu(rlc_hdr->save_and_restore_offset);
910 adev->gfx.rlc.clear_state_descriptor_offset =
911 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
912 adev->gfx.rlc.avail_scratch_ram_locations =
913 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
914 adev->gfx.rlc.reg_restore_list_size =
915 le32_to_cpu(rlc_hdr->reg_restore_list_size);
916 adev->gfx.rlc.reg_list_format_start =
917 le32_to_cpu(rlc_hdr->reg_list_format_start);
918 adev->gfx.rlc.reg_list_format_separate_start =
919 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
920 adev->gfx.rlc.starting_offsets_start =
921 le32_to_cpu(rlc_hdr->starting_offsets_start);
922 adev->gfx.rlc.reg_list_format_size_bytes =
923 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
924 adev->gfx.rlc.reg_list_size_bytes =
925 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/*
 * One buffer holds both tables: the format list, immediately followed
 * by the restore list (register_restore points past the first table).
 */
927 adev->gfx.rlc.register_list_format =
928 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
929 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
931 if (!adev->gfx.rlc.register_list_format) {
/* Copy the format table out of the firmware blob, LE -> CPU order. */
936 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
937 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
938 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
939 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
941 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
/* Copy the restore table right after it. */
943 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
944 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
945 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
946 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC (micro engine compute) firmware. */
948 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
952 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
955 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
956 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
957 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2 only exists on ASICs other than Stoney and Topaz. */
959 if ((adev->asic_type != CHIP_STONEY) &&
960 (adev->asic_type != CHIP_TOPAZ)) {
961 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
962 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
964 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
967 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
968 adev->gfx.mec2_fw->data;
969 adev->gfx.mec2_fw_version =
970 le32_to_cpu(cp_hdr->header.ucode_version);
971 adev->gfx.mec2_feature_version =
972 le32_to_cpu(cp_hdr->ucode_feature_version);
975 adev->gfx.mec2_fw = NULL;
/*
 * When the SMU loads firmware on our behalf, register each image in
 * the ucode table and grow fw_size by its page-aligned payload.
 */
979 if (adev->firmware.smu_load) {
980 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
981 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
982 info->fw = adev->gfx.pfp_fw;
983 header = (const struct common_firmware_header *)info->fw->data;
984 adev->firmware.fw_size +=
985 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
987 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
988 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
989 info->fw = adev->gfx.me_fw;
990 header = (const struct common_firmware_header *)info->fw->data;
991 adev->firmware.fw_size +=
992 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
994 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
995 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
996 info->fw = adev->gfx.ce_fw;
997 header = (const struct common_firmware_header *)info->fw->data;
998 adev->firmware.fw_size +=
999 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1001 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1002 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1003 info->fw = adev->gfx.rlc_fw;
1004 header = (const struct common_firmware_header *)info->fw->data;
1005 adev->firmware.fw_size +=
1006 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1008 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1009 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1010 info->fw = adev->gfx.mec_fw;
1011 header = (const struct common_firmware_header *)info->fw->data;
1012 adev->firmware.fw_size +=
1013 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1015 if (adev->gfx.mec2_fw) {
1016 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1017 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1018 info->fw = adev->gfx.mec2_fw;
1019 header = (const struct common_firmware_header *)info->fw->data;
1020 adev->firmware.fw_size +=
1021 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: report the failing image and drop every reference. */
1029 "gfx8: Failed to load firmware \"%s\"\n",
1031 release_firmware(adev->gfx.pfp_fw);
1032 adev->gfx.pfp_fw = NULL;
1033 release_firmware(adev->gfx.me_fw);
1034 adev->gfx.me_fw = NULL;
1035 release_firmware(adev->gfx.ce_fw);
1036 adev->gfx.ce_fw = NULL;
1037 release_firmware(adev->gfx.rlc_fw);
1038 adev->gfx.rlc_fw = NULL;
1039 release_firmware(adev->gfx.mec_fw);
1040 adev->gfx.mec_fw = NULL;
1041 release_firmware(adev->gfx.mec2_fw);
1042 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - emit the RLC clear-state PM4 stream.
 *
 * Fills @buffer with the command stream the RLC replays to initialize
 * context state: PREAMBLE begin, CONTEXT_CONTROL, all SECT_CONTEXT
 * extents from adev->gfx.rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG
 * pair, PREAMBLE end, and a final CLEAR_STATE packet.  All dwords are
 * stored little-endian via cpu_to_le32().
 */
1047 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1048 volatile u32 *buffer)
1051 const struct cs_section_def *sect = NULL;
1052 const struct cs_extent_def *ext = NULL;
/* Nothing to emit without clear-state data. */
1054 if (adev->gfx.rlc.cs_data == NULL)
1059 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1060 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1062 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1063 buffer[count++] = cpu_to_le32(0x80000000);
1064 buffer[count++] = cpu_to_le32(0x80000000);
/* Emit every context-register extent as a SET_CONTEXT_REG packet. */
1066 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1067 for (ext = sect->section; ext->extent != NULL; ++ext) {
1068 if (sect->id == SECT_CONTEXT) {
1070 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1071 buffer[count++] = cpu_to_le32(ext->reg_index -
1072 PACKET3_SET_CONTEXT_REG_START);
1073 for (i = 0; i < ext->reg_count; i++)
1074 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* PA_SC_RASTER_CONFIG/_1 values are per-ASIC golden settings. */
1081 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1082 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1083 PACKET3_SET_CONTEXT_REG_START);
1084 switch (adev->asic_type) {
1086 case CHIP_POLARIS10:
1087 buffer[count++] = cpu_to_le32(0x16000012);
1088 buffer[count++] = cpu_to_le32(0x0000002A);
1090 case CHIP_POLARIS11:
1091 buffer[count++] = cpu_to_le32(0x16000012);
1092 buffer[count++] = cpu_to_le32(0x00000000);
1095 buffer[count++] = cpu_to_le32(0x3a00161a);
1096 buffer[count++] = cpu_to_le32(0x0000002e);
1100 buffer[count++] = cpu_to_le32(0x00000002);
1101 buffer[count++] = cpu_to_le32(0x00000000);
1104 buffer[count++] = cpu_to_le32(0x00000000);
1105 buffer[count++] = cpu_to_le32(0x00000000);
1108 buffer[count++] = cpu_to_le32(0x00000000);
1109 buffer[count++] = cpu_to_le32(0x00000000);
/* Close the preamble and issue the actual clear-state. */
1113 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1114 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1116 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1117 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - build the CP jump table in the RLC cp_table BO.
 *
 * Concatenates the jump-table sections (jt_offset/jt_size from each
 * firmware header) of the CE, PFP, ME, MEC and MEC2 microcode into
 * adev->gfx.rlc.cp_table_ptr, advancing bo_offset after each engine.
 * max_me selects how many engines are copied; on Carrizo it is adjusted
 * (the adjustment line falls in a source gap here).
 */
1120 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1122 const __le32 *fw_data;
1123 volatile u32 *dst_ptr;
1124 int me, i, max_me = 4;
1126 u32 table_offset, table_size;
1128 if (adev->asic_type == CHIP_CARRIZO)
1131 /* write the cp table buffer */
1132 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1133 for (me = 0; me < max_me; me++) {
/* me == 0: constant engine (CE) jump table. */
1135 const struct gfx_firmware_header_v1_0 *hdr =
1136 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1137 fw_data = (const __le32 *)
1138 (adev->gfx.ce_fw->data +
1139 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1140 table_offset = le32_to_cpu(hdr->jt_offset);
1141 table_size = le32_to_cpu(hdr->jt_size);
1142 } else if (me == 1) {
/* me == 1: pre-fetch parser (PFP). */
1143 const struct gfx_firmware_header_v1_0 *hdr =
1144 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1145 fw_data = (const __le32 *)
1146 (adev->gfx.pfp_fw->data +
1147 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1148 table_offset = le32_to_cpu(hdr->jt_offset);
1149 table_size = le32_to_cpu(hdr->jt_size);
1150 } else if (me == 2) {
/* me == 2: micro engine (ME). */
1151 const struct gfx_firmware_header_v1_0 *hdr =
1152 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1153 fw_data = (const __le32 *)
1154 (adev->gfx.me_fw->data +
1155 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1156 table_offset = le32_to_cpu(hdr->jt_offset);
1157 table_size = le32_to_cpu(hdr->jt_size);
1158 } else if (me == 3) {
/* me == 3: compute micro engine (MEC). */
1159 const struct gfx_firmware_header_v1_0 *hdr =
1160 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1161 fw_data = (const __le32 *)
1162 (adev->gfx.mec_fw->data +
1163 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1164 table_offset = le32_to_cpu(hdr->jt_offset);
1165 table_size = le32_to_cpu(hdr->jt_size);
1166 } else if (me == 4) {
/* me == 4: second compute micro engine (MEC2), when present. */
1167 const struct gfx_firmware_header_v1_0 *hdr =
1168 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1169 fw_data = (const __le32 *)
1170 (adev->gfx.mec2_fw->data +
1171 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1172 table_offset = le32_to_cpu(hdr->jt_offset);
1173 table_size = le32_to_cpu(hdr->jt_size);
/* Copy this engine's jump table dwords into the BO. */
1176 for (i = 0; i < table_size; i ++) {
1177 dst_ptr[bo_offset + i] =
1178 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1181 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - tear down RLC buffer objects.
 *
 * Unpins, unreserves and drops the clear-state BO and the CP jump-table
 * BO, clearing the pointers afterwards.  A failed reserve is only
 * warned about; teardown proceeds regardless.
 */
1185 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1189 /* clear state block */
1190 if (adev->gfx.rlc.clear_state_obj) {
1191 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1192 if (unlikely(r != 0))
1193 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1194 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1195 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1196 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1197 adev->gfx.rlc.clear_state_obj = NULL;
1200 /* jump table block */
1201 if (adev->gfx.rlc.cp_table_obj) {
1202 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1203 if (unlikely(r != 0))
1204 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1205 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1206 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1207 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1208 adev->gfx.rlc.cp_table_obj = NULL;
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects.
 *
 * Creates/pins/maps a VRAM BO for the clear-state buffer (sized by
 * gfx_v8_0_get_csb_size()) and fills it via gfx_v8_0_get_csb_buffer().
 * On Carrizo and Stoney it additionally creates the CP jump-table BO
 * and populates it with cz_init_cp_jump_table().  Failures warn and
 * unwind through gfx_v8_0_rlc_fini().
 *
 * NOTE(review): source shown with gaps — return statements and some
 * error-branch lines are not reproduced here.
 */
1212 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1214 volatile u32 *dst_ptr;
1216 const struct cs_section_def *cs_data;
1219 adev->gfx.rlc.cs_data = vi_cs_data;
1221 cs_data = adev->gfx.rlc.cs_data;
1224 /* clear state block */
1225 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1227 if (adev->gfx.rlc.clear_state_obj == NULL) {
1228 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1229 AMDGPU_GEM_DOMAIN_VRAM,
1230 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1232 &adev->gfx.rlc.clear_state_obj);
1234 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1235 gfx_v8_0_rlc_fini(adev);
/* Reserve, pin in VRAM, and CPU-map the clear-state BO. */
1239 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1240 if (unlikely(r != 0)) {
1241 gfx_v8_0_rlc_fini(adev);
1244 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1245 &adev->gfx.rlc.clear_state_gpu_addr);
1247 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1248 dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1249 gfx_v8_0_rlc_fini(adev);
1253 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1255 dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1256 gfx_v8_0_rlc_fini(adev);
1259 /* set up the cs buffer */
1260 dst_ptr = adev->gfx.rlc.cs_ptr;
1261 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1262 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1263 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* Carrizo/Stoney also need the CP jump table (JT + GDS area). */
1266 if ((adev->asic_type == CHIP_CARRIZO) ||
1267 (adev->asic_type == CHIP_STONEY)) {
1268 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1269 if (adev->gfx.rlc.cp_table_obj == NULL) {
1270 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1271 AMDGPU_GEM_DOMAIN_VRAM,
1272 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1274 &adev->gfx.rlc.cp_table_obj);
1276 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1281 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1282 if (unlikely(r != 0)) {
1283 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1286 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1287 &adev->gfx.rlc.cp_table_gpu_addr);
1289 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1290 dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1293 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1295 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1299 cz_init_cp_jump_table(adev);
1301 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1302 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object.
 *
 * Unpins, unreserves and drops the hpd_eop BO created by
 * gfx_v8_0_mec_init(); a failed reserve is warned but not fatal.
 */
1308 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1312 if (adev->gfx.mec.hpd_eop_obj) {
1313 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1314 if (unlikely(r != 0))
1315 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1316 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1317 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1318 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1319 adev->gfx.mec.hpd_eop_obj = NULL;
/* Per-queue HPD (hardware queue descriptor / EOP) area size in bytes. */
1323 #define MEC_HPD_SIZE 2048
/*
 * gfx_v8_0_mec_init - allocate the MEC HPD EOP buffer.
 *
 * Configures 1 MEC with 1 pipe (8 queues), then creates, pins and maps
 * a GTT BO sized num_mec * num_pipe * MEC_HPD_SIZE * 2 and zeroes it.
 * Failures warn and unwind through gfx_v8_0_mec_fini().
 */
1325 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1331 * we assign only 1 pipe because all other pipes will
1334 adev->gfx.mec.num_mec = 1;
1335 adev->gfx.mec.num_pipe = 1;
1336 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1338 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1339 r = amdgpu_bo_create(adev,
1340 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1342 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1343 &adev->gfx.mec.hpd_eop_obj);
1345 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
/* Reserve, pin in GTT, and CPU-map so the area can be cleared. */
1350 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1351 if (unlikely(r != 0)) {
1352 gfx_v8_0_mec_fini(adev);
1355 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1356 &adev->gfx.mec.hpd_eop_gpu_addr);
1358 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1359 gfx_v8_0_mec_fini(adev);
1362 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1364 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1365 gfx_v8_0_mec_fini(adev);
/* Zero the whole EOP area before handing it to the hardware. */
1369 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1371 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1372 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * Pre-assembled GCN compute shader used by the EDC GPR workaround to
 * touch a range of VGPRs (raw machine-code dwords; ends with
 * s_barrier / s_endpgm encodings 0xbf8a0000 / 0xbf810000).
 */
1377 static const u32 vgpr_init_compute_shader[] =
1379 0x7e000209, 0x7e020208,
1380 0x7e040207, 0x7e060206,
1381 0x7e080205, 0x7e0a0204,
1382 0x7e0c0203, 0x7e0e0202,
1383 0x7e100201, 0x7e120200,
1384 0x7e140209, 0x7e160208,
1385 0x7e180207, 0x7e1a0206,
1386 0x7e1c0205, 0x7e1e0204,
1387 0x7e200203, 0x7e220202,
1388 0x7e240201, 0x7e260200,
1389 0x7e280209, 0x7e2a0208,
1390 0x7e2c0207, 0x7e2e0206,
1391 0x7e300205, 0x7e320204,
1392 0x7e340203, 0x7e360202,
1393 0x7e380201, 0x7e3a0200,
1394 0x7e3c0209, 0x7e3e0208,
1395 0x7e400207, 0x7e420206,
1396 0x7e440205, 0x7e460204,
1397 0x7e480203, 0x7e4a0202,
1398 0x7e4c0201, 0x7e4e0200,
1399 0x7e500209, 0x7e520208,
1400 0x7e540207, 0x7e560206,
1401 0x7e580205, 0x7e5a0204,
1402 0x7e5c0203, 0x7e5e0202,
1403 0x7e600201, 0x7e620200,
1404 0x7e640209, 0x7e660208,
1405 0x7e680207, 0x7e6a0206,
1406 0x7e6c0205, 0x7e6e0204,
1407 0x7e700203, 0x7e720202,
1408 0x7e740201, 0x7e760200,
1409 0x7e780209, 0x7e7a0208,
1410 0x7e7c0207, 0x7e7e0206,
1411 0xbf8a0000, 0xbf810000,
/*
 * Pre-assembled GCN compute shader used by the EDC GPR workaround to
 * touch a range of SGPRs (raw machine-code dwords).
 */
1414 static const u32 sgpr_init_compute_shader[] =
1416 0xbe8a0100, 0xbe8c0102,
1417 0xbe8e0104, 0xbe900106,
1418 0xbe920108, 0xbe940100,
1419 0xbe960102, 0xbe980104,
1420 0xbe9a0106, 0xbe9c0108,
1421 0xbe9e0100, 0xbea00102,
1422 0xbea20104, 0xbea40106,
1423 0xbea60108, 0xbea80100,
1424 0xbeaa0102, 0xbeac0104,
1425 0xbeae0106, 0xbeb00108,
1426 0xbeb20100, 0xbeb40102,
1427 0xbeb60104, 0xbeb80106,
1428 0xbeba0108, 0xbebc0100,
1429 0xbebe0102, 0xbec00104,
1430 0xbec20106, 0xbec40108,
1431 0xbec60100, 0xbec80102,
1432 0xbee60004, 0xbee70005,
1433 0xbeea0006, 0xbeeb0007,
1434 0xbee80008, 0xbee90009,
1435 0xbefc0000, 0xbf8a0000,
1436 0xbf810000, 0x00000000,
/*
 * Register/value pairs programmed before dispatching the VGPR-init
 * shader (consumed two entries at a time as SET_SH_REG writes).
 */
1439 static const u32 vgpr_init_regs[] =
1441 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1442 mmCOMPUTE_RESOURCE_LIMITS, 0,
1443 mmCOMPUTE_NUM_THREAD_X, 256*4,
1444 mmCOMPUTE_NUM_THREAD_Y, 1,
1445 mmCOMPUTE_NUM_THREAD_Z, 1,
1446 mmCOMPUTE_PGM_RSRC2, 20,
1447 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1448 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1449 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1450 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1451 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1452 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1453 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1454 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1455 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1456 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the first SGPR-init dispatch; the SE0 thread
 * mask 0x0f targets the lower CU group (compare sgpr2_init_regs' 0xf0).
 */
1459 static const u32 sgpr1_init_regs[] =
1461 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1462 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1463 mmCOMPUTE_NUM_THREAD_X, 256*5,
1464 mmCOMPUTE_NUM_THREAD_Y, 1,
1465 mmCOMPUTE_NUM_THREAD_Z, 1,
1466 mmCOMPUTE_PGM_RSRC2, 20,
1467 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1468 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1469 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1470 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1471 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1472 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1473 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1474 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1475 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1476 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the second SGPR-init dispatch; SE0 thread
 * mask 0xf0 targets the upper CU group (complement of sgpr1_init_regs).
 */
1479 static const u32 sgpr2_init_regs[] =
1481 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1482 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1483 mmCOMPUTE_NUM_THREAD_X, 256*5,
1484 mmCOMPUTE_NUM_THREAD_Y, 1,
1485 mmCOMPUTE_NUM_THREAD_Z, 1,
1486 mmCOMPUTE_PGM_RSRC2, 20,
1487 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1488 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1489 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1490 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1491 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1492 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1493 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1494 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1495 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1496 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * EDC SEC/DED error-counter registers read back (and thereby cleared)
 * at the end of gfx_v8_0_do_edc_gpr_workarounds().  The full list is
 * longer; several entries fall in source gaps here.
 */
1499 static const u32 sec_ded_counter_registers[] =
1502 mmCPC_EDC_SCRATCH_CNT,
1503 mmCPC_EDC_UCODE_CNT,
1510 mmDC_EDC_CSINVOC_CNT,
1511 mmDC_EDC_RESTORE_CNT,
1517 mmSQC_ATC_EDC_GATCL1_CNT,
1523 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPR EDC state (Carrizo).
 *
 * Builds one IB that performs three compute dispatches — a VGPR-init
 * shader, then the SGPR-init shader twice with complementary SE0 CU
 * masks — so every GPR bank is written and its ECC state becomes valid.
 * Afterwards it enables DED/PROP_FED in GB_EDC_MODE, adjusts
 * CC_GC_EDC_CONFIG, and reads back the SEC/DED counter registers to
 * clear them.  Each dispatch is:
 *   SET_SH_REG pairs  ->  COMPUTE_PGM_LO/HI  ->  DISPATCH_DIRECT
 *   ->  EVENT_WRITE (CS partial flush).
 *
 * NOTE(review): source shown with gaps — error/return paths between
 * visible lines are not reproduced here.
 */
1528 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1530 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1531 struct amdgpu_ib ib;
1532 struct fence *f = NULL;
1535 unsigned total_size, vgpr_offset, sgpr_offset;
1538 /* only supported on CZ */
1539 if (adev->asic_type != CHIP_CARRIZO)
1542 /* bail if the compute ring is not ready */
/* Disable EDC while the init shaders run. */
1546 tmp = RREG32(mmGB_EDC_MODE);
1547 WREG32(mmGB_EDC_MODE, 0);
/* Size the IB: 3 dwords per reg pair + PGM_LO/HI + dispatch + flush. */
1550 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1552 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1554 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1555 total_size = ALIGN(total_size, 256);
1556 vgpr_offset = total_size;
1557 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1558 sgpr_offset = total_size;
1559 total_size += sizeof(sgpr_init_compute_shader);
1561 /* allocate an indirect buffer to put the commands in */
1562 memset(&ib, 0, sizeof(ib));
1563 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1565 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1569 /* load the compute shaders */
1570 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1571 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1573 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1574 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1576 /* init the ib length to 0 */
/* --- Dispatch 1: VGPR init --- */
1580 /* write the register state for the compute dispatch */
1581 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1582 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1583 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1584 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1586 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1587 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1588 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1589 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1590 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1591 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1593 /* write dispatch packet */
1594 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1595 ib.ptr[ib.length_dw++] = 8; /* x */
1596 ib.ptr[ib.length_dw++] = 1; /* y */
1597 ib.ptr[ib.length_dw++] = 1; /* z */
1598 ib.ptr[ib.length_dw++] =
1599 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1601 /* write CS partial flush packet */
1602 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1603 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- Dispatch 2: SGPR init, lower CU group (mask 0x0f) --- */
1606 /* write the register state for the compute dispatch */
1607 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1609 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1610 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1612 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1613 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1614 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1615 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1616 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1617 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1619 /* write dispatch packet */
1620 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1621 ib.ptr[ib.length_dw++] = 8; /* x */
1622 ib.ptr[ib.length_dw++] = 1; /* y */
1623 ib.ptr[ib.length_dw++] = 1; /* z */
1624 ib.ptr[ib.length_dw++] =
1625 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1627 /* write CS partial flush packet */
1628 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1629 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- Dispatch 3: SGPR init, upper CU group (mask 0xf0) --- */
1632 /* write the register state for the compute dispatch */
1633 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1634 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1635 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1636 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1638 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1639 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1641 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1642 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1643 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1645 /* write dispatch packet */
1646 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1647 ib.ptr[ib.length_dw++] = 8; /* x */
1648 ib.ptr[ib.length_dw++] = 1; /* y */
1649 ib.ptr[ib.length_dw++] = 1; /* z */
1650 ib.ptr[ib.length_dw++] =
1651 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1653 /* write CS partial flush packet */
1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1655 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1657 /* shedule the ib on the ring */
1658 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1660 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1664 /* wait for the GPU to finish processing the IB */
1665 r = fence_wait(f, false);
1667 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* Re-enable EDC with DED reporting and propagate-FED set. */
1671 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1672 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1673 WREG32(mmGB_EDC_MODE, tmp);
1675 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1676 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1677 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1680 /* read back registers to clear the counters */
1681 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1682 RREG32(sec_ded_counter_registers[i]);
1685 amdgpu_ib_free(adev, &ib, NULL);
1691 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1694 u32 mc_shared_chmap, mc_arb_ramcfg;
1695 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1699 switch (adev->asic_type) {
1701 adev->gfx.config.max_shader_engines = 1;
1702 adev->gfx.config.max_tile_pipes = 2;
1703 adev->gfx.config.max_cu_per_sh = 6;
1704 adev->gfx.config.max_sh_per_se = 1;
1705 adev->gfx.config.max_backends_per_se = 2;
1706 adev->gfx.config.max_texture_channel_caches = 2;
1707 adev->gfx.config.max_gprs = 256;
1708 adev->gfx.config.max_gs_threads = 32;
1709 adev->gfx.config.max_hw_contexts = 8;
1711 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1712 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1713 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1714 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1715 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1718 adev->gfx.config.max_shader_engines = 4;
1719 adev->gfx.config.max_tile_pipes = 16;
1720 adev->gfx.config.max_cu_per_sh = 16;
1721 adev->gfx.config.max_sh_per_se = 1;
1722 adev->gfx.config.max_backends_per_se = 4;
1723 adev->gfx.config.max_texture_channel_caches = 16;
1724 adev->gfx.config.max_gprs = 256;
1725 adev->gfx.config.max_gs_threads = 32;
1726 adev->gfx.config.max_hw_contexts = 8;
1728 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1729 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1730 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1731 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1732 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1734 case CHIP_POLARIS11:
1735 ret = amdgpu_atombios_get_gfx_info(adev);
1738 adev->gfx.config.max_gprs = 256;
1739 adev->gfx.config.max_gs_threads = 32;
1740 adev->gfx.config.max_hw_contexts = 8;
1742 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1743 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1744 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1745 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1746 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1748 case CHIP_POLARIS10:
1749 ret = amdgpu_atombios_get_gfx_info(adev);
1752 adev->gfx.config.max_gprs = 256;
1753 adev->gfx.config.max_gs_threads = 32;
1754 adev->gfx.config.max_hw_contexts = 8;
1756 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1757 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1758 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1759 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1760 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1763 adev->gfx.config.max_shader_engines = 4;
1764 adev->gfx.config.max_tile_pipes = 8;
1765 adev->gfx.config.max_cu_per_sh = 8;
1766 adev->gfx.config.max_sh_per_se = 1;
1767 adev->gfx.config.max_backends_per_se = 2;
1768 adev->gfx.config.max_texture_channel_caches = 8;
1769 adev->gfx.config.max_gprs = 256;
1770 adev->gfx.config.max_gs_threads = 32;
1771 adev->gfx.config.max_hw_contexts = 8;
1773 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1774 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1775 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1776 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1777 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1780 adev->gfx.config.max_shader_engines = 1;
1781 adev->gfx.config.max_tile_pipes = 2;
1782 adev->gfx.config.max_sh_per_se = 1;
1783 adev->gfx.config.max_backends_per_se = 2;
1785 switch (adev->pdev->revision) {
1793 adev->gfx.config.max_cu_per_sh = 8;
1803 adev->gfx.config.max_cu_per_sh = 6;
1810 adev->gfx.config.max_cu_per_sh = 6;
1819 adev->gfx.config.max_cu_per_sh = 4;
1823 adev->gfx.config.max_texture_channel_caches = 2;
1824 adev->gfx.config.max_gprs = 256;
1825 adev->gfx.config.max_gs_threads = 32;
1826 adev->gfx.config.max_hw_contexts = 8;
1828 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1835 adev->gfx.config.max_shader_engines = 1;
1836 adev->gfx.config.max_tile_pipes = 2;
1837 adev->gfx.config.max_sh_per_se = 1;
1838 adev->gfx.config.max_backends_per_se = 1;
1840 switch (adev->pdev->revision) {
1847 adev->gfx.config.max_cu_per_sh = 3;
1853 adev->gfx.config.max_cu_per_sh = 2;
1857 adev->gfx.config.max_texture_channel_caches = 2;
1858 adev->gfx.config.max_gprs = 256;
1859 adev->gfx.config.max_gs_threads = 16;
1860 adev->gfx.config.max_hw_contexts = 8;
1862 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869 adev->gfx.config.max_shader_engines = 2;
1870 adev->gfx.config.max_tile_pipes = 4;
1871 adev->gfx.config.max_cu_per_sh = 2;
1872 adev->gfx.config.max_sh_per_se = 1;
1873 adev->gfx.config.max_backends_per_se = 2;
1874 adev->gfx.config.max_texture_channel_caches = 4;
1875 adev->gfx.config.max_gprs = 256;
1876 adev->gfx.config.max_gs_threads = 32;
1877 adev->gfx.config.max_hw_contexts = 8;
1879 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1880 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1881 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1882 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1883 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1888 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1889 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1891 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1892 adev->gfx.config.mem_max_burst_length_bytes = 256;
1893 if (adev->flags & AMD_IS_APU) {
1894 /* Get memory bank mapping mode. */
1895 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1896 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1897 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1899 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1900 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1901 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1903 /* Validate settings in case only one DIMM installed. */
1904 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1905 dimm00_addr_map = 0;
1906 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1907 dimm01_addr_map = 0;
1908 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1909 dimm10_addr_map = 0;
1910 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1911 dimm11_addr_map = 0;
1913 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1914 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1915 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1916 adev->gfx.config.mem_row_size_in_kb = 2;
1918 adev->gfx.config.mem_row_size_in_kb = 1;
1920 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1921 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1922 if (adev->gfx.config.mem_row_size_in_kb > 4)
1923 adev->gfx.config.mem_row_size_in_kb = 4;
1926 adev->gfx.config.shader_engine_tile_size = 32;
1927 adev->gfx.config.num_gpus = 1;
1928 adev->gfx.config.multi_gpu_tile_size = 64;
1930 /* fix up row size */
1931 switch (adev->gfx.config.mem_row_size_in_kb) {
1934 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1937 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1940 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1943 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * Registers the GFX interrupt sources, loads microcode, initializes the
 * RLC and MEC buffer objects, creates the GFX and compute rings, and
 * reserves the GDS/GWS/OA partitions used by graphics.
 *
 * @handle: amdgpu_device pointer cast to void* (IP-block callback ABI).
 * Returns 0 on success or a negative error code from any init step.
 */
1948 static int gfx_v8_0_sw_init(void *handle)
1951 struct amdgpu_ring *ring;
1952 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* EOP (end-of-pipe) interrupt source; src id 181 feeds adev->gfx.eop_irq */
1955 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1959 /* Privileged reg */
1960 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1964 /* Privileged inst */
1965 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1969 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1971 gfx_v8_0_scratch_init(adev);
/* Fetch the CP/RLC/MEC firmware images for this ASIC. */
1973 r = gfx_v8_0_init_microcode(adev);
1975 DRM_ERROR("Failed to load gfx firmware!\n");
/* Allocate the RLC save/restore and clear-state buffer objects. */
1979 r = gfx_v8_0_rlc_init(adev);
1981 DRM_ERROR("Failed to init rlc BOs!\n");
/* Allocate the MEC (compute micro engine) HPD buffer objects. */
1985 r = gfx_v8_0_mec_init(adev);
1987 DRM_ERROR("Failed to init MEC BOs!\n");
1991 /* set up the gfx ring */
1992 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1993 ring = &adev->gfx.gfx_ring[i];
1994 ring->ring_obj = NULL;
1995 sprintf(ring->name, "gfx");
1996 /* no gfx doorbells on iceland */
1997 if (adev->asic_type != CHIP_TOPAZ) {
1998 ring->use_doorbell = true;
1999 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
/* 1024-dword ring, padded with type-3 NOPs, 16-dword alignment (0xf). */
2002 r = amdgpu_ring_init(adev, ring, 1024,
2003 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2004 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2005 AMDGPU_RING_TYPE_GFX);
2010 /* set up the compute queues */
2011 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2014 /* max 32 queues per MEC */
2015 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2016 DRM_ERROR("Too many (%d) compute rings!\n", i);
2019 ring = &adev->gfx.compute_ring[i];
2020 ring->ring_obj = NULL;
2021 ring->use_doorbell = true;
/* One doorbell slot per compute ring, contiguous after MEC_RING0. */
2022 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2023 ring->me = 1; /* first MEC */
/* 8 queues per pipe; ring index maps onto pipe/queue of MEC1. */
2025 ring->queue = i % 8;
2026 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
/* EOP interrupts for MEC1 pipes are consecutive src enums, one per pipe. */
2027 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2028 /* type-2 packets are deprecated on MEC, use type-3 instead */
2029 r = amdgpu_ring_init(adev, ring, 1024,
2030 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2031 &adev->gfx.eop_irq, irq_type,
2032 AMDGPU_RING_TYPE_COMPUTE);
2037 /* reserve GDS, GWS and OA resource for gfx */
2038 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2039 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2040 &adev->gds.gds_gfx_bo, NULL, NULL);
2044 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2045 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2046 &adev->gds.gws_gfx_bo, NULL, NULL);
2050 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2051 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2052 &adev->gds.oa_gfx_bo, NULL, NULL);
/* Constant-engine RAM size for this generation (32 KiB). */
2056 adev->gfx.ce_ram_size = 0x8000;
/* Derive gfx.config (CU counts, tiling, gb_addr_config) per ASIC. */
2058 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini - software-side teardown for the GFX v8 IP block.
 *
 * Releases everything gfx_v8_0_sw_init() created, in reverse order:
 * GDS/GWS/OA reservations, the gfx and compute rings, then the MEC and
 * RLC buffer objects and the loaded microcode.
 *
 * @handle: amdgpu_device pointer cast to void* (IP-block callback ABI).
 */
2065 static int gfx_v8_0_sw_fini(void *handle)
2068 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Free the GDS/GWS/OA partition BOs reserved for gfx during sw_init. */
2070 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2071 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2072 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
/* Tear down every gfx ring, then every compute ring. */
2074 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2075 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2076 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2077 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
/* Release MEC and RLC buffer objects and drop the firmware images. */
2079 gfx_v8_0_mec_fini(adev);
2080 gfx_v8_0_rlc_fini(adev);
2081 gfx_v8_0_free_microcode(adev);
2086 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2088 uint32_t *modearray, *mod2array;
2089 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2090 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2093 modearray = adev->gfx.config.tile_mode_array;
2094 mod2array = adev->gfx.config.macrotile_mode_array;
2096 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2097 modearray[reg_offset] = 0;
2099 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2100 mod2array[reg_offset] = 0;
2102 switch (adev->asic_type) {
2104 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105 PIPE_CONFIG(ADDR_SURF_P2) |
2106 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2107 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2108 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2109 PIPE_CONFIG(ADDR_SURF_P2) |
2110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2111 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2112 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2113 PIPE_CONFIG(ADDR_SURF_P2) |
2114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2116 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117 PIPE_CONFIG(ADDR_SURF_P2) |
2118 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2119 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2120 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121 PIPE_CONFIG(ADDR_SURF_P2) |
2122 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2124 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125 PIPE_CONFIG(ADDR_SURF_P2) |
2126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2127 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2128 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2129 PIPE_CONFIG(ADDR_SURF_P2) |
2130 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2131 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2132 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2133 PIPE_CONFIG(ADDR_SURF_P2));
2134 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2135 PIPE_CONFIG(ADDR_SURF_P2) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139 PIPE_CONFIG(ADDR_SURF_P2) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143 PIPE_CONFIG(ADDR_SURF_P2) |
2144 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2146 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2147 PIPE_CONFIG(ADDR_SURF_P2) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2150 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151 PIPE_CONFIG(ADDR_SURF_P2) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2154 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2155 PIPE_CONFIG(ADDR_SURF_P2) |
2156 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2158 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159 PIPE_CONFIG(ADDR_SURF_P2) |
2160 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2162 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2163 PIPE_CONFIG(ADDR_SURF_P2) |
2164 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2166 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2167 PIPE_CONFIG(ADDR_SURF_P2) |
2168 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2170 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2171 PIPE_CONFIG(ADDR_SURF_P2) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2174 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2175 PIPE_CONFIG(ADDR_SURF_P2) |
2176 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2178 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2179 PIPE_CONFIG(ADDR_SURF_P2) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2182 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2183 PIPE_CONFIG(ADDR_SURF_P2) |
2184 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2186 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2187 PIPE_CONFIG(ADDR_SURF_P2) |
2188 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2190 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2191 PIPE_CONFIG(ADDR_SURF_P2) |
2192 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2194 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2195 PIPE_CONFIG(ADDR_SURF_P2) |
2196 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2199 PIPE_CONFIG(ADDR_SURF_P2) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203 PIPE_CONFIG(ADDR_SURF_P2) |
2204 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2207 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2210 NUM_BANKS(ADDR_SURF_8_BANK));
2211 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2214 NUM_BANKS(ADDR_SURF_8_BANK));
2215 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2218 NUM_BANKS(ADDR_SURF_8_BANK));
2219 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2220 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2221 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2222 NUM_BANKS(ADDR_SURF_8_BANK));
2223 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2226 NUM_BANKS(ADDR_SURF_8_BANK));
2227 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2230 NUM_BANKS(ADDR_SURF_8_BANK));
2231 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2234 NUM_BANKS(ADDR_SURF_8_BANK));
2235 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2238 NUM_BANKS(ADDR_SURF_16_BANK));
2239 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2240 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2242 NUM_BANKS(ADDR_SURF_16_BANK));
2243 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246 NUM_BANKS(ADDR_SURF_16_BANK));
2247 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2250 NUM_BANKS(ADDR_SURF_16_BANK));
2251 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2253 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2254 NUM_BANKS(ADDR_SURF_16_BANK));
2255 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2258 NUM_BANKS(ADDR_SURF_16_BANK));
2259 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262 NUM_BANKS(ADDR_SURF_8_BANK));
2264 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2265 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2267 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2269 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2270 if (reg_offset != 7)
2271 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2275 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2288 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2291 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2296 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2303 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2304 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2305 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2307 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2308 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2309 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2313 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2321 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2334 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2337 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2341 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2342 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2345 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2346 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2350 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2353 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2354 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2358 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2361 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2362 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2365 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2366 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2369 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2370 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2373 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2374 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2377 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2378 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2382 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2394 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2395 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2398 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2401 NUM_BANKS(ADDR_SURF_8_BANK));
2402 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2405 NUM_BANKS(ADDR_SURF_8_BANK));
2406 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2409 NUM_BANKS(ADDR_SURF_8_BANK));
2410 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2413 NUM_BANKS(ADDR_SURF_8_BANK));
2414 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2417 NUM_BANKS(ADDR_SURF_8_BANK));
2418 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2421 NUM_BANKS(ADDR_SURF_8_BANK));
2422 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2425 NUM_BANKS(ADDR_SURF_8_BANK));
2426 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2429 NUM_BANKS(ADDR_SURF_8_BANK));
2430 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433 NUM_BANKS(ADDR_SURF_8_BANK));
2434 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2437 NUM_BANKS(ADDR_SURF_8_BANK));
2438 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2441 NUM_BANKS(ADDR_SURF_8_BANK));
2442 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2444 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 NUM_BANKS(ADDR_SURF_8_BANK));
2446 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 NUM_BANKS(ADDR_SURF_8_BANK));
2450 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453 NUM_BANKS(ADDR_SURF_4_BANK));
2455 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2456 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2458 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2459 if (reg_offset != 7)
2460 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2464 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2465 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2468 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2470 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2472 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2473 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2474 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2476 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2480 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2484 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2487 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2488 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2491 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2492 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2494 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2495 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2496 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2497 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2498 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2499 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2507 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2510 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2511 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2512 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2513 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2514 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2521 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2522 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2523 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2525 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2527 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2529 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2530 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2531 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2532 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2533 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2534 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2535 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2538 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2539 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2541 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2542 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2543 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2545 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2546 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2549 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2550 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2551 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2554 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2555 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2557 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2558 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2559 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2562 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2563 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2566 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2567 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2579 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2582 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2584 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2587 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2589 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2590 NUM_BANKS(ADDR_SURF_16_BANK));
2591 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2593 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2594 NUM_BANKS(ADDR_SURF_16_BANK));
2595 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2597 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2598 NUM_BANKS(ADDR_SURF_16_BANK));
2599 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2601 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2602 NUM_BANKS(ADDR_SURF_16_BANK));
2603 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2605 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2606 NUM_BANKS(ADDR_SURF_16_BANK));
2607 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2610 NUM_BANKS(ADDR_SURF_16_BANK));
2611 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2614 NUM_BANKS(ADDR_SURF_16_BANK));
2615 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2617 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2618 NUM_BANKS(ADDR_SURF_16_BANK));
2619 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622 NUM_BANKS(ADDR_SURF_16_BANK));
2623 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2625 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2626 NUM_BANKS(ADDR_SURF_16_BANK));
2627 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630 NUM_BANKS(ADDR_SURF_16_BANK));
2631 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634 NUM_BANKS(ADDR_SURF_8_BANK));
2635 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638 NUM_BANKS(ADDR_SURF_4_BANK));
2639 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642 NUM_BANKS(ADDR_SURF_4_BANK));
2644 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2645 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2647 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648 if (reg_offset != 7)
2649 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2652 case CHIP_POLARIS11:
2653 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2657 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2661 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2665 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2669 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2678 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2686 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2687 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2688 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2691 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2695 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2696 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2699 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2703 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2712 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2727 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2728 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2731 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2732 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2734 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2735 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2738 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2739 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2740 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2742 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2744 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2752 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2756 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2760 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2762 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2766 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2767 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2772 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2774 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2776 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2779 NUM_BANKS(ADDR_SURF_16_BANK));
2781 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2784 NUM_BANKS(ADDR_SURF_16_BANK));
2786 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2789 NUM_BANKS(ADDR_SURF_16_BANK));
2791 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2793 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2794 NUM_BANKS(ADDR_SURF_16_BANK));
2796 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2798 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2799 NUM_BANKS(ADDR_SURF_16_BANK));
2801 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2804 NUM_BANKS(ADDR_SURF_16_BANK));
2806 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2809 NUM_BANKS(ADDR_SURF_16_BANK));
2811 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2812 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2813 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2814 NUM_BANKS(ADDR_SURF_16_BANK));
2816 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819 NUM_BANKS(ADDR_SURF_16_BANK));
2821 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824 NUM_BANKS(ADDR_SURF_16_BANK));
2826 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829 NUM_BANKS(ADDR_SURF_16_BANK));
2831 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2834 NUM_BANKS(ADDR_SURF_16_BANK));
2836 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839 NUM_BANKS(ADDR_SURF_8_BANK));
2841 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844 NUM_BANKS(ADDR_SURF_4_BANK));
2846 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2847 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2849 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2850 if (reg_offset != 7)
2851 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2854 case CHIP_POLARIS10:
2855 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2858 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2859 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2862 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2863 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2866 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2867 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2871 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2880 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2888 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2889 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2890 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2898 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2901 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2902 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2905 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2914 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2920 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2923 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2925 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2926 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2929 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2933 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2936 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2940 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2941 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2942 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2946 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2947 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2954 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2958 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2962 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2966 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2975 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2978 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2981 NUM_BANKS(ADDR_SURF_16_BANK));
2983 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 NUM_BANKS(ADDR_SURF_16_BANK));
2988 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2989 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 NUM_BANKS(ADDR_SURF_16_BANK));
2993 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2995 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2996 NUM_BANKS(ADDR_SURF_16_BANK));
2998 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2999 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3000 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3001 NUM_BANKS(ADDR_SURF_16_BANK));
3003 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3005 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3006 NUM_BANKS(ADDR_SURF_16_BANK));
3008 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3011 NUM_BANKS(ADDR_SURF_16_BANK));
3013 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3014 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3015 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016 NUM_BANKS(ADDR_SURF_16_BANK));
3018 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3020 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021 NUM_BANKS(ADDR_SURF_16_BANK));
3023 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3025 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3026 NUM_BANKS(ADDR_SURF_16_BANK));
3028 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3030 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3031 NUM_BANKS(ADDR_SURF_16_BANK));
3033 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3036 NUM_BANKS(ADDR_SURF_8_BANK));
3038 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3041 NUM_BANKS(ADDR_SURF_4_BANK));
3043 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3045 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3046 NUM_BANKS(ADDR_SURF_4_BANK));
3048 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3049 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3051 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3052 if (reg_offset != 7)
3053 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3057 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P2) |
3059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3061 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P2) |
3063 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3064 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3065 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066 PIPE_CONFIG(ADDR_SURF_P2) |
3067 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3068 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3069 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070 PIPE_CONFIG(ADDR_SURF_P2) |
3071 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3073 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074 PIPE_CONFIG(ADDR_SURF_P2) |
3075 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3076 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3078 PIPE_CONFIG(ADDR_SURF_P2) |
3079 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3080 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3082 PIPE_CONFIG(ADDR_SURF_P2) |
3083 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3086 PIPE_CONFIG(ADDR_SURF_P2));
3087 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3088 PIPE_CONFIG(ADDR_SURF_P2) |
3089 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092 PIPE_CONFIG(ADDR_SURF_P2) |
3093 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3095 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3096 PIPE_CONFIG(ADDR_SURF_P2) |
3097 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3099 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3100 PIPE_CONFIG(ADDR_SURF_P2) |
3101 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3103 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 PIPE_CONFIG(ADDR_SURF_P2) |
3105 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3108 PIPE_CONFIG(ADDR_SURF_P2) |
3109 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112 PIPE_CONFIG(ADDR_SURF_P2) |
3113 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3116 PIPE_CONFIG(ADDR_SURF_P2) |
3117 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3119 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3120 PIPE_CONFIG(ADDR_SURF_P2) |
3121 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3123 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3124 PIPE_CONFIG(ADDR_SURF_P2) |
3125 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3127 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3128 PIPE_CONFIG(ADDR_SURF_P2) |
3129 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3131 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3132 PIPE_CONFIG(ADDR_SURF_P2) |
3133 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3136 PIPE_CONFIG(ADDR_SURF_P2) |
3137 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3140 PIPE_CONFIG(ADDR_SURF_P2) |
3141 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3144 PIPE_CONFIG(ADDR_SURF_P2) |
3145 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 PIPE_CONFIG(ADDR_SURF_P2) |
3149 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3152 PIPE_CONFIG(ADDR_SURF_P2) |
3153 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3156 PIPE_CONFIG(ADDR_SURF_P2) |
3157 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3160 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163 NUM_BANKS(ADDR_SURF_8_BANK));
3164 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3167 NUM_BANKS(ADDR_SURF_8_BANK));
3168 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3171 NUM_BANKS(ADDR_SURF_8_BANK));
3172 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3175 NUM_BANKS(ADDR_SURF_8_BANK));
3176 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3179 NUM_BANKS(ADDR_SURF_8_BANK));
3180 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3183 NUM_BANKS(ADDR_SURF_8_BANK));
3184 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187 NUM_BANKS(ADDR_SURF_8_BANK));
3188 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3191 NUM_BANKS(ADDR_SURF_16_BANK));
3192 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195 NUM_BANKS(ADDR_SURF_16_BANK));
3196 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199 NUM_BANKS(ADDR_SURF_16_BANK));
3200 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203 NUM_BANKS(ADDR_SURF_16_BANK));
3204 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207 NUM_BANKS(ADDR_SURF_16_BANK));
3208 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211 NUM_BANKS(ADDR_SURF_16_BANK));
3212 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215 NUM_BANKS(ADDR_SURF_8_BANK));
3217 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3218 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3220 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3222 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3223 if (reg_offset != 7)
3224 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3229 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3233 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234 PIPE_CONFIG(ADDR_SURF_P2) |
3235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3238 PIPE_CONFIG(ADDR_SURF_P2) |
3239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3242 PIPE_CONFIG(ADDR_SURF_P2) |
3243 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3245 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246 PIPE_CONFIG(ADDR_SURF_P2) |
3247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3250 PIPE_CONFIG(ADDR_SURF_P2) |
3251 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3254 PIPE_CONFIG(ADDR_SURF_P2) |
3255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3256 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3257 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3258 PIPE_CONFIG(ADDR_SURF_P2) |
3259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3262 PIPE_CONFIG(ADDR_SURF_P2));
3263 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3271 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3272 PIPE_CONFIG(ADDR_SURF_P2) |
3273 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3275 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3276 PIPE_CONFIG(ADDR_SURF_P2) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3283 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3291 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3316 PIPE_CONFIG(ADDR_SURF_P2) |
3317 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3320 PIPE_CONFIG(ADDR_SURF_P2) |
3321 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3324 PIPE_CONFIG(ADDR_SURF_P2) |
3325 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3327 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3328 PIPE_CONFIG(ADDR_SURF_P2) |
3329 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3332 PIPE_CONFIG(ADDR_SURF_P2) |
3333 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339 NUM_BANKS(ADDR_SURF_8_BANK));
3340 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343 NUM_BANKS(ADDR_SURF_8_BANK));
3344 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3347 NUM_BANKS(ADDR_SURF_8_BANK));
3348 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3351 NUM_BANKS(ADDR_SURF_8_BANK));
3352 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355 NUM_BANKS(ADDR_SURF_8_BANK));
3356 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359 NUM_BANKS(ADDR_SURF_8_BANK));
3360 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3363 NUM_BANKS(ADDR_SURF_8_BANK));
3364 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 NUM_BANKS(ADDR_SURF_16_BANK));
3368 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371 NUM_BANKS(ADDR_SURF_16_BANK));
3372 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375 NUM_BANKS(ADDR_SURF_16_BANK));
3376 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 NUM_BANKS(ADDR_SURF_16_BANK));
3380 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383 NUM_BANKS(ADDR_SURF_16_BANK));
3384 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387 NUM_BANKS(ADDR_SURF_16_BANK));
3388 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391 NUM_BANKS(ADDR_SURF_8_BANK));
3393 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3394 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3396 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3398 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3399 if (reg_offset != 7)
3400 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3406 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3407 u32 se_num, u32 sh_num, u32 instance)
3411 if (instance == 0xffffffff)
3412 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3414 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3416 if (se_num == 0xffffffff)
3417 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3419 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3421 if (sh_num == 0xffffffff)
3422 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3424 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3426 WREG32(mmGRBM_GFX_INDEX, data);
3429 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3431 return (u32)((1ULL << bit_width) - 1);
3434 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3438 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3439 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3441 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3443 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3444 adev->gfx.config.max_sh_per_se);
3446 return (~data) & mask;
/*
 * Compute the default PA_SC_RASTER_CONFIG/_1 values for this ASIC.
 * Only ORs bits into *rconf / *rconf1; the caller passes zero-initialized
 * words (see gfx_v8_0_setup_rb).
 * NOTE(review): several case labels and break statements of this switch
 * appear to have been lost from this excerpt (e.g. the label before the
 * first SE_MAP block and the Topaz/Carrizo/Stoney labels) — verify
 * against the full file before editing.
 */
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
	switch (adev->asic_type) {
		/* full RB/packer/SE mapping for the large 4-SE layout */
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
		/* small parts: only the packer-0 RB map is programmed */
		*rconf |= RB_MAP_PKR0(2);
	case CHIP_POLARIS11:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		/* unknown chip: leave the zero defaults and complain */
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Rewrite PA_SC_RASTER_CONFIG per shader engine when some render backends
 * have been harvested (fused off / user-disabled), remapping each dead
 * RB/packer/SE slot onto a live one so rasterization still round-robins
 * over working backends only.
 *
 * @raster_config / @raster_config_1: the default (unharvested) values.
 * @rb_mask: bitmap of active RBs across the whole chip.
 * @num_rb: total RB count the masks were sized for.
 *
 * NOTE(review): the else-branches and closing braces of the inner
 * remapping conditionals are missing from this excerpt (each bare
 * SE_MAP/PKR_MAP/RB_MAP_* line below is one arm of a lost if/else) —
 * verify against the full file before editing.
 */
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];

	/* slice the chip-wide RB mask into one sub-mask per shader engine */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* the remap logic below only handles these topologies */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if an entire SE pair is dead, repoint SE_PAIR_MAP at the live pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
			SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2; /* index of this SE's pair partner base */

		/* one SE of the pair is dead: map all work to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);

		/* repoint the packer map when one packer lost all its RBs */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);

		/* per-packer RB maps: packer 0 ... */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

					RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
					RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);

			/* ... and packer 1, when present */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

						RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
						RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover which render backends survived harvesting, record the result in
 * adev->gfx.config (backend_enable_mask / num_rbs), and program the raster
 * config: broadcast the defaults when all RBs are present, otherwise write
 * a per-SE harvested configuration. Serialized by grbm_idx_mutex because
 * GRBM_GFX_INDEX is banked.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
	u32 raster_config = 0, raster_config_1 = 0;
	/* width in bits of one SE/SH's slice of the packed RB bitmap */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
		adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SE/SH bitmap into one chip-wide word */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* nothing harvested (or nothing usable): broadcast defaults */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
		/* some RBs harvested: remap per shader engine */
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
	mutex_unlock(&adev->grbm_idx_mutex);
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
 * @adev: amdgpu_device pointer
 * Initialize the SH_MEM aperture registers for the compute VMIDs
3647 #define DEFAULT_SH_MEM_BASES (0x6000)
3648 #define FIRST_COMPUTE_VMID (8)
3649 #define LAST_COMPUTE_VMID (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for both the private and shared apertures */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA 64-bit addressing, unaligned access allowed, cached MTYPE */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* program every compute VMID; SRBM banking guarded by srbm_mutex */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 disabled: base > limit */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	/* restore VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
/*
 * One-time GFX golden setup: address config, tiling tables, RB/CU
 * discovery, per-VMID SH_MEM apertures and the PA_SC FIFO sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* mirror gb_addr_config into every block that needs it */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		/* NOTE(review): upstream selects between the UC and NC MTYPE
		 * configs with an if/else on the VMID — that branch appears
		 * lost from this excerpt; confirm before editing. */
		tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
		WREG32(mmSH_MEM_CONFIG, tmp);

		tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
		WREG32(mmSH_MEM_CONFIG, tmp);

		/* APE1 disabled: base > limit */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
		PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
		PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes paths report idle: first the per-SE/SH CU
 * master-busy registers, then the non-CU masters (SE/GC/TC0/TC1). Each
 * poll is bounded by adev->usec_timeout iterations.
 * NOTE(review): the loop-exit break/udelay lines between the busy checks
 * appear to be missing from this excerpt — confirm against the full file.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the non-CU serdes masters to drain */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3777 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3780 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3782 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3783 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3784 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3785 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3787 WREG32(mmCP_INT_CNTL_RING0, tmp);
3790 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3793 WREG32(mmRLC_CSIB_ADDR_HI,
3794 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3795 WREG32(mmRLC_CSIB_ADDR_LO,
3796 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3797 WREG32(mmRLC_CSIB_LENGTH,
3798 adev->gfx.rlc.clear_state_size);
/*
 * Walk the RLC indirect register-list-format blob: record where each
 * indirect section starts (ind_start_offsets), deduplicate the register
 * indices it references into unique_indices, and rewrite each blob entry
 * in place to the index of its deduplicated slot.
 * NOTE(review): several parameters of this signature and the loop
 * scaffolding (section detection, indices loop header) are missing from
 * this excerpt — confirm against the full file before editing.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int *unique_indices,
				int *ind_start_offsets,
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {
			/* remember where this indirect section begins */
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);

		/* 0xFFFFFFFF terminates a section */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {

		/* look for the matching indice */
			indices < *indices_count;
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])

		/* not seen before: append to the unique table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);

		/* replace the raw value with its deduplicated index */
		register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore lists: the direct restore list goes into
 * ARAM, the (parsed) indirect format list plus its section offsets go
 * into GPM scratch, and the deduplicated register indices are programmed
 * into the RLC_SRM_INDEX_CNTL_{ADDR,DATA}_n register pairs.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	/* work on a mutable copy: parsing rewrites entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				    RLC_FormatDirectRegListLength,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    sizeof(unique_indices) / sizeof(int),
				    indirect_start_offsets,
				    sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect format list into GPM scratch */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore-list length is stored in dword pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		/* low 18 bits -> ADDR register; NOTE(review): the DATA half
		 * shifts by 20, skipping bits 18-19 — confirm this split
		 * against the RLC SRM index encoding. */
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);

	kfree(register_list_format);
3913 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3915 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing parameters (poll count, power
 * up/down and command propagation delays, auto-PG idle threshold) when
 * any GFX power-gating feature is enabled.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* all four PG transition delays set to 0x10 */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3938 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3941 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3944 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3947 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3950 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3952 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
/*
 * Set up power gating: program the save/restore lists when any PG feature
 * is enabled, then apply the per-ASIC extras (jump table, always-on CU
 * mask, SCK slow-down and CP power gating on Carrizo/Stoney; PG timing on
 * Polaris11).
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
3989 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3991 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
3993 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3994 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	/* NOTE(review): a settle delay between assert and de-assert appears
	 * to be missing from this excerpt — confirm against the full file. */
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * Re-enable the RLC F32 micro-engine. On discrete parts the CP GUI-idle
 * interrupts are re-armed here; APUs enable them later, after CP init.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Legacy (non-SMU) RLC firmware load: stream the ucode words into
 * RLC_GPM_UCODE_DATA and finish by writing the fw version to the address
 * register. Requires adev->gfx.rlc_fw to have been fetched already.
 * NOTE(review): the error/success return statements are outside this
 * excerpt.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload starts after the header; size is in bytes -> dwords */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
				   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Full RLC bring-up: stop the engine, disable coarse/3D clock gating,
 * clear power gating, soft-reset, re-init PG, load or verify the RLC
 * firmware (legacy register path vs. SMU-loaded), then start the RLC.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
	gfx_v8_0_rlc_stop(adev);

	/* disable CGCG/CGLS while reprogramming the RLC */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris also carries a 3D variant of the CGCG control */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);

	/* disable PG before reset */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			/* SMU-loaded path: just confirm the load finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);

	gfx_v8_0_rlc_start(adev);
/*
 * Halt or un-halt the three CP gfx micro-engines (ME/PFP/CE) via
 * CP_ME_CNTL. When halting, every gfx ring is marked not ready so no
 * further submissions are attempted.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
	u32 tmp = RREG32(mmCP_ME_CNTL);

	/* enable path: clear all three HALT bits */
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	/* disable path: set all three HALT bits and park the rings */
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].ready = false;
	WREG32(mmCP_ME_CNTL, tmp);
/*
 * Legacy CP gfx firmware load: halt the CP, then stream the PFP, CE and
 * ME ucode images into their respective ucode-data registers, finishing
 * each with a write of the firmware version to the address register.
 * NOTE(review): the -EINVAL / 0 return statements are outside this
 * excerpt.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* CP must be halted while its ucode is replaced */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME (loaded through the CP_ME_RAM window) */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * Compute the dword count of the clear-state buffer PM4 stream that
 * gfx_v8_0_cp_gfx_start emits: preamble/context-control packets, every
 * SECT_CONTEXT extent from vi_cs_data (2 header dwords + payload each),
 * the raster-config pair, and the end-clear-state packets.
 * NOTE(review): the fixed per-packet count increments and the return are
 * outside this excerpt.
 */
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	/* context control state */

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;

	/* pa_sc_raster_config/pa_sc_raster_config1 */
	/* end clear state */
/*
 * Bring up the gfx ring: program basic CP limits, un-halt the CP, then
 * emit the clear-state PM4 stream (context control, every SECT_CONTEXT
 * extent from vi_cs_data, per-ASIC raster config, CLEAR_STATE and the CE
 * partition bases) and commit it.
 * NOTE(review): break statements and some case labels of the per-ASIC
 * raster-config switch are missing from this excerpt.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* reserve space for the whole clear-state stream plus slack */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay every context-register extent from the golden CS data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);

	/* per-ASIC PA_SC_RASTER_CONFIG/_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);
/*
 * Program the gfx ring buffer hardware state (size, pointers, writeback
 * address, base, doorbell routing), start the ring via
 * gfx_v8_0_cp_gfx_start, and run a ring test to confirm the CP is alive.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
	struct amdgpu_ring *ring;
	u64 rb_addr, rptr_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	/* ring_size is bytes; the register wants log2 of qwords */
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
	/* big-endian hosts additionally set BUF_SWAP */
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* drop RPTR_WR_ENA again after priming the pointers */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
			/* doorbell disabled for this ring */
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	r = amdgpu_ring_test_ring(ring);
		ring->ready = false;
/*
 * Halt or un-halt both compute micro-engines (MEC ME1/ME2) via
 * CP_MEC_CNTL; when halting, every compute ring is marked not ready.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
	/* enable path: clear both HALT bits */
	WREG32(mmCP_MEC_CNTL, 0);
	/* disable path: halt ME1+ME2 and park the compute rings */
	WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].ready = false;
/*
 * Legacy MEC firmware load: halt the compute engines, stream the MEC1
 * image into its ucode registers, and optionally the separate MEC2 image
 * when one was fetched.
 * NOTE(review): the -EINVAL / 0 return statements are outside this
 * excerpt.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4425 uint32_t header; /* ordinal0 */
4426 uint32_t compute_dispatch_initiator; /* ordinal1 */
4427 uint32_t compute_dim_x; /* ordinal2 */
4428 uint32_t compute_dim_y; /* ordinal3 */
4429 uint32_t compute_dim_z; /* ordinal4 */
4430 uint32_t compute_start_x; /* ordinal5 */
4431 uint32_t compute_start_y; /* ordinal6 */
4432 uint32_t compute_start_z; /* ordinal7 */
4433 uint32_t compute_num_thread_x; /* ordinal8 */
4434 uint32_t compute_num_thread_y; /* ordinal9 */
4435 uint32_t compute_num_thread_z; /* ordinal10 */
4436 uint32_t compute_pipelinestat_enable; /* ordinal11 */
4437 uint32_t compute_perfcount_enable; /* ordinal12 */
4438 uint32_t compute_pgm_lo; /* ordinal13 */
4439 uint32_t compute_pgm_hi; /* ordinal14 */
4440 uint32_t compute_tba_lo; /* ordinal15 */
4441 uint32_t compute_tba_hi; /* ordinal16 */
4442 uint32_t compute_tma_lo; /* ordinal17 */
4443 uint32_t compute_tma_hi; /* ordinal18 */
4444 uint32_t compute_pgm_rsrc1; /* ordinal19 */
4445 uint32_t compute_pgm_rsrc2; /* ordinal20 */
4446 uint32_t compute_vmid; /* ordinal21 */
4447 uint32_t compute_resource_limits; /* ordinal22 */
4448 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
4449 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
4450 uint32_t compute_tmpring_size; /* ordinal25 */
4451 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
4452 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
4453 uint32_t compute_restart_x; /* ordinal28 */
4454 uint32_t compute_restart_y; /* ordinal29 */
4455 uint32_t compute_restart_z; /* ordinal30 */
4456 uint32_t compute_thread_trace_enable; /* ordinal31 */
4457 uint32_t compute_misc_reserved; /* ordinal32 */
4458 uint32_t compute_dispatch_id; /* ordinal33 */
4459 uint32_t compute_threadgroup_id; /* ordinal34 */
4460 uint32_t compute_relaunch; /* ordinal35 */
4461 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
4462 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
4463 uint32_t compute_wave_restore_control; /* ordinal38 */
4464 uint32_t reserved9; /* ordinal39 */
4465 uint32_t reserved10; /* ordinal40 */
4466 uint32_t reserved11; /* ordinal41 */
4467 uint32_t reserved12; /* ordinal42 */
4468 uint32_t reserved13; /* ordinal43 */
4469 uint32_t reserved14; /* ordinal44 */
4470 uint32_t reserved15; /* ordinal45 */
4471 uint32_t reserved16; /* ordinal46 */
4472 uint32_t reserved17; /* ordinal47 */
4473 uint32_t reserved18; /* ordinal48 */
4474 uint32_t reserved19; /* ordinal49 */
4475 uint32_t reserved20; /* ordinal50 */
4476 uint32_t reserved21; /* ordinal51 */
4477 uint32_t reserved22; /* ordinal52 */
4478 uint32_t reserved23; /* ordinal53 */
4479 uint32_t reserved24; /* ordinal54 */
4480 uint32_t reserved25; /* ordinal55 */
4481 uint32_t reserved26; /* ordinal56 */
4482 uint32_t reserved27; /* ordinal57 */
4483 uint32_t reserved28; /* ordinal58 */
4484 uint32_t reserved29; /* ordinal59 */
4485 uint32_t reserved30; /* ordinal60 */
4486 uint32_t reserved31; /* ordinal61 */
4487 uint32_t reserved32; /* ordinal62 */
4488 uint32_t reserved33; /* ordinal63 */
4489 uint32_t reserved34; /* ordinal64 */
4490 uint32_t compute_user_data_0; /* ordinal65 */
4491 uint32_t compute_user_data_1; /* ordinal66 */
4492 uint32_t compute_user_data_2; /* ordinal67 */
4493 uint32_t compute_user_data_3; /* ordinal68 */
4494 uint32_t compute_user_data_4; /* ordinal69 */
4495 uint32_t compute_user_data_5; /* ordinal70 */
4496 uint32_t compute_user_data_6; /* ordinal71 */
4497 uint32_t compute_user_data_7; /* ordinal72 */
4498 uint32_t compute_user_data_8; /* ordinal73 */
4499 uint32_t compute_user_data_9; /* ordinal74 */
4500 uint32_t compute_user_data_10; /* ordinal75 */
4501 uint32_t compute_user_data_11; /* ordinal76 */
4502 uint32_t compute_user_data_12; /* ordinal77 */
4503 uint32_t compute_user_data_13; /* ordinal78 */
4504 uint32_t compute_user_data_14; /* ordinal79 */
4505 uint32_t compute_user_data_15; /* ordinal80 */
4506 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
4507 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
4508 uint32_t reserved35; /* ordinal83 */
4509 uint32_t reserved36; /* ordinal84 */
4510 uint32_t reserved37; /* ordinal85 */
4511 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
4512 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
4513 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
4514 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
4515 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
4516 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
4517 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
4518 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
4519 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
4520 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
4521 uint32_t reserved38; /* ordinal96 */
4522 uint32_t reserved39; /* ordinal97 */
4523 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
4524 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
4525 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
4526 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
4527 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
4528 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
4529 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
4530 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
4531 uint32_t reserved40; /* ordinal106 */
4532 uint32_t reserved41; /* ordinal107 */
4533 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
4534 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
4535 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
4536 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
4537 uint32_t reserved42; /* ordinal112 */
4538 uint32_t reserved43; /* ordinal113 */
4539 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
4540 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
4541 uint32_t cp_packet_id_lo; /* ordinal116 */
4542 uint32_t cp_packet_id_hi; /* ordinal117 */
4543 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
4544 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
4545 uint32_t gds_save_base_addr_lo; /* ordinal120 */
4546 uint32_t gds_save_base_addr_hi; /* ordinal121 */
4547 uint32_t gds_save_mask_lo; /* ordinal122 */
4548 uint32_t gds_save_mask_hi; /* ordinal123 */
4549 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
4550 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
4551 uint32_t reserved44; /* ordinal126 */
4552 uint32_t reserved45; /* ordinal127 */
4553 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
4554 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
4555 uint32_t cp_hqd_active; /* ordinal130 */
4556 uint32_t cp_hqd_vmid; /* ordinal131 */
4557 uint32_t cp_hqd_persistent_state; /* ordinal132 */
4558 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
4559 uint32_t cp_hqd_queue_priority; /* ordinal134 */
4560 uint32_t cp_hqd_quantum; /* ordinal135 */
4561 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
4562 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
4563 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
4564 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
4565 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
4566 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
4567 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
4568 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
4569 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
4570 uint32_t cp_hqd_pq_control; /* ordinal145 */
4571 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
4572 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
4573 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
4574 uint32_t cp_hqd_ib_control; /* ordinal149 */
4575 uint32_t cp_hqd_iq_timer; /* ordinal150 */
4576 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
4577 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
4578 uint32_t cp_hqd_dma_offload; /* ordinal153 */
4579 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
4580 uint32_t cp_hqd_msg_type; /* ordinal155 */
4581 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
4582 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
4583 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
4584 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
4585 uint32_t cp_hqd_hq_status0; /* ordinal160 */
4586 uint32_t cp_hqd_hq_control0; /* ordinal161 */
4587 uint32_t cp_mqd_control; /* ordinal162 */
4588 uint32_t cp_hqd_hq_status1; /* ordinal163 */
4589 uint32_t cp_hqd_hq_control1; /* ordinal164 */
4590 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
4591 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
4592 uint32_t cp_hqd_eop_control; /* ordinal167 */
4593 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
4594 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
4595 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
4596 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
4597 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
4598 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
4599 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
4600 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
4601 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
4602 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
4603 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
4604 uint32_t cp_hqd_error; /* ordinal179 */
4605 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
4606 uint32_t cp_hqd_eop_dones; /* ordinal181 */
4607 uint32_t reserved46; /* ordinal182 */
4608 uint32_t reserved47; /* ordinal183 */
4609 uint32_t reserved48; /* ordinal184 */
4610 uint32_t reserved49; /* ordinal185 */
4611 uint32_t reserved50; /* ordinal186 */
4612 uint32_t reserved51; /* ordinal187 */
4613 uint32_t reserved52; /* ordinal188 */
4614 uint32_t reserved53; /* ordinal189 */
4615 uint32_t reserved54; /* ordinal190 */
4616 uint32_t reserved55; /* ordinal191 */
4617 uint32_t iqtimer_pkt_header; /* ordinal192 */
4618 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
4619 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
4620 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
4621 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
4622 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
4623 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
4624 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
4625 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
4626 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
4627 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
4628 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
4629 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
4630 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
4631 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
4632 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
4633 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
4634 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
4635 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
4636 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
4637 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
4638 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
4639 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
4640 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
4641 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
4642 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
4643 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
4644 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
4645 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
4646 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
4647 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
4648 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
4649 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
4650 uint32_t reserved56; /* ordinal225 */
4651 uint32_t reserved57; /* ordinal226 */
4652 uint32_t reserved58; /* ordinal227 */
4653 uint32_t set_resources_header; /* ordinal228 */
4654 uint32_t set_resources_dw1; /* ordinal229 */
4655 uint32_t set_resources_dw2; /* ordinal230 */
4656 uint32_t set_resources_dw3; /* ordinal231 */
4657 uint32_t set_resources_dw4; /* ordinal232 */
4658 uint32_t set_resources_dw5; /* ordinal233 */
4659 uint32_t set_resources_dw6; /* ordinal234 */
4660 uint32_t set_resources_dw7; /* ordinal235 */
4661 uint32_t reserved59; /* ordinal236 */
4662 uint32_t reserved60; /* ordinal237 */
4663 uint32_t reserved61; /* ordinal238 */
4664 uint32_t reserved62; /* ordinal239 */
4665 uint32_t reserved63; /* ordinal240 */
4666 uint32_t reserved64; /* ordinal241 */
4667 uint32_t reserved65; /* ordinal242 */
4668 uint32_t reserved66; /* ordinal243 */
4669 uint32_t reserved67; /* ordinal244 */
4670 uint32_t reserved68; /* ordinal245 */
4671 uint32_t reserved69; /* ordinal246 */
4672 uint32_t reserved70; /* ordinal247 */
4673 uint32_t reserved71; /* ordinal248 */
4674 uint32_t reserved72; /* ordinal249 */
4675 uint32_t reserved73; /* ordinal250 */
4676 uint32_t reserved74; /* ordinal251 */
4677 uint32_t reserved75; /* ordinal252 */
4678 uint32_t reserved76; /* ordinal253 */
4679 uint32_t reserved77; /* ordinal254 */
4680 uint32_t reserved78; /* ordinal255 */
4682 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
/* Tear down the per-ring MQD (memory queue descriptor) buffer objects that
 * gfx_v8_0_cp_compute_resume() created: unpin, unreserve and drop the last
 * reference for every compute ring. */
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		if (ring->mqd_obj) {
			/* reservation failure is only warned about; the BO is
			 * still unpinned/unreferenced below so it is not leaked */
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);
			amdgpu_bo_unref(&ring->mqd_obj);
			/* clear so a later resume knows to re-allocate */
			ring->mqd_obj = NULL;
/* Bring up the compute (MEC) side of the command processor: program the EOP
 * buffers for every MEC pipe, build and activate an MQD for each compute
 * ring, then ring-test each ring.  Returns 0 on success, negative errno on
 * failure. */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
	bool use_doorbell = true;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on MEC1 (me=1), pipes 4-7 on MEC2 (me=2) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe owns its own MEC_HPD_SIZE slice of the EOP buffer */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		vi_srbm_select(adev, me, pipe, 0, 0);
		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);
		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues - one MQD per compute ring */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			/* the MQD lives in GTT so the CP can fetch it */
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
		/* pin so the GPU address stays stable while the queue is live */
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* NOTE(review): magic values expected by the CP microcode —
		 * not derivable from this file; do not change */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* select this ring's me/pipe/queue before touching HQD regs */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
			/* doorbell range only programmed on ASICs that support it */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;
			mqd->cp_hqd_pq_doorbell_control = 0;
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);

		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);

	gfx_v8_0_cp_compute_enable(adev, true);

	/* sanity-check every compute ring before declaring it ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_ring(ring);
			ring->ready = false;
/* Load CP microcode (either directly or via the SMU, depending on the
 * platform) and then resume the gfx and compute rings. */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
	/* dGPUs: mask GUI-idle interrupts while the CP is reprogrammed */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			r = gfx_v8_0_cp_compute_load_microcode(adev);
			/* SMU-driven loading: just confirm each ucode finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			/* NOTE(review): Topaz apparently loads MEC ucode
			 * directly even on the SMU path — confirm against the
			 * smumgr implementation */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);

	r = gfx_v8_0_cp_gfx_resume(adev);

	r = gfx_v8_0_cp_compute_resume(adev);

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable or disable both halves of the command processor (gfx + compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init hook: program golden registers, initialize the GPU,
 * then bring up the RLC followed by the CP. */
static int gfx_v8_0_hw_init(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);

	r = gfx_v8_0_cp_resume(adev);
/* IP-block hw_fini hook: drop our irq references, stop the CP and RLC,
 * free compute MQDs and ungate GFX power gating. */
static int gfx_v8_0_hw_fini(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave the block ungated so a later hw_init starts from a known state */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
/* Suspend is just a full hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
/* Resume is just a full hardware re-init. */
static int gfx_v8_0_resume(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
/* Report whether the GFX block is idle, based on GRBM_STATUS.GUI_ACTIVE. */
static bool gfx_v8_0_is_idle(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/* Poll gfx_v8_0_is_idle() for up to adev->usec_timeout microseconds. */
static int gfx_v8_0_wait_for_idle(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
/* Inspect the GRBM/SRBM status registers and compute which soft-reset bits
 * (if any) are needed; the masks are stashed in adev->gfx for the
 * pre/soft/post reset handlers.  Returns true when a reset is required. */
static bool gfx_v8_0_check_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* any busy graphics block implies resetting CP + GFX and the GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	/* a busy RLC requires its own reset bit */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* CP fetcher/compute/gfx engines busy: reset those CP sub-blocks too */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);

	/* system-side status: pending GRBM requests or a busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* remember the masks for the reset handlers */
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
/* Quiesce a single hardware queue: select it via SRBM, request a dequeue
 * and poll (bounded by usec_timeout) until CP_HQD_ACTIVE clears.
 * NOTE(review): caller appears responsible for holding srbm_mutex and for
 * restoring the SRBM selection — confirm at the call sites. */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
/* Pre-reset hook: stop the RLC and disable whichever CP engines the masks
 * computed by gfx_v8_0_check_soft_reset() say will be reset. */
static int gfx_v8_0_pre_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing to do if check_soft_reset found no busy blocks */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		/* drain every compute queue before turning the MEC off */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
/* Perform the actual soft reset: stall the GFX memory controller,
 * pulse the recorded GRBM/SRBM soft-reset bits, then unstall. */
static int gfx_v8_0_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stall the memory controller on the GFX side during the reset */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);

	if (grbm_soft_reset) {
		/* assert, then de-assert, the GRBM reset bits;
		 * read back after each write to flush the posting */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

	if (srbm_soft_reset) {
		/* same assert/de-assert dance for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* release the memory-controller stall */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);

	/* Wait a little for things to settle down */
/* Reset one hardware queue's dequeue request and read/write pointers to
 * zero (selected via SRBM), then restore the default SRBM selection. */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
/* Post-reset hook: re-start whichever engines were reset — gfx CP,
 * compute queues/MEC — and finally the RLC. */
static int gfx_v8_0_post_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		/* zero every HQD's pointers before resuming the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		gfx_v8_0_cp_compute_resume(adev);
	gfx_v8_0_rlc_start(adev);
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
	/* the mutex serializes the capture trigger with the two 32-bit reads */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
/* Emit WRITE_DATA packets on the ring that program the per-VMID GDS, GWS
 * and OA apertures for a context switch. */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
	/* convert byte quantities into hardware allocation units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register word */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a mask of oa_size contiguous bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/* GFX IP helper callbacks exposed to the rest of the driver. */
5395 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5396 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5397 .select_se_sh = &gfx_v8_0_select_se_sh,
/* IP early_init: set ring counts and install the function tables
 * (ring, irq, gds, rlc) before sw/hw init runs.
 */
5400 static int gfx_v8_0_early_init(void *handle)
5402 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5404 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5405 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5406 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5407 gfx_v8_0_set_ring_funcs(adev);
5408 gfx_v8_0_set_irq_funcs(adev);
5409 gfx_v8_0_set_gds_init(adev);
5410 gfx_v8_0_set_rlc_funcs(adev);
/* IP late_init: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workaround (needs the IB pool, hence
 * late init), then request GFX powergating.
 */
5415 static int gfx_v8_0_late_init(void *handle)
5417 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5420 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5424 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5428 /* requires IBs so do in late init after IB pool is initialized */
5429 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5433 amdgpu_set_powergating_state(adev,
5434 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
/* Toggle static per-CU medium-grain power gating.  On Polaris11 the
 * request additionally goes to the SMU through powerplay; on all parts
 * the RLC_PG_CNTL static-per-CU enable bit is updated.
 */
5439 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5442 if (adev->asic_type == CHIP_POLARIS11)
5443 /* Send msg to SMU via Powerplay */
5444 amdgpu_set_powergating_state(adev,
5445 AMD_IP_BLOCK_TYPE_SMC,
5447 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5449 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
5452 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5455 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Polaris11 only: toggle "quick" power gating via RLC_PG_CNTL. */
5458 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5461 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
/* Carrizo/Stoney: toggle coarse GFX power gating via RLC_PG_CNTL. */
5464 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5467 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
/* Carrizo/Stoney: toggle GFX pipeline power gating, then touch a GFX
 * register so the block wakes and the new setting takes effect.
 */
5470 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5473 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5475 /* Read any GFX register to wake up GFX. */
5477 RREG32(mmDB_RENDER_CONTROL);
/* Enable/disable Carrizo GFX power gating as a pair: CG gating plus
 * (when supported) pipeline gating.  Disable unconditionally turns
 * both off.
 */
5480 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5483 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5484 cz_enable_gfx_cg_power_gating(adev, true);
5485 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5486 cz_enable_gfx_pipeline_power_gating(adev, true);
5488 cz_enable_gfx_cg_power_gating(adev, false);
5489 cz_enable_gfx_pipeline_power_gating(adev, false);
/* IP set_powergating_state entry point.  Dispatches per ASIC: the APU
 * path (Carrizo/Stoney region of the switch) drives CG/SMG/DMG gating,
 * Polaris11 additionally supports quick-MG gating.  No-op when GFX PG
 * is not advertised in pg_flags.
 */
5493 static int gfx_v8_0_set_powergating_state(void *handle,
5494 enum amd_powergating_state state)
5496 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5497 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5499 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5502 switch (adev->asic_type) {
5505 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5506 cz_update_gfx_cg_power_gating(adev, enable);
5508 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5509 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5511 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5513 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5514 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5516 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5518 case CHIP_POLARIS11:
5519 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5520 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5522 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5524 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5525 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5527 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5529 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5530 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5532 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
/* Broadcast a BPM serdes command (cmd) to reg_addr on all SEs/SHs/CUs.
 * Selects the broadcast SE/SH, sets both master masks to "all", then
 * builds RLC_SERDES_WR_CTRL: clear the command/select/format fields
 * (Stoney lacks the BPM_DATA/REG_ADDR fields, hence the shorter mask),
 * and program the reserved BPM address, data, register address and a
 * full 0xff BPM address.
 */
5541 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5542 uint32_t reg_addr, uint32_t cmd)
5546 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5548 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5549 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5551 data = RREG32(mmRLC_SERDES_WR_CTRL);
5552 if (adev->asic_type == CHIP_STONEY)
5553 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5554 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5555 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5556 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5557 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5558 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5559 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5560 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5561 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5563 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5564 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5565 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5566 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5567 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5568 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5569 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5570 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5571 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5572 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5573 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5574 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5575 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5576 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5577 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5579 WREG32(mmRLC_SERDES_WR_CTRL, data);
/* RLC safe-mode handshake: message codes and the RLC_GPR_REG2 REQ /
 * MESSAGE field layout used by the Carrizo enter/exit paths below.
 */
5582 #define MSG_ENTER_RLC_SAFE_MODE 1
5583 #define MSG_EXIT_RLC_SAFE_MODE 0
5584 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5585 #define RLC_GPR_REG2__REQ__SHIFT 0
5586 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5587 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
/* Carrizo: ask the RLC to enter safe mode.  Skipped when the RLC F32
 * core is not running, or when no CG/PG feature that needs the
 * handshake is enabled.  Sends REQ+ENTER via RLC_GPR_REG2, polls
 * RLC_GPM_STAT until both clock and power status report on, then polls
 * for the REQ bit to clear (acknowledge).
 */
5589 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5594 data = RREG32(mmRLC_CNTL);
5595 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5598 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5599 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5600 AMD_PG_SUPPORT_GFX_DMG))) {
5601 data |= RLC_GPR_REG2__REQ_MASK;
5602 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5603 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5604 WREG32(mmRLC_GPR_REG2, data);
/* wait for the GFX clock and power status to both report enabled */
5606 for (i = 0; i < adev->usec_timeout; i++) {
5607 if ((RREG32(mmRLC_GPM_STAT) &
5608 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5609 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5610 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5611 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
/* wait for the RLC to acknowledge by clearing REQ */
5616 for (i = 0; i < adev->usec_timeout; i++) {
5617 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5621 adev->gfx.rlc.in_safe_mode = true;
/* Carrizo: counterpart of cz_enter_rlc_safe_mode.  Sends REQ+EXIT via
 * RLC_GPR_REG2 and waits for the REQ bit to clear.
 */
5625 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5630 data = RREG32(mmRLC_CNTL);
5631 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5634 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5635 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5636 AMD_PG_SUPPORT_GFX_DMG))) {
5637 data |= RLC_GPR_REG2__REQ_MASK;
5638 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5639 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5640 WREG32(mmRLC_GPR_REG2, data);
5641 adev->gfx.rlc.in_safe_mode = false;
/* wait for the RLC to acknowledge by clearing REQ */
5644 for (i = 0; i < adev->usec_timeout; i++) {
5645 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
/* Iceland/Tonga-style safe-mode entry using the RLC_SAFE_MODE register
 * (CMD + MESSAGE=1) instead of RLC_GPR_REG2.  Only performed when
 * CGCG/MGCG is enabled and the RLC F32 core is running; waits for the
 * GPM status bits and then for CMD to clear.
 */
5651 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5656 data = RREG32(mmRLC_CNTL);
5657 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5660 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5661 data |= RLC_SAFE_MODE__CMD_MASK;
5662 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5663 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5664 WREG32(mmRLC_SAFE_MODE, data);
/* wait for the GFX clock and power status to both report enabled */
5666 for (i = 0; i < adev->usec_timeout; i++) {
5667 if ((RREG32(mmRLC_GPM_STAT) &
5668 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5669 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5670 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5671 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
/* wait for the RLC to acknowledge by clearing CMD */
5676 for (i = 0; i < adev->usec_timeout; i++) {
5677 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5681 adev->gfx.rlc.in_safe_mode = true;
/* Iceland/Tonga-style safe-mode exit: write CMD with MESSAGE=0 (exit),
 * only if we actually are in safe mode, then wait for CMD to clear.
 */
5685 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5690 data = RREG32(mmRLC_CNTL);
5691 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5694 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5695 if (adev->gfx.rlc.in_safe_mode) {
5696 data |= RLC_SAFE_MODE__CMD_MASK;
5697 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5698 WREG32(mmRLC_SAFE_MODE, data);
5699 adev->gfx.rlc.in_safe_mode = false;
/* wait for the RLC to acknowledge by clearing CMD */
5703 for (i = 0; i < adev->usec_timeout; i++) {
5704 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/* No-op safe-mode entry for ASICs that do not need the RLC handshake;
 * only tracks the bookkeeping flag.
 */
5710 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5712 adev->gfx.rlc.in_safe_mode = true;
/* No-op safe-mode exit counterpart; clears the bookkeeping flag. */
5715 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5717 adev->gfx.rlc.in_safe_mode = false;
/* RLC safe-mode callbacks for Carrizo-family APUs. */
5720 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5721 .enter_safe_mode = cz_enter_rlc_safe_mode,
5722 .exit_safe_mode = cz_exit_rlc_safe_mode
/* RLC safe-mode callbacks for Iceland-style (RLC_SAFE_MODE) ASICs. */
5725 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5726 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5727 .exit_safe_mode = iceland_exit_rlc_safe_mode
/* RLC safe-mode callbacks for ASICs needing no handshake. */
5730 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5731 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5732 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
/* Enable or disable medium-grain clock gating (MGCG) plus the related
 * light-sleep (MGLS) and tree-shade (CGTS) features.  The whole
 * sequence runs under RLC safe mode; the numbered step comments below
 * mirror the required hardware programming order.  Note the ordering is
 * inverted between enable (light sleep first, override cleared last)
 * and disable (override set first, light sleep off after).
 */
5735 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5738 uint32_t temp, data;
5740 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5742 /* It is disabled by HW by default */
5743 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5744 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5745 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5746 /* 1 - RLC memory Light sleep */
5747 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
/* 2 - CP memory light sleep */
5749 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5750 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5753 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5754 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
/* APUs keep the GRBM override bit; dGPUs clear it as well */
5755 if (adev->flags & AMD_IS_APU)
5756 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5757 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5758 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5760 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5761 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5762 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5763 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5766 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5768 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5769 gfx_v8_0_wait_for_rlc_serdes(adev);
5771 /* 5 - clear mgcg override */
5772 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5774 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5775 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5776 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5777 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5778 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5779 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5780 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5781 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5782 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5783 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5784 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5785 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5787 WREG32(mmCGTS_SM_CTRL_REG, data);
5791 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5792 gfx_v8_0_wait_for_rlc_serdes(adev);
/* ---- disable path ---- */
5794 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5795 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5796 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5797 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5798 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5799 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5801 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5803 /* 2 - disable MGLS in RLC */
5804 data = RREG32(mmRLC_MEM_SLP_CNTL);
5805 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5806 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5807 WREG32(mmRLC_MEM_SLP_CNTL, data);
5810 /* 3 - disable MGLS in CP */
5811 data = RREG32(mmCP_MEM_SLP_CNTL);
5812 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5813 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5814 WREG32(mmCP_MEM_SLP_CNTL, data);
5817 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5818 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5819 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5820 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5822 WREG32(mmCGTS_SM_CTRL_REG, data);
5824 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5825 gfx_v8_0_wait_for_rlc_serdes(adev);
5827 /* 6 - set mgcg override */
5828 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5832 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5833 gfx_v8_0_wait_for_rlc_serdes(adev);
5836 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/* Enable or disable coarse-grain clock gating (CGCG) and optionally
 * CG light sleep (CGLS).  Runs under RLC safe mode.  Enable: unmask the
 * gui-idle interrupts the RLC needs, clear the CGCG override via serdes,
 * then set the enable bits in RLC_CGCG_CGLS_CTRL.  Disable: set the
 * overrides, wake the block by reading CB_CGTT_SCLK_CTRL, then clear
 * the enable bits.
 */
5839 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5842 uint32_t temp, temp1, data, data1;
5844 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5846 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5848 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5849 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5850 * Cmp_busy/GFX_Idle interrupts
5852 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5854 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5855 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5857 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5859 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5860 gfx_v8_0_wait_for_rlc_serdes(adev);
5862 /* 3 - clear cgcg override */
5863 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5865 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5866 gfx_v8_0_wait_for_rlc_serdes(adev);
5868 /* 4 - write cmd to set CGLS */
5869 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5871 /* 5 - enable cgcg */
5872 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5874 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
/* enable cgls */
5876 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5878 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5879 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5882 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5884 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5888 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5890 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5891 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
/* set CGCG and CGLS overrides */
5894 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5895 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5896 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5898 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5900 /* read gfx register to wake up cgcg */
5901 RREG32(mmCB_CGTT_SCLK_CTRL);
5902 RREG32(mmCB_CGTT_SCLK_CTRL);
5903 RREG32(mmCB_CGTT_SCLK_CTRL);
5904 RREG32(mmCB_CGTT_SCLK_CTRL);
5906 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5907 gfx_v8_0_wait_for_rlc_serdes(adev);
5909 /* write cmd to Set CGCG Override */
5910 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5912 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5913 gfx_v8_0_wait_for_rlc_serdes(adev);
5915 /* write cmd to Clear CGLS */
5916 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5918 /* disable cgcg, cgls should be disabled too. */
5919 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5920 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5922 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5925 gfx_v8_0_wait_for_rlc_serdes(adev);
5927 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/* Apply both clock-gating domains in the hardware-required order:
 * when enabling, MGCG must be on before CGCG; when disabling, CGCG
 * must come off before MGCG.
 */
5929 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5933 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5934 * === MGCG + MGLS + TS(CG/LS) ===
5936 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5937 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5939 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5940 * === CGCG + CGLS ===
5942 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5943 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
/* Tonga: clock gating is driven by the SMU.  Build PP_CG_MSG_ID
 * requests (gate -> CG|LS, ungate presumably clears pp_state on an
 * elided line) and hand them to powerplay.
 */
5948 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5949 enum amd_clockgating_state state)
5951 uint32_t msg_id, pp_state;
5952 void *pp_handle = adev->powerplay.pp_handle;
5954 if (state == AMD_CG_STATE_UNGATE)
5957 pp_state = PP_STATE_CG | PP_STATE_LS;
5959 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5961 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5963 amd_set_clockgating_by_smu(pp_handle, msg_id);
5965 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5967 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5969 amd_set_clockgating_by_smu(pp_handle, msg_id);
/* Polaris: same SMU-driven scheme as Tonga but covering five GFX
 * sub-blocks, each with its own PP_CG_MSG_ID request.
 */
5974 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5975 enum amd_clockgating_state state)
5977 uint32_t msg_id, pp_state;
5978 void *pp_handle = adev->powerplay.pp_handle;
5980 if (state == AMD_CG_STATE_UNGATE)
5983 pp_state = PP_STATE_CG | PP_STATE_LS;
5985 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5987 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5989 amd_set_clockgating_by_smu(pp_handle, msg_id);
5991 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5993 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5995 amd_set_clockgating_by_smu(pp_handle, msg_id);
5997 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5999 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6001 amd_set_clockgating_by_smu(pp_handle, msg_id);
6003 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6005 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6007 amd_set_clockgating_by_smu(pp_handle, msg_id);
6009 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6011 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6013 amd_set_clockgating_by_smu(pp_handle, msg_id);
/* IP set_clockgating_state entry point: route to the direct register
 * path (APUs/older parts), the Tonga SMU path, or the Polaris SMU path
 * based on ASIC type.
 */
6018 static int gfx_v8_0_set_clockgating_state(void *handle,
6019 enum amd_clockgating_state state)
6021 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6023 switch (adev->asic_type) {
6027 gfx_v8_0_update_gfx_clock_gating(adev,
6028 state == AMD_CG_STATE_GATE ? true : false);
6031 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6033 case CHIP_POLARIS10:
6034 case CHIP_POLARIS11:
6035 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
/* Read pointer comes from the writeback slot the CP updates. */
6043 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6045 return ring->adev->wb.wb[ring->rptr_offs];
/* GFX write pointer: from the writeback slot when the ring uses a
 * doorbell, otherwise directly from the CP_RB0_WPTR register.
 */
6048 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6050 struct amdgpu_device *adev = ring->adev;
6052 if (ring->use_doorbell)
6053 /* XXX check if swapping is necessary on BE */
6054 return ring->adev->wb.wb[ring->wptr_offs];
6056 return RREG32(mmCP_RB0_WPTR);
/* GFX write pointer update: via writeback+doorbell when available,
 * otherwise program CP_RB0_WPTR and read it back to flush the write.
 */
6059 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6061 struct amdgpu_device *adev = ring->adev;
6063 if (ring->use_doorbell) {
6064 /* XXX check if swapping is necessary on BE */
6065 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6066 WDOORBELL32(ring->doorbell_index, ring->wptr);
6068 WREG32(mmCP_RB0_WPTR, ring->wptr);
6069 (void)RREG32(mmCP_RB0_WPTR);
/* Emit an HDP flush: request the flush via GPU_HDP_FLUSH_REQ and use a
 * WAIT_REG_MEM (write-wait-write) on GPU_HDP_FLUSH_DONE until the CP
 * source bit for this ring (per-pipe for compute, CP0 for gfx) is set.
 */
6073 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6075 u32 ref_and_mask, reg_mem_engine;
6077 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
6080 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6083 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6090 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6091 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6094 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6095 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6096 WAIT_REG_MEM_FUNCTION(3) | /* == */
6098 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6099 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6100 amdgpu_ring_write(ring, ref_and_mask);
6101 amdgpu_ring_write(ring, ref_and_mask);
6102 amdgpu_ring_write(ring, 0x20); /* poll interval */
/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 from the
 * ring (standard VI-era idiom).
 */
6105 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6107 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6108 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6109 WRITE_DATA_DST_SEL(0) |
6111 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6112 amdgpu_ring_write(ring, 0);
6113 amdgpu_ring_write(ring, 1);
/* Emit an indirect buffer on the gfx ring.  CE IBs use the CONST
 * variant of the packet.  Control word packs the dword length and the
 * VMID in bits 24+; the IB address must be 4-byte aligned (low two
 * bits masked).
 */
6117 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6118 struct amdgpu_ib *ib,
6119 unsigned vm_id, bool ctx_switch)
6121 u32 header, control = 0;
6123 if (ib->flags & AMDGPU_IB_FLAG_CE)
6124 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6126 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6128 control |= ib->length_dw | (vm_id << 24);
6130 amdgpu_ring_write(ring, header);
6131 amdgpu_ring_write(ring,
6135 (ib->gpu_addr & 0xFFFFFFFC));
6136 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6137 amdgpu_ring_write(ring, control);
/* Emit an indirect buffer on a compute ring.  Compute always uses the
 * plain INDIRECT_BUFFER packet with the VALID bit set in the control
 * word; no CE/ctx-switch handling is needed here.
 */
6140 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6141 struct amdgpu_ib *ib,
6142 unsigned vm_id, bool ctx_switch)
6144 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6146 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6147 amdgpu_ring_write(ring,
6151 (ib->gpu_addr & 0xFFFFFFFC));
6152 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6153 amdgpu_ring_write(ring, control);
/* Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush caches,
 * write seq (32 or 64 bit per flags) to addr, and optionally raise an
 * interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
6156 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6157 u64 seq, unsigned flags)
6159 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6160 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6162 /* EVENT_WRITE_EOP - flush caches, send int */
6163 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6164 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6166 EOP_TC_WB_ACTION_EN |
6167 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6169 amdgpu_ring_write(ring, addr & 0xfffffffc);
6170 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6171 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6172 amdgpu_ring_write(ring, lower_32_bits(seq));
6173 amdgpu_ring_write(ring, upper_32_bits(seq));
/* Emit a pipeline sync: WAIT_REG_MEM on the ring's own fence memory
 * until it equals the latest emitted sequence number.  GFX rings wait
 * on the PFP engine, compute on ME.
 */
6177 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6179 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6180 uint32_t seq = ring->fence_drv.sync_seq;
6181 uint64_t addr = ring->fence_drv.gpu_addr;
6183 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6184 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6185 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6186 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6187 amdgpu_ring_write(ring, addr & 0xfffffffc);
6188 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6189 amdgpu_ring_write(ring, seq);
6190 amdgpu_ring_write(ring, 0xffffffff);
6191 amdgpu_ring_write(ring, 4); /* poll interval */
/* Emit a VM flush: write the new page-directory base into the per-VMID
 * page table base register (contexts 0-7 vs 8-15 use different register
 * banks), trigger VM_INVALIDATE_REQUEST for this VMID, wait for the
 * invalidate to complete, and on gfx rings resync PFP with ME.  The
 * 128-dword NOP paddings keep the CE from racing the DE around the
 * flush.
 */
6194 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6195 unsigned vm_id, uint64_t pd_addr)
6197 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6199 /* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
6201 amdgpu_ring_insert_nop(ring, 128);
6203 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6204 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6205 WRITE_DATA_DST_SEL(0)) |
/* VMIDs 0-7 live in the CONTEXT0 register bank, 8-15 in CONTEXT8 */
6208 amdgpu_ring_write(ring,
6209 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6211 amdgpu_ring_write(ring,
6212 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6214 amdgpu_ring_write(ring, 0);
6215 amdgpu_ring_write(ring, pd_addr >> 12);
6217 /* bits 0-15 are the VM contexts0-15 */
6218 /* invalidate the cache */
6219 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6220 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6221 WRITE_DATA_DST_SEL(0)));
6222 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6223 amdgpu_ring_write(ring, 0);
6224 amdgpu_ring_write(ring, 1 << vm_id);
6226 /* wait for the invalidate to complete */
6227 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6228 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6229 WAIT_REG_MEM_FUNCTION(0) | /* always */
6230 WAIT_REG_MEM_ENGINE(0))); /* me */
6231 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6232 amdgpu_ring_write(ring, 0);
6233 amdgpu_ring_write(ring, 0); /* ref */
6234 amdgpu_ring_write(ring, 0); /* mask */
6235 amdgpu_ring_write(ring, 0x20); /* poll interval */
6237 /* compute doesn't have PFP */
6239 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6240 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6241 amdgpu_ring_write(ring, 0x0);
6242 /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6243 amdgpu_ring_insert_nop(ring, 128);
/* Compute write pointer always comes from the writeback slot. */
6247 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6249 return ring->adev->wb.wb[ring->wptr_offs];
/* Compute write pointer update: writeback slot plus doorbell ring. */
6252 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6254 struct amdgpu_device *adev = ring->adev;
6256 /* XXX check if swapping is necessary on BE */
6257 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6258 WDOORBELL32(ring->doorbell_index, ring->wptr);
/* Emit a fence on a compute ring via RELEASE_MEM (the MEC equivalent of
 * EVENT_WRITE_EOP): flush caches, write seq to addr, optional interrupt.
 * Note the field order differs from the gfx variant (data/int select
 * precedes the address).
 */
6261 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6265 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6266 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6268 /* RELEASE_MEM - flush caches, send int */
6269 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6270 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6272 EOP_TC_WB_ACTION_EN |
6273 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6275 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6276 amdgpu_ring_write(ring, addr & 0xfffffffc);
6277 amdgpu_ring_write(ring, upper_32_bits(addr));
6278 amdgpu_ring_write(ring, lower_32_bits(seq));
6279 amdgpu_ring_write(ring, upper_32_bits(seq));
/* Emit a SWITCH_BUFFER packet (double-buffered CE/DE handoff). */
6282 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6284 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6285 amdgpu_ring_write(ring, 0);
/* Emit a CONTEXT_CONTROL packet.  dw2 accumulates the load-enable bits:
 * always set load_enable, add the global/sh/per-context load bits on a
 * real context switch, and request a CE-RAM load when a preamble IB is
 * present (or the first time one appears, even without a switch).
 */
6288 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6292 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6293 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6294 /* set load_global_config & load_global_uconfig */
6296 /* set load_cs_sh_regs */
6298 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6301 /* set load_ce_ram if preamble presented */
6302 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6305 /* still load_ce_ram if this is the first time preamble presented
6306 * although there is no context switch happens.
6308 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6312 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6313 amdgpu_ring_write(ring, dw2);
6314 amdgpu_ring_write(ring, 0);
/* Dwords needed per gfx IB emission (see gfx_v8_0_ring_emit_ib_gfx). */
6317 static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
6320 4; /* gfx_v8_0_ring_emit_ib_gfx */
/* Worst-case dword budget for one gfx submission frame; each term is
 * annotated with the emit helper it accounts for.
 */
6323 static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
6326 20 + /* gfx_v8_0_ring_emit_gds_switch */
6327 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6328 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6329 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6330 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6331 256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6332 2 + /* gfx_v8_ring_emit_sb */
6333 3; /* gfx_v8_ring_emit_cntxcntl */
/* Dwords needed per compute IB emission. */
6336 static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
6339 4; /* gfx_v8_0_ring_emit_ib_compute */
/* Worst-case dword budget for one compute submission frame. */
6342 static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
6345 20 + /* gfx_v8_0_ring_emit_gds_switch */
6346 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6347 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6348 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6349 17 + /* gfx_v8_0_ring_emit_vm_flush */
6350 7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
/* Enable/disable the GFX ring0 end-of-pipe timestamp interrupt. */
6353 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6354 enum amdgpu_interrupt_state state)
6356 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6357 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the EOP timestamp interrupt for a compute pipe.
 * Validates me/pipe; invalid combinations are only logged, since
 * amdgpu drives MEC1 pipe 0 and amdkfd owns the rest.
 */
6360 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6362 enum amdgpu_interrupt_state state)
6365 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6366 * handles the setting of interrupts for this specific pipe. All other
6367 * pipes' interrupts are set by amdkfd.
6375 DRM_DEBUG("invalid pipe %d\n", pipe);
6379 DRM_DEBUG("invalid me %d\n", me);
6383 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6384 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the privileged-register-access fault interrupt. */
6387 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6388 struct amdgpu_irq_src *source,
6390 enum amdgpu_interrupt_state state)
6392 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6393 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the privileged-instruction fault interrupt. */
6398 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6399 struct amdgpu_irq_src *source,
6401 enum amdgpu_interrupt_state state)
6403 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6404 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* irq_src callback: dispatch an EOP interrupt enable/disable request to
 * the gfx ring or the matching MEC/pipe handler.
 */
6409 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6410 struct amdgpu_irq_src *src,
6412 enum amdgpu_interrupt_state state)
6415 case AMDGPU_CP_IRQ_GFX_EOP:
6416 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6418 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6419 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6421 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6422 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6424 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6425 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6427 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6428 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6430 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6431 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6433 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6434 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6436 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6437 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6439 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6440 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * run fence processing on the matching ring (gfx ring0 for ME0,
 * otherwise scan the compute rings).
 */
6448 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6449 struct amdgpu_irq_src *source,
6450 struct amdgpu_iv_entry *entry)
6453 u8 me_id, pipe_id, queue_id;
6454 struct amdgpu_ring *ring;
6456 DRM_DEBUG("IH: CP EOP\n");
6457 me_id = (entry->ring_id & 0x0c) >> 2;
6458 pipe_id = (entry->ring_id & 0x03) >> 0;
6459 queue_id = (entry->ring_id & 0x70) >> 4;
6463 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6467 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6468 ring = &adev->gfx.compute_ring[i];
6469 /* Per-queue interrupt is supported for MEC starting from VI.
6470 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6472 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6473 amdgpu_fence_process(ring);
/* Privileged register fault: log and schedule a GPU reset. */
6480 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6481 struct amdgpu_irq_src *source,
6482 struct amdgpu_iv_entry *entry)
6484 DRM_ERROR("Illegal register access in command stream\n");
6485 schedule_work(&adev->reset_work);
/* Privileged instruction fault: log and schedule a GPU reset. */
6489 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6490 struct amdgpu_irq_src *source,
6491 struct amdgpu_iv_entry *entry)
6493 DRM_ERROR("Illegal instruction in command stream\n");
6494 schedule_work(&adev->reset_work);
/* amd_ip_funcs table registering the GFX v8 block with the IP framework. */
6498 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6500 .early_init = gfx_v8_0_early_init,
6501 .late_init = gfx_v8_0_late_init,
6502 .sw_init = gfx_v8_0_sw_init,
6503 .sw_fini = gfx_v8_0_sw_fini,
6504 .hw_init = gfx_v8_0_hw_init,
6505 .hw_fini = gfx_v8_0_hw_fini,
6506 .suspend = gfx_v8_0_suspend,
6507 .resume = gfx_v8_0_resume,
6508 .is_idle = gfx_v8_0_is_idle,
6509 .wait_for_idle = gfx_v8_0_wait_for_idle,
6510 .check_soft_reset = gfx_v8_0_check_soft_reset,
6511 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6512 .soft_reset = gfx_v8_0_soft_reset,
6513 .post_soft_reset = gfx_v8_0_post_soft_reset,
6514 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6515 .set_powergating_state = gfx_v8_0_set_powergating_state,
/* Ring callbacks for the GFX ring (includes switch-buffer and
 * context-control emits that compute rings do not have).
 */
6518 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6519 .get_rptr = gfx_v8_0_ring_get_rptr,
6520 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6521 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6523 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6524 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6525 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6526 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6527 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6528 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6529 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6530 .test_ring = gfx_v8_0_ring_test_ring,
6531 .test_ib = gfx_v8_0_ring_test_ib,
6532 .insert_nop = amdgpu_ring_insert_nop,
6533 .pad_ib = amdgpu_ring_generic_pad_ib,
6534 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6535 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6536 .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
6537 .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
6540 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6541 .get_rptr = gfx_v8_0_ring_get_rptr,
6542 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6543 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6545 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6546 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6547 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6548 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6549 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6550 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6551 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6552 .test_ring = gfx_v8_0_ring_test_ring,
6553 .test_ib = gfx_v8_0_ring_test_ib,
6554 .insert_nop = amdgpu_ring_insert_nop,
6555 .pad_ib = amdgpu_ring_generic_pad_ib,
6556 .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
6557 .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
6560 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6564 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6565 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6567 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6568 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6571 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6572 .set = gfx_v8_0_set_eop_interrupt_state,
6573 .process = gfx_v8_0_eop_irq,
6576 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6577 .set = gfx_v8_0_set_priv_reg_fault_state,
6578 .process = gfx_v8_0_priv_reg_irq,
6581 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6582 .set = gfx_v8_0_set_priv_inst_fault_state,
6583 .process = gfx_v8_0_priv_inst_irq,
6586 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6588 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6589 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6591 adev->gfx.priv_reg_irq.num_types = 1;
6592 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6594 adev->gfx.priv_inst_irq.num_types = 1;
6595 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6598 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6600 switch (adev->asic_type) {
6602 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6606 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6609 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6614 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6616 /* init asci gds info */
6617 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6618 adev->gds.gws.total_size = 64;
6619 adev->gds.oa.total_size = 16;
6621 if (adev->gds.mem.total_size == 64 * 1024) {
6622 adev->gds.mem.gfx_partition_size = 4096;
6623 adev->gds.mem.cs_partition_size = 4096;
6625 adev->gds.gws.gfx_partition_size = 4;
6626 adev->gds.gws.cs_partition_size = 4;
6628 adev->gds.oa.gfx_partition_size = 4;
6629 adev->gds.oa.cs_partition_size = 1;
6631 adev->gds.mem.gfx_partition_size = 1024;
6632 adev->gds.mem.cs_partition_size = 1024;
6634 adev->gds.gws.gfx_partition_size = 16;
6635 adev->gds.gws.cs_partition_size = 16;
6637 adev->gds.oa.gfx_partition_size = 4;
6638 adev->gds.oa.cs_partition_size = 4;
6642 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6650 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6651 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6653 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6656 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6660 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6661 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6663 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6665 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6668 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6670 int i, j, k, counter, active_cu_number = 0;
6671 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6672 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6673 unsigned disable_masks[4 * 2];
6675 memset(cu_info, 0, sizeof(*cu_info));
6677 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6679 mutex_lock(&adev->grbm_idx_mutex);
6680 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6681 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6685 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6687 gfx_v8_0_set_user_cu_inactive_bitmap(
6688 adev, disable_masks[i * 2 + j]);
6689 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6690 cu_info->bitmap[i][j] = bitmap;
6692 for (k = 0; k < 16; k ++) {
6693 if (bitmap & mask) {
6700 active_cu_number += counter;
6701 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6704 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6705 mutex_unlock(&adev->grbm_idx_mutex);
6707 cu_info->number = active_cu_number;
6708 cu_info->ao_cu_mask = ao_cu_mask;