// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */

#include "msm_gem.h"
#include "msm_mmu.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a6xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}
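
/*
 * spin_until() comes from adreno_gpu.h. As a rough sketch (the header has
 * the authoritative definition), it busy-waits on a condition with a
 * jiffies-based timeout and evaluates to 0 on success or -ETIMEDOUT:
 *
 *	#define spin_until(X) ({ \
 *		int __ret = -ETIMEDOUT; \
 *		unsigned long __t = jiffies + ADRENO_IDLE_TIMEOUT; \
 *		do { \
 *			if (X) { \
 *				__ret = 0; \
 *				break; \
 *			} \
 *		} while (time_before(jiffies, __t)); \
 *		__ret; \
 *	})
 */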

static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
}
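
/*
 * get_wptr() is a small helper from adreno_gpu.h; roughly, it turns the
 * CPU-side write pointer into a dword index into the ring, wrapping at the
 * ring size:
 *
 *	static inline uint32_t get_wptr(struct msm_ringbuffer *ring)
 *	{
 *		return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);
 *	}
 */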

static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	/* Invalidate CCU depth and color */
	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PC_CCU_INVALIDATE_DEPTH);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PC_CCU_INVALIDATE_COLOR);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
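			/* fall through */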
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			break;
		}
	}

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This ensures that the timestamp is
	 * written to memory and then triggers the interrupt.
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	a6xx_flush(gpu, ring);
}
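
/*
 * The OUT_PKT4()/OUT_PKT7()/OUT_RING() helpers used above live in
 * adreno_gpu.h. As a rough sketch, OUT_RING() stores one dword at ring->next
 * (wrapping at ring->end), and OUT_PKT7() first emits a type-7 PM4 packet
 * header encoding the opcode and payload count:
 *
 *	static inline void OUT_RING(struct msm_ringbuffer *ring, uint32_t data)
 *	{
 *		if (ring->next == ring->end)
 *			ring->next = ring->start;
 *		*(ring->next++) = data;
 *	}
 *
 * rbmemptr(ring, fence) resolves to the GPU iova of the 'fence' field in the
 * per-ring memptrs structure - the address the CACHE_FLUSH_TS event writes
 * the timestamp to before raising the interrupt.
 */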

static const struct {
	u32 offset;
	u32 value;
} a6xx_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}
};

static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int i;
	u32 val;

	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

	/* Don't re-program the registers if they are already correct */
	if ((!state && !val) || (state && (val == 0x8aa8aa02)))
		return;

	/* Disable SP clock before programming HWCG registers */
	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

	for (i = 0; i < ARRAY_SIZE(a6xx_hwcg); i++)
		gpu_write(gpu, a6xx_hwcg[i].offset,
			state ? a6xx_hwcg[i].value : 0);

	/* Enable SP clock */
	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? 0x8aa8aa02 : 0);
}
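
/*
 * gmu_rmw() is a read-modify-write helper from a6xx_gmu.h; a rough sketch:
 *
 *	static inline void gmu_rmw(struct a6xx_gmu *gmu, u32 reg, u32 mask, u32 or)
 *	{
 *		u32 val = gmu_read(gmu, reg);
 *
 *		val &= ~mask;
 *		gmu_write(gmu, reg, val | or);
 *	}
 *
 * So the two calls in a6xx_set_hwcg() clear bit 0 of SPTPRAC_CLOCK_CONTROL
 * to gate the SP clock and then set it again to re-enable it.
 */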

static int a6xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002f);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* No workarounds enabled */
	OUT_RING(ring, 0x00000000);

	/* Pad rest of the cmds with 0's */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a6xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	if (!a6xx_gpu->sqe_bo) {
		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);

		if (IS_ERR(a6xx_gpu->sqe_bo)) {
			int ret = PTR_ERR(a6xx_gpu->sqe_bo);

			a6xx_gpu->sqe_bo = NULL;
			DRM_DEV_ERROR(&gpu->pdev->dev,
				"Could not allocate SQE ucode: %d\n", ret);

			return ret;
		}
	}

	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO,
		REG_A6XX_CP_SQE_INSTR_BASE_HI, a6xx_gpu->sqe_iova);

	return 0;
}
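
/*
 * gpu_write64() is a helper from msm_gpu.h; roughly, it splits a 64-bit
 * value across a LO/HI register pair:
 *
 *	static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
 *	{
 *		gpu_write(gpu, lo, lower_32_bits(val));
 *		gpu_write(gpu, hi, upper_32_bits(val));
 *	}
 */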

#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A6XX_RBBM_INT_0_MASK_CP_IB2 | \
	  A6XX_RBBM_INT_0_MASK_CP_IB1 | \
	  A6XX_RBBM_INT_0_MASK_CP_RB | \
	  A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	  A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)

static int a6xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	/* Make sure the GMU keeps the GPU on while we set it up */
	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Enable hardware clock gating */
	a6xx_set_hwcg(gpu, true);

	/* VBIF start */
	gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
	gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0);
	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff);
	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_LO, 0xfffff000);
	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);

	/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
	gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
		REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);

	gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
		REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
		0x00100000 + adreno_gpu->gmem - 1);

	gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
	gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);

	gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
	gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);

	/* Set the mem pool size */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);

	/* Set the primFifo thresholds to the default values */
	gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, (0x300 << 11));

	/* Set the AHB default slave response to "ERROR" */
	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* FIXME: not sure if this should live here or in a6xx_gmu.c */
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK,
		0xff000000);
	gmu_rmw(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
		0xff, 0x20);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE,
		0x01);

	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1);
	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1);
	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1);
	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21);

	/* Enable fault detection */
	gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0x1fffff);

	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, 0x00000003);

	gpu_write(gpu, REG_A6XX_CP_PROTECT(0),
		A6XX_PROTECT_RDONLY(0x600, 0x51));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(1), A6XX_PROTECT_RW(0xae50, 0x2));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(2), A6XX_PROTECT_RW(0x9624, 0x13));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(3), A6XX_PROTECT_RW(0x8630, 0x8));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(4), A6XX_PROTECT_RW(0x9e70, 0x1));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(5), A6XX_PROTECT_RW(0x9e78, 0x187));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(6), A6XX_PROTECT_RW(0xf000, 0x810));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(7),
		A6XX_PROTECT_RDONLY(0xfc00, 0x3));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(8), A6XX_PROTECT_RW(0x50e, 0x0));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(9), A6XX_PROTECT_RDONLY(0x50f, 0x0));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(10), A6XX_PROTECT_RW(0x510, 0x0));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(11),
		A6XX_PROTECT_RDONLY(0x0, 0x4f9));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(12),
		A6XX_PROTECT_RDONLY(0x501, 0xa));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(13),
		A6XX_PROTECT_RDONLY(0x511, 0x44));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(14), A6XX_PROTECT_RW(0xe00, 0xe));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(15), A6XX_PROTECT_RW(0x8e00, 0x0));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(16), A6XX_PROTECT_RW(0x8e50, 0xf));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(17), A6XX_PROTECT_RW(0xbe02, 0x0));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(18),
		A6XX_PROTECT_RW(0xbe20, 0x11f3));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(19), A6XX_PROTECT_RW(0x800, 0x82));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(20), A6XX_PROTECT_RW(0x8a0, 0x8));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(21), A6XX_PROTECT_RW(0x8ab, 0x19));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(22), A6XX_PROTECT_RW(0x900, 0x4d));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(23), A6XX_PROTECT_RW(0x98d, 0x76));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(24),
		A6XX_PROTECT_RDONLY(0x8d0, 0x23));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(25),
		A6XX_PROTECT_RDONLY(0x980, 0x4));
	gpu_write(gpu, REG_A6XX_CP_PROTECT(26), A6XX_PROTECT_RW(0xa630, 0x0));

	/* Enable interrupts */
	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	ret = a6xx_ucode_init(gpu);
	if (ret)
		goto out;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);

	ret = a6xx_cp_init(gpu);
	if (ret)
		goto out;

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);

out:
	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	/* Take the GMU out of its special boot mode */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);

	return ret;
}
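
/*
 * The A6XX_PROTECT_RW()/A6XX_PROTECT_RDONLY() macros used in a6xx_hw_init()
 * come from a6xx_gpu.h. As a rough sketch, each one packs a base register
 * offset and a range length into a single CP_PROTECT entry, with bit 31
 * selecting whether reads are blocked in addition to writes:
 *
 *	#define A6XX_PROTECT_RW(_reg, _len) \
 *		((1 << 31) | (((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF))
 *
 *	#define A6XX_PROTECT_RDONLY(_reg, _len) \
 *		((((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF))
 */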

static void a6xx_dump(struct msm_gpu *gpu)
{
	dev_info(&gpu->pdev->dev, "status:   %08x\n",
			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
	adreno_dump(gpu);
}

#define VBIF_RESET_ACK_TIMEOUT	100
#define VBIF_RESET_ACK_MASK	0x00f0

static void a6xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++)
		dev_info(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));

	if (hang_debug)
		a6xx_dump(gpu);

	/*
	 * Turn off keep alive that might have been enabled by the hang
	 * interrupt
	 */
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);

	gpu->funcs->pm_suspend(gpu);
	gpu->funcs->pm_resume(gpu);

	msm_gpu_hw_init(gpu);
}

static int a6xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;

	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}

static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);

	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
		dev_err_ratelimited(&gpu->pdev->dev,
			"CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev,
			"CP ucode error interrupt\n");

	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));

	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(&gpu->pdev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 20) ? "READ" : "WRITE",
			(val & 0x3ffff), val);
	}

	if (status & A6XX_CP_INT_CP_AHB_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");

	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");

	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
}

static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}

static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);

	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a6xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a6xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
		msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A6XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A6XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR,
		REG_A6XX_CP_RB_RPTR_ADDR_LO),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A6XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A6XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A6XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
};

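/* Pairs of inclusive [start, end] register ranges to dump, terminated by ~0 */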
static const u32 a6xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b,
	0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044,
	0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb,
	0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d,
	0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511,
	0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813,
	0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843,
	0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4,
	0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911,
	0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996,
	0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1,
	0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06,
	0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19,
	0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601,
	0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637,
	0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c,
	0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f,
	0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77,
	0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e,
	0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23,
	0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79,
	0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a,
	0xa610, 0xa617, 0xa630, 0xa630,
	~0
};

static int a6xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	ret = a6xx_gmu_resume(a6xx_gpu);

	gpu->needs_hw_init = true;

	return ret;
}

static int a6xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/*
	 * Make sure the GMU is idle before continuing (because some transitions
	 * may use VBIF)
	 */
	a6xx_gmu_wait_for_idle(a6xx_gpu);

	/* Clear the VBIF pipe before shutting down */
	/* FIXME: This accesses the GPU - do we need to make sure it is on? */
	gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
	spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) == 0xf);
	gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);

	return a6xx_gmu_stop(a6xx_gpu);
}

static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Force the GPU power on so we can read this register */
	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
		REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);

	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	return 0;
}
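
/*
 * gpu_read64() (also from msm_gpu.h) is the read-side counterpart of
 * gpu_write64(); roughly, it reassembles a 64-bit value from a LO/HI
 * register pair:
 *
 *	static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
 *	{
 *		return (u64)gpu_read(gpu, lo) |
 *			((u64)gpu_read(gpu, hi) << 32);
 *	}
 */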

#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	adreno_show(gpu, state, p);
}
#endif

static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	return a6xx_gpu->cur_ring;
}

static void a6xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	if (a6xx_gpu->sqe_bo) {
		if (a6xx_gpu->sqe_iova)
			msm_gem_put_iova(a6xx_gpu->sqe_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(a6xx_gpu->sqe_bo);
	}

	a6xx_gmu_remove(a6xx_gpu);

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a6xx_gpu);
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a6xx_hw_init,
		.pm_suspend = a6xx_pm_suspend,
		.pm_resume = a6xx_pm_resume,
		.recover = a6xx_recover,
		.submit = a6xx_submit,
		.flush = a6xx_flush,
		.active_ring = a6xx_active_ring,
		.irq = a6xx_irq,
		.destroy = a6xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a6xx_show,
#endif
	},
	.get_timestamp = a6xx_get_timestamp,
};

struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct device_node *node;
	struct a6xx_gpu *a6xx_gpu;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
	if (!a6xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a6xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a6xx_registers;
	adreno_gpu->reg_offsets = a6xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret) {
		a6xx_destroy(&(a6xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	/* Check if there is a GMU phandle and set it up */
	node = of_parse_phandle(pdev->dev.of_node, "gmu", 0);

	/* FIXME: How do we gracefully handle this? */
	BUG_ON(!node);

	ret = a6xx_gmu_probe(a6xx_gpu, node);
	if (ret) {
		a6xx_destroy(&(a6xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
				a6xx_fault_handler);

	return gpu;
}