GNU Linux-libre 4.14.266-gnu1
drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring == &adev->vce.ring[0])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring == &adev->vce.ring[1])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}

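/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to boot
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the VCPU reports the firmware as loaded,
 * soft resetting the ECPU between retries.
 * Returns 0 on success, -ETIMEDOUT if the firmware never comes up.
 */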
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

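/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler (MMSCH)
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table to hand to the MMSCH
 *
 * Programs the MMSCH with the location and size of the init
 * descriptor table and waits for it to acknowledge completion.
 */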
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
        adev->vce.ring[0].wptr = 0;
        adev->vce.ring[0].wptr_old = 0;

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}

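/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV via the MMSCH
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH init descriptor table (ring setup, MC resume
 * programming and firmware boot polling) and hands it to the MM
 * scheduler, which performs the register writes on behalf of the VF.
 */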
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                            lower_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                            upper_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                            ring->ring_size / 4);

                /* BEGIN of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                } else {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                    adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                    adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                    adev->vce.gpu_addr >> 8);
                }

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                size = VCE_V4_0_FW_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                            offset & 0x7FFFFFFF);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset += size;
                size = VCE_V4_0_STACK_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                            offset & 0x7FFFFFFF);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                            offset & 0x7FFFFFFF);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;
        }

        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

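/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Halts the VCPU, holds the ECPU in soft reset and clears the
 * busy flag.
 */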
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                vce_v4_0_set_vce_sw_clock_gating(adev, false);
        */

        return 0;
}

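/**
 * vce_v4_0_early_init - set up IP block callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Picks the number of rings to expose and installs the ring and
 * interrupt callbacks.
 */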
static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

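/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the interrupt source, allocates the VCE BO, sets up
 * firmware loading and initializes the rings and the SR-IOV MM table.
 */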
static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

                adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
                if (!adev->vce.saved_bo)
                        return -ENOMEM;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;

                        /* currently only the first encode ring is used under
                         * SR-IOV, so park the other rings at an unused
                         * doorbell location.
                         */
                        if (i == 0)
                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
                        else
                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        r = amdgpu_virt_alloc_mm_table(adev);
        if (r)
                return r;

        return r;
}

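/**
 * vce_v4_0_sw_fini - software teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Frees the MM table and the saved firmware image, then tears down
 * the common VCE state.
 */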
static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

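/**
 * vce_v4_0_hw_init - start and test VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the block (through the MMSCH under SR-IOV) and runs a ring
 * test on each ring.
 */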
static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
                if (r)
                        return r;
                else
                        adev->vce.ring[i].ready = true;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

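/**
 * vce_v4_0_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stops the VCE block and marks the rings as not ready.  Under
 * SR-IOV the registers are left untouched since the host owns them.
 */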
static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        return 0;
}

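/**
 * vce_v4_0_suspend - suspend VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Saves the firmware BO contents when the PSP loaded the firmware,
 * stops the hardware and suspends the common VCE state.
 */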
static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_fromio(adev->vce.saved_bo, ptr, size);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

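/**
 * vce_v4_0_resume - resume VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restores the saved firmware BO contents (or re-validates the
 * firmware) and restarts the hardware.
 */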
static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_toio(ptr, adev->vce.saved_bo, size);
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}

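/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the LMI and VCPU cache registers with the firmware,
 * stack and data offsets so the VCPU can fetch its image.
 */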
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
        }

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for the 1st instance, 0x10 for the 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bit 3 to bit 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /*
         * This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
                (adev->asic_type == CHIP_TONGA) ||
                (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
                return 0;

        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v4_0_stop()? */
                return 0;
        else
                return vce_v4_0_start(adev);
}
#endif

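/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id of the job
 * @ctx_switch: unused by this ring type
 *
 * Writes the VM IB execute command, the IB address and its length
 * to the ring.
 */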
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vm_id);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

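/**
 * vce_v4_0_emit_vm_flush - flush the VM TLB from the ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush for
 * @pd_addr: page directory address
 *
 * Updates the page table base for the given VM id, then requests a
 * TLB flush through the MMHUB invalidation engine and waits for the
 * flush to be acknowledged.
 */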
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                         unsigned int vm_id, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
        unsigned eng = ring->vm_inv_eng;

        pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
        pd_addr |= AMDGPU_PTE_VALID;

        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
        amdgpu_ring_write(ring, upper_32_bits(pd_addr));

        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
        amdgpu_ring_write(ring, lower_32_bits(pd_addr));

        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
        amdgpu_ring_write(ring, 0xffffffff);
        amdgpu_ring_write(ring, lower_32_bits(pd_addr));

        /* flush TLB */
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
        amdgpu_ring_write(ring, req);

        /* wait for flush */
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
        amdgpu_ring_write(ring, 1 << vm_id);
        amdgpu_ring_write(ring, 1 << vm_id);
}

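/**
 * vce_v4_0_set_interrupt_state - enable/disable the VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Enables or disables the system interrupt trap.  Skipped under
 * SR-IOV since the host owns the register.
 */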
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (!amdgpu_sriov_vf(adev)) {
                if (state == AMDGPU_IRQ_STATE_ENABLE)
                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        }
        return 0;
}

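/**
 * vce_v4_0_process_interrupt - acknowledge and route a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledges the trap interrupt and kicks fence processing on the
 * ring identified by the source data.
 */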
static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
                        VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
                        ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .vmhub = AMDGPU_MMHUB,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                17 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};