/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring->me == 0)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring->me == 1)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}

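/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for up to ten attempts of roughly one second each,
 * soft-resetting the ECPU between attempts. Returns 0 once the VCPU
 * reports the firmware as loaded, -ETIMEDOUT otherwise.
 */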
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

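/**
 * vce_v4_0_mmsch_start - start the MM scheduler handshake (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 * @table: init table describing the register programming to replay
 *
 * Program the descriptor address, VMID and size into the MMSCH VF
 * registers, reset ring 0's doorbell and write pointer state, then ring
 * the mailbox and poll for completion. The magic mailbox values appear
 * to be part of the MMSCH v1.0 handshake protocol.
 */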
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
        adev->vce.ring[0].wptr = 0;
        adev->vce.ring[0].wptr_old = 0;

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}

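/**
 * vce_v4_0_sriov_start - build the MMSCH init table and start VCE (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Under SR-IOV the VCE registers are not written directly; instead the
 * required register writes and polls are recorded in a shared init
 * table, which the host MM scheduler replays on our behalf.
 */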
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                            lower_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                            upper_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                            ring->ring_size / 4);

                /* start of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
                } else {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                }
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
                                                (adev->vce.gpu_addr >> 40) & 0xff);

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                size = VCE_V4_0_FW_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                        offset & ~0x0f000000);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
                size = VCE_V4_0_STACK_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                        (offset & ~0x0f000000) | (1 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                        (offset & ~0x0f000000) | (2 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;
        }

        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

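/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Clear the VCPU control bits, hold the ECPU in soft reset and clear
 * the busy flag.
 */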
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                vce_v4_0_set_vce_sw_clock_gating(adev, false);
        */

        return 0;
}

static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

                adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
                if (!adev->vce.saved_bo)
                        return -ENOMEM;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SR-IOV */
                        ring->use_doorbell = true;

                        /* Currently only the first encoding ring is used
                         * under SR-IOV, so park the unused rings on a
                         * spare doorbell location.
                         */
                        if (i == 0)
                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
                        else
                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        r = amdgpu_vce_entity_init(adev);
        if (r)
                return r;

        r = amdgpu_virt_alloc_mm_table(adev);
        if (r)
                return r;

        return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kvfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
                if (r)
                        return r;
                else
                        adev->vce.ring[i].ready = true;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        return 0;
}

static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_fromio(adev->vce.saved_bo, ptr, size);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_toio(ptr, adev->vce.saved_bo, size);
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}

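/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Tell the VCE memory controller where the firmware, stack and data
 * segments live. With PSP firmware loading the firmware image sits in
 * a separate ucode buffer, otherwise it is at the start of the VCPU
 * buffer object.
 */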
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
        }

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX selects which VCE instance's
         * registers are accessed (0 for the 1st instance, 0x10 for the
         * 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3..6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
                (adev->asic_type == CHIP_TONGA) ||
                (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
                return 0;

        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v4_0_stop()? */
                return 0;
        else
                return vce_v4_0_start(adev);
}
#endif

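/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vmid: VM ID to use
 * @ctx_switch: unused by VCE
 *
 * Write the IB's VM ID, address and length to the ring.
 */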
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
{
        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

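/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address to write the sequence number to
 * @seq: sequence number to write
 * @flags: fence flags; 64 bit sequence numbers are not supported
 *
 * Write a fence followed by a trap command so that an interrupt fires
 * once the sequence number has been written.
 */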
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, val);
}

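/**
 * vce_v4_0_emit_vm_flush - flush the VM TLB from the ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM ID to flush for
 * @pd_addr: new page directory address
 *
 * Emit the generic GMC TLB flush, then wait until this VMID's page
 * table base register reflects the new page directory address.
 */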
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for reg writes */
        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
                               lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
                               uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, val);
}

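/**
 * vce_v4_0_set_interrupt_state - toggle the VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: enable or disable the interrupt
 *
 * Skipped under SR-IOV, where the interrupt enable register is
 * presumably owned by the host.
 */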
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (!amdgpu_sriov_vf(adev)) {
                if (state == AMDGPU_IRQ_STATE_ENABLE)
                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        }
        return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .vmhub = AMDGPU_MMHUB,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
                4 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++) {
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
                adev->vce.ring[i].me = i;
        }
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};