GNU Linux-libre 4.14.266-gnu1
[releases.git] / drivers / gpu / drm / amd / amdkfd / kfd_dbgdev.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/log2.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/mutex.h>
30 #include <linux/device.h>
31
32 #include "kfd_pm4_headers.h"
33 #include "kfd_pm4_headers_diq.h"
34 #include "kfd_kernel_queue.h"
35 #include "kfd_priv.h"
36 #include "kfd_pm4_opcodes.h"
37 #include "cik_regs.h"
38 #include "kfd_dbgmgr.h"
39 #include "kfd_dbgdev.h"
40 #include "kfd_device_queue_manager.h"
41 #include "../../radeon/cik_reg.h"
42
43 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
44 {
45         dev->kfd2kgd->address_watch_disable(dev->kgd);
46 }
47
48 static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
49                                 unsigned int pasid, uint64_t vmid0_address,
50                                 uint32_t *packet_buff, size_t size_in_bytes)
51 {
52         struct pm4__release_mem *rm_packet;
53         struct pm4__indirect_buffer_pasid *ib_packet;
54         struct kfd_mem_obj *mem_obj;
55         size_t pq_packets_size_in_bytes;
56         union ULARGE_INTEGER *largep;
57         union ULARGE_INTEGER addr;
58         struct kernel_queue *kq;
59         uint64_t *rm_state;
60         unsigned int *ib_packet_buff;
61         int status;
62
63         if (WARN_ON(!size_in_bytes))
64                 return -EINVAL;
65
66         kq = dbgdev->kq;
67
68         pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
69                                 sizeof(struct pm4__indirect_buffer_pasid);
70
71         /*
72          * We acquire a buffer from DIQ
73          * The receive packet buff will be sitting on the Indirect Buffer
74          * and in the PQ we put the IB packet + sync packet(s).
75          */
76         status = kq->ops.acquire_packet_buffer(kq,
77                                 pq_packets_size_in_bytes / sizeof(uint32_t),
78                                 &ib_packet_buff);
79         if (status) {
80                 pr_err("acquire_packet_buffer failed\n");
81                 return status;
82         }
83
84         memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
85
86         ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
87
88         ib_packet->header.count = 3;
89         ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
90         ib_packet->header.type = PM4_TYPE_3;
91
92         largep = (union ULARGE_INTEGER *) &vmid0_address;
93
94         ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
95         ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
96
97         ib_packet->control = (1 << 23) | (1 << 31) |
98                         ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
99
100         ib_packet->bitfields5.pasid = pasid;
101
102         /*
103          * for now we use release mem for GPU-CPU synchronization
104          * Consider WaitRegMem + WriteData as a better alternative
105          * we get a GART allocations ( gpu/cpu mapping),
106          * for the sync variable, and wait until:
107          * (a) Sync with HW
108          * (b) Sync var is written by CP to mem.
109          */
110         rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
111                         (sizeof(struct pm4__indirect_buffer_pasid) /
112                                         sizeof(unsigned int)));
113
114         status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
115                                         &mem_obj);
116
117         if (status) {
118                 pr_err("Failed to allocate GART memory\n");
119                 kq->ops.rollback_packet(kq);
120                 return status;
121         }
122
123         rm_state = (uint64_t *) mem_obj->cpu_ptr;
124
125         *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
126
127         rm_packet->header.opcode = IT_RELEASE_MEM;
128         rm_packet->header.type = PM4_TYPE_3;
129         rm_packet->header.count = sizeof(struct pm4__release_mem) /
130                                         sizeof(unsigned int) - 2;
131
132         rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
133         rm_packet->bitfields2.event_index =
134                                 event_index___release_mem__end_of_pipe;
135
136         rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
137         rm_packet->bitfields2.atc = 0;
138         rm_packet->bitfields2.tc_wb_action_ena = 1;
139
140         addr.quad_part = mem_obj->gpu_addr;
141
142         rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
143         rm_packet->address_hi = addr.u.high_part;
144
145         rm_packet->bitfields3.data_sel =
146                                 data_sel___release_mem__send_64_bit_data;
147
148         rm_packet->bitfields3.int_sel =
149                         int_sel___release_mem__send_data_after_write_confirm;
150
151         rm_packet->bitfields3.dst_sel =
152                         dst_sel___release_mem__memory_controller;
153
154         rm_packet->data_lo = QUEUESTATE__ACTIVE;
155
156         kq->ops.submit_packet(kq);
157
158         /* Wait till CP writes sync code: */
159         status = amdkfd_fence_wait_timeout(
160                         (unsigned int *) rm_state,
161                         QUEUESTATE__ACTIVE, 1500);
162
163         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
164
165         return status;
166 }
167
168 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
169 {
170         /*
171          * no action is needed in this case,
172          * just make sure diq will not be used
173          */
174
175         dbgdev->kq = NULL;
176
177         return 0;
178 }
179
180 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
181 {
182         struct queue_properties properties;
183         unsigned int qid;
184         struct kernel_queue *kq = NULL;
185         int status;
186
187         status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
188                                 &properties, 0, KFD_QUEUE_TYPE_DIQ,
189                                 &qid);
190
191         if (status) {
192                 pr_err("Failed to create DIQ\n");
193                 return status;
194         }
195
196         pr_debug("DIQ Created with queue id: %d\n", qid);
197
198         kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
199
200         if (!kq) {
201                 pr_err("Error getting DIQ\n");
202                 pqm_destroy_queue(dbgdev->pqm, qid);
203                 return -EFAULT;
204         }
205
206         dbgdev->kq = kq;
207
208         return status;
209 }
210
211 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
212 {
213         /* disable watch address */
214         dbgdev_address_watch_disable_nodiq(dbgdev->dev);
215         return 0;
216 }
217
218 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
219 {
220         /* todo - disable address watch */
221         int status;
222
223         status = pqm_destroy_queue(dbgdev->pqm,
224                         dbgdev->kq->queue->properties.queue_id);
225         dbgdev->kq = NULL;
226
227         return status;
228 }
229
230 static void dbgdev_address_watch_set_registers(
231                         const struct dbg_address_watch_info *adw_info,
232                         union TCP_WATCH_ADDR_H_BITS *addrHi,
233                         union TCP_WATCH_ADDR_L_BITS *addrLo,
234                         union TCP_WATCH_CNTL_BITS *cntl,
235                         unsigned int index, unsigned int vmid)
236 {
237         union ULARGE_INTEGER addr;
238
239         addr.quad_part = 0;
240         addrHi->u32All = 0;
241         addrLo->u32All = 0;
242         cntl->u32All = 0;
243
244         if (adw_info->watch_mask)
245                 cntl->bitfields.mask =
246                         (uint32_t) (adw_info->watch_mask[index] &
247                                         ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
248         else
249                 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
250
251         addr.quad_part = (unsigned long long) adw_info->watch_address[index];
252
253         addrHi->bitfields.addr = addr.u.high_part &
254                                         ADDRESS_WATCH_REG_ADDHIGH_MASK;
255         addrLo->bitfields.addr =
256                         (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
257
258         cntl->bitfields.mode = adw_info->watch_mode[index];
259         cntl->bitfields.vmid = (uint32_t) vmid;
260         /* for now assume it is an ATC address */
261         cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
262
263         pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
264         pr_debug("\t\t%20s %08x\n", "set reg add high :",
265                         addrHi->bitfields.addr);
266         pr_debug("\t\t%20s %08x\n", "set reg add low :",
267                         addrLo->bitfields.addr);
268 }
269
270 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
271                                       struct dbg_address_watch_info *adw_info)
272 {
273         union TCP_WATCH_ADDR_H_BITS addrHi;
274         union TCP_WATCH_ADDR_L_BITS addrLo;
275         union TCP_WATCH_CNTL_BITS cntl;
276         struct kfd_process_device *pdd;
277         unsigned int i;
278
279         /* taking the vmid for that process on the safe way using pdd */
280         pdd = kfd_get_process_device_data(dbgdev->dev,
281                                         adw_info->process);
282         if (!pdd) {
283                 pr_err("Failed to get pdd for wave control no DIQ\n");
284                 return -EFAULT;
285         }
286
287         addrHi.u32All = 0;
288         addrLo.u32All = 0;
289         cntl.u32All = 0;
290
291         if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
292                         (adw_info->num_watch_points == 0)) {
293                 pr_err("num_watch_points is invalid\n");
294                 return -EINVAL;
295         }
296
297         if (!adw_info->watch_mode || !adw_info->watch_address) {
298                 pr_err("adw_info fields are not valid\n");
299                 return -EINVAL;
300         }
301
302         for (i = 0; i < adw_info->num_watch_points; i++) {
303                 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
304                                                 &cntl, i, pdd->qpd.vmid);
305
306                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
307                 pr_debug("\t\t%20s %08x\n", "register index :", i);
308                 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
309                 pr_debug("\t\t%20s %08x\n", "Address Low is :",
310                                 addrLo.bitfields.addr);
311                 pr_debug("\t\t%20s %08x\n", "Address high is :",
312                                 addrHi.bitfields.addr);
313                 pr_debug("\t\t%20s %08x\n", "Address high is :",
314                                 addrHi.bitfields.addr);
315                 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
316                                 cntl.bitfields.mask);
317                 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
318                                 cntl.bitfields.mode);
319                 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
320                                 cntl.bitfields.vmid);
321                 pr_debug("\t\t%20s %08x\n", "Control atc  is :",
322                                 cntl.bitfields.atc);
323                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
324
325                 pdd->dev->kfd2kgd->address_watch_execute(
326                                                 dbgdev->dev->kgd,
327                                                 i,
328                                                 cntl.u32All,
329                                                 addrHi.u32All,
330                                                 addrLo.u32All);
331         }
332
333         return 0;
334 }
335
336 static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
337                                     struct dbg_address_watch_info *adw_info)
338 {
339         struct pm4__set_config_reg *packets_vec;
340         union TCP_WATCH_ADDR_H_BITS addrHi;
341         union TCP_WATCH_ADDR_L_BITS addrLo;
342         union TCP_WATCH_CNTL_BITS cntl;
343         struct kfd_mem_obj *mem_obj;
344         unsigned int aw_reg_add_dword;
345         uint32_t *packet_buff_uint;
346         unsigned int i;
347         int status;
348         size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
349         /* we do not control the vmid in DIQ mode, just a place holder */
350         unsigned int vmid = 0;
351
352         addrHi.u32All = 0;
353         addrLo.u32All = 0;
354         cntl.u32All = 0;
355
356         if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
357                         (adw_info->num_watch_points == 0)) {
358                 pr_err("num_watch_points is invalid\n");
359                 return -EINVAL;
360         }
361
362         if (!adw_info->watch_mode || !adw_info->watch_address) {
363                 pr_err("adw_info fields are not valid\n");
364                 return -EINVAL;
365         }
366
367         status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
368
369         if (status) {
370                 pr_err("Failed to allocate GART memory\n");
371                 return status;
372         }
373
374         packet_buff_uint = mem_obj->cpu_ptr;
375
376         memset(packet_buff_uint, 0, ib_size);
377
378         packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
379
380         packets_vec[0].header.count = 1;
381         packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
382         packets_vec[0].header.type = PM4_TYPE_3;
383         packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
384         packets_vec[0].bitfields2.insert_vmid = 1;
385         packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
386         packets_vec[1].bitfields2.insert_vmid = 0;
387         packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
388         packets_vec[2].bitfields2.insert_vmid = 0;
389         packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
390         packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
391         packets_vec[3].bitfields2.insert_vmid = 1;
392
393         for (i = 0; i < adw_info->num_watch_points; i++) {
394                 dbgdev_address_watch_set_registers(adw_info,
395                                                 &addrHi,
396                                                 &addrLo,
397                                                 &cntl,
398                                                 i,
399                                                 vmid);
400
401                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
402                 pr_debug("\t\t%20s %08x\n", "register index :", i);
403                 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
404                 pr_debug("\t\t%20s %p\n", "Add ptr is :",
405                                 adw_info->watch_address);
406                 pr_debug("\t\t%20s %08llx\n", "Add     is :",
407                                 adw_info->watch_address[i]);
408                 pr_debug("\t\t%20s %08x\n", "Address Low is :",
409                                 addrLo.bitfields.addr);
410                 pr_debug("\t\t%20s %08x\n", "Address high is :",
411                                 addrHi.bitfields.addr);
412                 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
413                                 cntl.bitfields.mask);
414                 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
415                                 cntl.bitfields.mode);
416                 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
417                                 cntl.bitfields.vmid);
418                 pr_debug("\t\t%20s %08x\n", "Control atc  is :",
419                                 cntl.bitfields.atc);
420                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
421
422                 aw_reg_add_dword =
423                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
424                                         dbgdev->dev->kgd,
425                                         i,
426                                         ADDRESS_WATCH_REG_CNTL);
427
428                 packets_vec[0].bitfields2.reg_offset =
429                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
430
431                 packets_vec[0].reg_data[0] = cntl.u32All;
432
433                 aw_reg_add_dword =
434                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
435                                         dbgdev->dev->kgd,
436                                         i,
437                                         ADDRESS_WATCH_REG_ADDR_HI);
438
439                 packets_vec[1].bitfields2.reg_offset =
440                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
441                 packets_vec[1].reg_data[0] = addrHi.u32All;
442
443                 aw_reg_add_dword =
444                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
445                                         dbgdev->dev->kgd,
446                                         i,
447                                         ADDRESS_WATCH_REG_ADDR_LO);
448
449                 packets_vec[2].bitfields2.reg_offset =
450                                 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
451                 packets_vec[2].reg_data[0] = addrLo.u32All;
452
453                 /* enable watch flag if address is not zero*/
454                 if (adw_info->watch_address[i] > 0)
455                         cntl.bitfields.valid = 1;
456                 else
457                         cntl.bitfields.valid = 0;
458
459                 aw_reg_add_dword =
460                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
461                                         dbgdev->dev->kgd,
462                                         i,
463                                         ADDRESS_WATCH_REG_CNTL);
464
465                 packets_vec[3].bitfields2.reg_offset =
466                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
467                 packets_vec[3].reg_data[0] = cntl.u32All;
468
469                 status = dbgdev_diq_submit_ib(
470                                         dbgdev,
471                                         adw_info->process->pasid,
472                                         mem_obj->gpu_addr,
473                                         packet_buff_uint,
474                                         ib_size);
475
476                 if (status) {
477                         pr_err("Failed to submit IB to DIQ\n");
478                         break;
479                 }
480         }
481
482         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
483         return status;
484 }
485
486 static int dbgdev_wave_control_set_registers(
487                                 struct dbg_wave_control_info *wac_info,
488                                 union SQ_CMD_BITS *in_reg_sq_cmd,
489                                 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
490 {
491         int status = 0;
492         union SQ_CMD_BITS reg_sq_cmd;
493         union GRBM_GFX_INDEX_BITS reg_gfx_index;
494         struct HsaDbgWaveMsgAMDGen2 *pMsg;
495
496         reg_sq_cmd.u32All = 0;
497         reg_gfx_index.u32All = 0;
498         pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
499
500         switch (wac_info->mode) {
501         /* Send command to single wave */
502         case HSA_DBG_WAVEMODE_SINGLE:
503                 /*
504                  * Limit access to the process waves only,
505                  * by setting vmid check
506                  */
507                 reg_sq_cmd.bits.check_vmid = 1;
508                 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
509                 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
510                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
511
512                 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
513                 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
514                 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
515
516                 break;
517
518         /* Send command to all waves with matching VMID */
519         case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
520
521                 reg_gfx_index.bits.sh_broadcast_writes = 1;
522                 reg_gfx_index.bits.se_broadcast_writes = 1;
523                 reg_gfx_index.bits.instance_broadcast_writes = 1;
524
525                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
526
527                 break;
528
529         /* Send command to all CU waves with matching VMID */
530         case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
531
532                 reg_sq_cmd.bits.check_vmid = 1;
533                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
534
535                 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
536                 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
537                 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
538
539                 break;
540
541         default:
542                 return -EINVAL;
543         }
544
545         switch (wac_info->operand) {
546         case HSA_DBG_WAVEOP_HALT:
547                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
548                 break;
549
550         case HSA_DBG_WAVEOP_RESUME:
551                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
552                 break;
553
554         case HSA_DBG_WAVEOP_KILL:
555                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
556                 break;
557
558         case HSA_DBG_WAVEOP_DEBUG:
559                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
560                 break;
561
562         case HSA_DBG_WAVEOP_TRAP:
563                 if (wac_info->trapId < MAX_TRAPID) {
564                         reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
565                         reg_sq_cmd.bits.trap_id = wac_info->trapId;
566                 } else {
567                         status = -EINVAL;
568                 }
569                 break;
570
571         default:
572                 status = -EINVAL;
573                 break;
574         }
575
576         if (status == 0) {
577                 *in_reg_sq_cmd = reg_sq_cmd;
578                 *in_reg_gfx_index = reg_gfx_index;
579         }
580
581         return status;
582 }
583
584 static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
585                                         struct dbg_wave_control_info *wac_info)
586 {
587
588         int status;
589         union SQ_CMD_BITS reg_sq_cmd;
590         union GRBM_GFX_INDEX_BITS reg_gfx_index;
591         struct kfd_mem_obj *mem_obj;
592         uint32_t *packet_buff_uint;
593         struct pm4__set_config_reg *packets_vec;
594         size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
595
596         reg_sq_cmd.u32All = 0;
597
598         status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
599                                                         &reg_gfx_index);
600         if (status) {
601                 pr_err("Failed to set wave control registers\n");
602                 return status;
603         }
604
605         /* we do not control the VMID in DIQ, so reset it to a known value */
606         reg_sq_cmd.bits.vm_id = 0;
607
608         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
609
610         pr_debug("\t\t mode      is: %u\n", wac_info->mode);
611         pr_debug("\t\t operand   is: %u\n", wac_info->operand);
612         pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
613         pr_debug("\t\t msg value is: %u\n",
614                         wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
615         pr_debug("\t\t vmid      is: N/A\n");
616
617         pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
618         pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
619         pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
620         pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
621         pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
622         pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
623         pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
624
625         pr_debug("\t\t ibw       is : %u\n",
626                         reg_gfx_index.bitfields.instance_broadcast_writes);
627         pr_debug("\t\t ii        is : %u\n",
628                         reg_gfx_index.bitfields.instance_index);
629         pr_debug("\t\t sebw      is : %u\n",
630                         reg_gfx_index.bitfields.se_broadcast_writes);
631         pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
632         pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
633         pr_debug("\t\t sbw       is : %u\n",
634                         reg_gfx_index.bitfields.sh_broadcast_writes);
635
636         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
637
638         status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
639
640         if (status != 0) {
641                 pr_err("Failed to allocate GART memory\n");
642                 return status;
643         }
644
645         packet_buff_uint = mem_obj->cpu_ptr;
646
647         memset(packet_buff_uint, 0, ib_size);
648
649         packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
650         packets_vec[0].header.count = 1;
651         packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
652         packets_vec[0].header.type = PM4_TYPE_3;
653         packets_vec[0].bitfields2.reg_offset =
654                         GRBM_GFX_INDEX / (sizeof(uint32_t)) -
655                                 USERCONFIG_REG_BASE;
656
657         packets_vec[0].bitfields2.insert_vmid = 0;
658         packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
659
660         packets_vec[1].header.count = 1;
661         packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
662         packets_vec[1].header.type = PM4_TYPE_3;
663         packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
664                                                 AMD_CONFIG_REG_BASE;
665
666         packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
667         packets_vec[1].bitfields2.insert_vmid = 1;
668         packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
669
670         /* Restore the GRBM_GFX_INDEX register */
671
672         reg_gfx_index.u32All = 0;
673         reg_gfx_index.bits.sh_broadcast_writes = 1;
674         reg_gfx_index.bits.instance_broadcast_writes = 1;
675         reg_gfx_index.bits.se_broadcast_writes = 1;
676
677
678         packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
679         packets_vec[2].bitfields2.reg_offset =
680                                 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
681                                         USERCONFIG_REG_BASE;
682
683         packets_vec[2].bitfields2.insert_vmid = 0;
684         packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
685
686         status = dbgdev_diq_submit_ib(
687                         dbgdev,
688                         wac_info->process->pasid,
689                         mem_obj->gpu_addr,
690                         packet_buff_uint,
691                         ib_size);
692
693         if (status)
694                 pr_err("Failed to submit IB to DIQ\n");
695
696         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
697
698         return status;
699 }
700
701 static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
702                                         struct dbg_wave_control_info *wac_info)
703 {
704         int status;
705         union SQ_CMD_BITS reg_sq_cmd;
706         union GRBM_GFX_INDEX_BITS reg_gfx_index;
707         struct kfd_process_device *pdd;
708
709         reg_sq_cmd.u32All = 0;
710
711         /* taking the VMID for that process on the safe way using PDD */
712         pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
713
714         if (!pdd) {
715                 pr_err("Failed to get pdd for wave control no DIQ\n");
716                 return -EFAULT;
717         }
718         status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
719                                                         &reg_gfx_index);
720         if (status) {
721                 pr_err("Failed to set wave control registers\n");
722                 return status;
723         }
724
725         /* for non DIQ we need to patch the VMID: */
726
727         reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
728
729         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
730
731         pr_debug("\t\t mode      is: %u\n", wac_info->mode);
732         pr_debug("\t\t operand   is: %u\n", wac_info->operand);
733         pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
734         pr_debug("\t\t msg value is: %u\n",
735                         wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
736         pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
737
738         pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
739         pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
740         pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
741         pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
742         pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
743         pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
744         pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
745
746         pr_debug("\t\t ibw       is : %u\n",
747                         reg_gfx_index.bitfields.instance_broadcast_writes);
748         pr_debug("\t\t ii        is : %u\n",
749                         reg_gfx_index.bitfields.instance_index);
750         pr_debug("\t\t sebw      is : %u\n",
751                         reg_gfx_index.bitfields.se_broadcast_writes);
752         pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
753         pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
754         pr_debug("\t\t sbw       is : %u\n",
755                         reg_gfx_index.bitfields.sh_broadcast_writes);
756
757         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
758
759         return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
760                                                         reg_gfx_index.u32All,
761                                                         reg_sq_cmd.u32All);
762 }
763
764 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
765 {
766         int status = 0;
767         unsigned int vmid;
768         union SQ_CMD_BITS reg_sq_cmd;
769         union GRBM_GFX_INDEX_BITS reg_gfx_index;
770         struct kfd_process_device *pdd;
771         struct dbg_wave_control_info wac_info;
772         int temp;
773         int first_vmid_to_scan = 8;
774         int last_vmid_to_scan = 15;
775
776         first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
777         temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
778         last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
779
780         reg_sq_cmd.u32All = 0;
781         status = 0;
782
783         wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
784         wac_info.operand = HSA_DBG_WAVEOP_KILL;
785
786         pr_debug("Killing all process wavefronts\n");
787
788         /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
789          * ATC_VMID15_PASID_MAPPING
790          * to check which VMID the current process is mapped to.
791          */
792
793         for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
794                 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
795                                 (dev->kgd, vmid)) {
796                         if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
797                                         (dev->kgd, vmid) == p->pasid) {
798                                 pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
799                                                 vmid, p->pasid);
800                                 break;
801                         }
802                 }
803         }
804
805         if (vmid > last_vmid_to_scan) {
806                 pr_err("Didn't find vmid for pasid %d\n", p->pasid);
807                 return -EFAULT;
808         }
809
810         /* taking the VMID for that process on the safe way using PDD */
811         pdd = kfd_get_process_device_data(dev, p);
812         if (!pdd)
813                 return -EFAULT;
814
815         status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
816                         &reg_gfx_index);
817         if (status != 0)
818                 return -EINVAL;
819
820         /* for non DIQ we need to patch the VMID: */
821         reg_sq_cmd.bits.vm_id = vmid;
822
823         dev->kfd2kgd->wave_control_execute(dev->kgd,
824                                         reg_gfx_index.u32All,
825                                         reg_sq_cmd.u32All);
826
827         return 0;
828 }
829
830 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
831                         enum DBGDEV_TYPE type)
832 {
833         pdbgdev->dev = pdev;
834         pdbgdev->kq = NULL;
835         pdbgdev->type = type;
836         pdbgdev->pqm = NULL;
837
838         switch (type) {
839         case DBGDEV_TYPE_NODIQ:
840                 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
841                 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
842                 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
843                 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
844                 break;
845         case DBGDEV_TYPE_DIQ:
846         default:
847                 pdbgdev->dbgdev_register = dbgdev_register_diq;
848                 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
849                 pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
850                 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
851                 break;
852         }
853
854 }