GNU Linux-libre 4.14.290-gnu1 - drivers/gpu/drm/i915/gvt/kvmgt.c
1 /*
2  * KVMGT - the implementation of Intel mediated pass-through framework for KVM
3  *
4  * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Kevin Tian <kevin.tian@intel.com>
27  *    Jike Song <jike.song@intel.com>
28  *    Xiaoguang Chen <xiaoguang.chen@intel.com>
29  */
30
31 #include <linux/init.h>
32 #include <linux/device.h>
33 #include <linux/mm.h>
34 #include <linux/mmu_context.h>
35 #include <linux/types.h>
36 #include <linux/list.h>
37 #include <linux/rbtree.h>
38 #include <linux/spinlock.h>
39 #include <linux/eventfd.h>
40 #include <linux/uuid.h>
41 #include <linux/kvm_host.h>
42 #include <linux/vfio.h>
43 #include <linux/mdev.h>
44
45 #include <linux/nospec.h>
46
47 #include "i915_drv.h"
48 #include "gvt.h"
49
50 static const struct intel_gvt_ops *intel_gvt_ops;
51
52 /* helper macros copied from vfio-pci */
53 #define VFIO_PCI_OFFSET_SHIFT   40
54 #define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
55 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
56 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
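/*
 * Offset layout borrowed from vfio-pci: bits 63:40 of a file offset select
 * the VFIO region index and bits 39:0 are the offset within that region.
 * As a rough illustration (values chosen arbitrarily), an access at region
 * index 1, offset 0x10 arrives as
 *
 *	*ppos == VFIO_PCI_INDEX_TO_OFFSET(1) + 0x10 == (1ULL << 40) | 0x10,
 *
 * which intel_vgpu_rw() splits apart again with VFIO_PCI_OFFSET_TO_INDEX()
 * and VFIO_PCI_OFFSET_MASK.
 */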
57
58 struct vfio_region {
59         u32                             type;
60         u32                             subtype;
61         size_t                          size;
62         u32                             flags;
63 };
64
65 struct kvmgt_pgfn {
66         gfn_t gfn;
67         struct hlist_node hnode;
68 };
69
70 struct kvmgt_guest_info {
71         struct kvm *kvm;
72         struct intel_vgpu *vgpu;
73         struct kvm_page_track_notifier_node track_node;
74 #define NR_BKT (1 << 18)
75         struct hlist_head ptable[NR_BKT];
76 #undef NR_BKT
77 };
78
79 struct gvt_dma {
80         struct rb_node node;
81         gfn_t gfn;
82         unsigned long iova;
83 };
84
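/*
 * vgpu->handle holds the kernel pointer of the kvmgt_guest_info allocated
 * in kvmgt_guest_init(), or 0 before a guest is attached.  A vmalloc'ed
 * address always has bits set above the low byte, so handle_valid() doubles
 * as a cheap "is a guest attached" test.
 */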
85 static inline bool handle_valid(unsigned long handle)
86 {
87         return !!(handle & ~0xff);
88 }
89
90 static int kvmgt_guest_init(struct mdev_device *mdev);
91 static void intel_vgpu_release_work(struct work_struct *work);
92 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
93
94 static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
95                 unsigned long *iova)
96 {
97         struct page *page;
98         struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
99         dma_addr_t daddr;
100
101         if (unlikely(!pfn_valid(pfn)))
102                 return -EFAULT;
103
104         page = pfn_to_page(pfn);
105         daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
106                         PCI_DMA_BIDIRECTIONAL);
107         if (dma_mapping_error(dev, daddr))
108                 return -ENOMEM;
109
110         *iova = (unsigned long)(daddr >> PAGE_SHIFT);
111         return 0;
112 }
113
114 static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
115 {
116         struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
117         dma_addr_t daddr;
118
119         daddr = (dma_addr_t)(iova << PAGE_SHIFT);
120         dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
121 }
122
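/*
 * gfn -> host IOVA cache.  Each gvt_dma node records one guest page that
 * has been pinned through vfio_pin_pages() and mapped for device DMA via
 * dma_map_page(); keeping the result in an rbtree keyed by gfn lets
 * kvmgt_gfn_to_pfn() skip the pin/map path on repeated translations of the
 * same guest frame.
 */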
123 static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
124 {
125         struct rb_node *node = vgpu->vdev.cache.rb_node;
126         struct gvt_dma *ret = NULL;
127
128         while (node) {
129                 struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);
130
131                 if (gfn < itr->gfn)
132                         node = node->rb_left;
133                 else if (gfn > itr->gfn)
134                         node = node->rb_right;
135                 else {
136                         ret = itr;
137                         goto out;
138                 }
139         }
140
141 out:
142         return ret;
143 }
144
145 static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
146 {
147         struct gvt_dma *entry;
148         unsigned long iova;
149
150         mutex_lock(&vgpu->vdev.cache_lock);
151
152         entry = __gvt_cache_find(vgpu, gfn);
153         iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;
154
155         mutex_unlock(&vgpu->vdev.cache_lock);
156         return iova;
157 }
158
159 static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
160                 unsigned long iova)
161 {
162         struct gvt_dma *new, *itr;
163         struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;
164
165         new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
166         if (!new)
167                 return;
168
169         new->gfn = gfn;
170         new->iova = iova;
171
172         mutex_lock(&vgpu->vdev.cache_lock);
173         while (*link) {
174                 parent = *link;
175                 itr = rb_entry(parent, struct gvt_dma, node);
176
177                 if (gfn == itr->gfn)
178                         goto out;
179                 else if (gfn < itr->gfn)
180                         link = &parent->rb_left;
181                 else
182                         link = &parent->rb_right;
183         }
184
185         rb_link_node(&new->node, parent, link);
186         rb_insert_color(&new->node, &vgpu->vdev.cache);
187         mutex_unlock(&vgpu->vdev.cache_lock);
188         return;
189
190 out:
191         mutex_unlock(&vgpu->vdev.cache_lock);
192         kfree(new);
193 }
194
195 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
196                                 struct gvt_dma *entry)
197 {
198         rb_erase(&entry->node, &vgpu->vdev.cache);
199         kfree(entry);
200 }
201
202 static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
203 {
204         struct device *dev = mdev_dev(vgpu->vdev.mdev);
205         struct gvt_dma *this;
206         unsigned long g1;
207         int rc;
208
209         mutex_lock(&vgpu->vdev.cache_lock);
210         this  = __gvt_cache_find(vgpu, gfn);
211         if (!this) {
212                 mutex_unlock(&vgpu->vdev.cache_lock);
213                 return;
214         }
215
216         g1 = gfn;
217         gvt_dma_unmap_iova(vgpu, this->iova);
218         rc = vfio_unpin_pages(dev, &g1, 1);
219         WARN_ON(rc != 1);
220         __gvt_cache_remove_entry(vgpu, this);
221         mutex_unlock(&vgpu->vdev.cache_lock);
222 }
223
224 static void gvt_cache_init(struct intel_vgpu *vgpu)
225 {
226         vgpu->vdev.cache = RB_ROOT;
227         mutex_init(&vgpu->vdev.cache_lock);
228 }
229
230 static void gvt_cache_destroy(struct intel_vgpu *vgpu)
231 {
232         struct gvt_dma *dma;
233         struct rb_node *node = NULL;
234         struct device *dev = mdev_dev(vgpu->vdev.mdev);
235         unsigned long gfn;
236
237         for (;;) {
238                 mutex_lock(&vgpu->vdev.cache_lock);
239                 node = rb_first(&vgpu->vdev.cache);
240                 if (!node) {
241                         mutex_unlock(&vgpu->vdev.cache_lock);
242                         break;
243                 }
244                 dma = rb_entry(node, struct gvt_dma, node);
245                 gvt_dma_unmap_iova(vgpu, dma->iova);
246                 gfn = dma->gfn;
247                 __gvt_cache_remove_entry(vgpu, dma);
248                 mutex_unlock(&vgpu->vdev.cache_lock);
249                 vfio_unpin_pages(dev, &gfn, 1);
250         }
251 }
252
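/*
 * mdev type names are exposed to userspace as "<parent driver>-<type>"
 * (for a GVT-g vGPU, something like "i915-GVTg_V5_4"), so the driver-name
 * prefix plus the '-' separator is skipped before comparing against the
 * GVT type table.
 */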
253 static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
254                 const char *name)
255 {
256         int i;
257         struct intel_vgpu_type *t;
258         const char *driver_name = dev_driver_string(
259                         &gvt->dev_priv->drm.pdev->dev);
260
261         for (i = 0; i < gvt->num_types; i++) {
262                 t = &gvt->types[i];
263                 if (!strncmp(t->name, name + strlen(driver_name) + 1,
264                         sizeof(t->name)))
265                         return t;
266         }
267
268         return NULL;
269 }
270
271 static ssize_t available_instances_show(struct kobject *kobj,
272                                         struct device *dev, char *buf)
273 {
274         struct intel_vgpu_type *type;
275         unsigned int num = 0;
276         void *gvt = kdev_to_i915(dev)->gvt;
277
278         type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
279         if (!type)
280                 num = 0;
281         else
282                 num = type->avail_instance;
283
284         return sprintf(buf, "%u\n", num);
285 }
286
287 static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
288                 char *buf)
289 {
290         return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
291 }
292
293 static ssize_t description_show(struct kobject *kobj, struct device *dev,
294                 char *buf)
295 {
296         struct intel_vgpu_type *type;
297         void *gvt = kdev_to_i915(dev)->gvt;
298
299         type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
300         if (!type)
301                 return 0;
302
303         return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
304                        "fence: %d\nresolution: %s\n"
305                        "weight: %d\n",
306                        BYTES_TO_MB(type->low_gm_size),
307                        BYTES_TO_MB(type->high_gm_size),
308                        type->fence, vgpu_edid_str(type->resolution),
309                        type->weight);
310 }
311
312 static MDEV_TYPE_ATTR_RO(available_instances);
313 static MDEV_TYPE_ATTR_RO(device_api);
314 static MDEV_TYPE_ATTR_RO(description);
315
316 static struct attribute *type_attrs[] = {
317         &mdev_type_attr_available_instances.attr,
318         &mdev_type_attr_device_api.attr,
319         &mdev_type_attr_description.attr,
320         NULL,
321 };
322
323 static struct attribute_group *intel_vgpu_type_groups[] = {
324         [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
325 };
326
327 static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
328 {
329         int i, j;
330         struct intel_vgpu_type *type;
331         struct attribute_group *group;
332
333         for (i = 0; i < gvt->num_types; i++) {
334                 type = &gvt->types[i];
335
336                 group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
337                 if (WARN_ON(!group))
338                         goto unwind;
339
340                 group->name = type->name;
341                 group->attrs = type_attrs;
342                 intel_vgpu_type_groups[i] = group;
343         }
344
345         return true;
346
347 unwind:
348         for (j = 0; j < i; j++) {
349                 group = intel_vgpu_type_groups[j];
350                 kfree(group);
351         }
352
353         return false;
354 }
355
356 static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
357 {
358         int i;
359         struct attribute_group *group;
360
361         for (i = 0; i < gvt->num_types; i++) {
362                 group = intel_vgpu_type_groups[i];
363                 kfree(group);
364         }
365 }
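/*
 * The groups built above show up under the parent GPU's sysfs directory as
 * mdev_supported_types/<type>/{available_instances,device_api,description},
 * next to the "create" attribute provided by the mdev core.  A vGPU is then
 * created roughly like this (paths and type name for illustration only):
 *
 *	# ls /sys/bus/pci/devices/0000:00:02.0/mdev_supported_types/
 *	# echo $UUID > .../mdev_supported_types/i915-GVTg_V5_4/create
 */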
366
367 static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
368 {
369         hash_init(info->ptable);
370 }
371
372 static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
373 {
374         struct kvmgt_pgfn *p;
375         struct hlist_node *tmp;
376         int i;
377
378         hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
379                 hash_del(&p->hnode);
380                 kfree(p);
381         }
382 }
383
384 static struct kvmgt_pgfn *
385 __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
386 {
387         struct kvmgt_pgfn *p, *res = NULL;
388
389         hash_for_each_possible(info->ptable, p, hnode, gfn) {
390                 if (gfn == p->gfn) {
391                         res = p;
392                         break;
393                 }
394         }
395
396         return res;
397 }
398
399 static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
400                                 gfn_t gfn)
401 {
402         struct kvmgt_pgfn *p;
403
404         p = __kvmgt_protect_table_find(info, gfn);
405         return !!p;
406 }
407
408 static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
409 {
410         struct kvmgt_pgfn *p;
411
412         if (kvmgt_gfn_is_write_protected(info, gfn))
413                 return;
414
415         p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
416         if (WARN(!p, "gfn: 0x%llx\n", gfn))
417                 return;
418
419         p->gfn = gfn;
420         hash_add(info->ptable, &p->hnode, gfn);
421 }
422
423 static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
424                                 gfn_t gfn)
425 {
426         struct kvmgt_pgfn *p;
427
428         p = __kvmgt_protect_table_find(info, gfn);
429         if (p) {
430                 hash_del(&p->hnode);
431                 kfree(p);
432         }
433 }
434
435 static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
436 {
437         struct intel_vgpu *vgpu = NULL;
438         struct intel_vgpu_type *type;
439         struct device *pdev;
440         void *gvt;
441         int ret;
442
443         pdev = mdev_parent_dev(mdev);
444         gvt = kdev_to_i915(pdev)->gvt;
445
446         type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
447         if (!type) {
448                 gvt_vgpu_err("failed to find type %s to create\n",
449                                                 kobject_name(kobj));
450                 ret = -EINVAL;
451                 goto out;
452         }
453
454         vgpu = intel_gvt_ops->vgpu_create(gvt, type);
455         if (IS_ERR_OR_NULL(vgpu)) {
456                 ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
457                 gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
458                 goto out;
459         }
460
461         INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
462
463         vgpu->vdev.mdev = mdev;
464         mdev_set_drvdata(mdev, vgpu);
465
466         gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
467                      dev_name(mdev_dev(mdev)));
468         ret = 0;
469
470 out:
471         return ret;
472 }
473
474 static int intel_vgpu_remove(struct mdev_device *mdev)
475 {
476         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
477
478         if (handle_valid(vgpu->handle))
479                 return -EBUSY;
480
481         intel_gvt_ops->vgpu_destroy(vgpu);
482         return 0;
483 }
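/*
 * intel_vgpu_remove() runs when the mdev is removed (e.g. via its sysfs
 * "remove" attribute); returning -EBUSY while handle_valid() is true keeps
 * the vGPU from being destroyed underneath a still-open, guest-attached
 * device.
 */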
484
485 static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
486                                      unsigned long action, void *data)
487 {
488         struct intel_vgpu *vgpu = container_of(nb,
489                                         struct intel_vgpu,
490                                         vdev.iommu_notifier);
491
492         if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
493                 struct vfio_iommu_type1_dma_unmap *unmap = data;
494                 unsigned long gfn, end_gfn;
495
496                 gfn = unmap->iova >> PAGE_SHIFT;
497                 end_gfn = gfn + unmap->size / PAGE_SIZE;
498
499                 while (gfn < end_gfn)
500                         gvt_cache_remove(vgpu, gfn++);
501         }
502
503         return NOTIFY_OK;
504 }
505
506 static int intel_vgpu_group_notifier(struct notifier_block *nb,
507                                      unsigned long action, void *data)
508 {
509         struct intel_vgpu *vgpu = container_of(nb,
510                                         struct intel_vgpu,
511                                         vdev.group_notifier);
512
513         /* the only action we care about */
514         if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
515                 vgpu->vdev.kvm = data;
516
517                 if (!data)
518                         schedule_work(&vgpu->vdev.release_work);
519         }
520
521         return NOTIFY_OK;
522 }
523
524 static int intel_vgpu_open(struct mdev_device *mdev)
525 {
526         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
527         unsigned long events;
528         int ret;
529
530         vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
531         vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;
532
533         events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
534         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
535                                 &vgpu->vdev.iommu_notifier);
536         if (ret != 0) {
537                 gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
538                         ret);
539                 goto out;
540         }
541
542         events = VFIO_GROUP_NOTIFY_SET_KVM;
543         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
544                                 &vgpu->vdev.group_notifier);
545         if (ret != 0) {
546                 gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
547                         ret);
548                 goto undo_iommu;
549         }
550
551         ret = kvmgt_guest_init(mdev);
552         if (ret)
553                 goto undo_group;
554
555         intel_gvt_ops->vgpu_activate(vgpu);
556
557         atomic_set(&vgpu->vdev.released, 0);
558         return ret;
559
560 undo_group:
561         vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
562                                         &vgpu->vdev.group_notifier);
563
564 undo_iommu:
565         vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
566                                         &vgpu->vdev.iommu_notifier);
567 out:
568         return ret;
569 }
570
571 static void __intel_vgpu_release(struct intel_vgpu *vgpu)
572 {
573         struct kvmgt_guest_info *info;
574         int ret;
575
576         if (!handle_valid(vgpu->handle))
577                 return;
578
579         if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
580                 return;
581
582         intel_gvt_ops->vgpu_deactivate(vgpu);
583
584         ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
585                                         &vgpu->vdev.iommu_notifier);
586         WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
587
588         ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
589                                         &vgpu->vdev.group_notifier);
590         WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);
591
592         info = (struct kvmgt_guest_info *)vgpu->handle;
593         kvmgt_guest_exit(info);
594
595         vgpu->vdev.kvm = NULL;
596         vgpu->handle = 0;
597 }
598
599 static void intel_vgpu_release(struct mdev_device *mdev)
600 {
601         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
602
603         __intel_vgpu_release(vgpu);
604 }
605
606 static void intel_vgpu_release_work(struct work_struct *work)
607 {
608         struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
609                                         vdev.release_work);
610
611         __intel_vgpu_release(vgpu);
612 }
613
614 static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
615 {
616         u32 start_lo, start_hi;
617         u32 mem_type;
618         int pos = PCI_BASE_ADDRESS_0;
619
620         start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
621                         PCI_BASE_ADDRESS_MEM_MASK;
622         mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
623                         PCI_BASE_ADDRESS_MEM_TYPE_MASK;
624
625         switch (mem_type) {
626         case PCI_BASE_ADDRESS_MEM_TYPE_64:
627                 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
628                                                 + pos + 4));
629                 break;
630         case PCI_BASE_ADDRESS_MEM_TYPE_32:
631         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
632                 /* 1M mem BAR treated as 32-bit BAR */
633         default:
634                 /* mem unknown type treated as 32-bit BAR */
635                 start_hi = 0;
636                 break;
637         }
638
639         return ((u64)start_hi << 32) | start_lo;
640 }
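/*
 * Example of the decode above, with made-up register values: if the virtual
 * config space holds 0xf0000004 at BAR0 and 0x000000ff at BAR0 + 4, then
 * PCI_BASE_ADDRESS_MEM_MASK strips the low flag bits (start_lo = 0xf0000000),
 * the type field reads PCI_BASE_ADDRESS_MEM_TYPE_64, and the function
 * returns 0x000000fff0000000.
 */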
641
642 static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
643                         size_t count, loff_t *ppos, bool is_write)
644 {
645         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
646         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
647         uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
648         int ret = -EINVAL;
649
650
651         if (index >= VFIO_PCI_NUM_REGIONS) {
652                 gvt_vgpu_err("invalid index: %u\n", index);
653                 return -EINVAL;
654         }
655
656         switch (index) {
657         case VFIO_PCI_CONFIG_REGION_INDEX:
658                 if (is_write)
659                         ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
660                                                 buf, count);
661                 else
662                         ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
663                                                 buf, count);
664                 break;
665         case VFIO_PCI_BAR0_REGION_INDEX:
666         case VFIO_PCI_BAR1_REGION_INDEX:
667                 if (is_write) {
668                         uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);
669
670                         ret = intel_gvt_ops->emulate_mmio_write(vgpu,
671                                                 bar0_start + pos, buf, count);
672                 } else {
673                         uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);
674
675                         ret = intel_gvt_ops->emulate_mmio_read(vgpu,
676                                                 bar0_start + pos, buf, count);
677                 }
678                 break;
679         case VFIO_PCI_BAR2_REGION_INDEX:
680         case VFIO_PCI_BAR3_REGION_INDEX:
681         case VFIO_PCI_BAR4_REGION_INDEX:
682         case VFIO_PCI_BAR5_REGION_INDEX:
683         case VFIO_PCI_VGA_REGION_INDEX:
684         case VFIO_PCI_ROM_REGION_INDEX:
685         default:
686                 gvt_vgpu_err("unsupported region: %u\n", index);
687         }
688
689         return ret == 0 ? count : ret;
690 }
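/*
 * Return convention: the emulate_* hooks are expected to return 0 on
 * success, which the line above converts into "count" bytes handled for the
 * read()/write() loops below; any error code is passed through unchanged.
 */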
691
692 static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
693                         size_t count, loff_t *ppos)
694 {
695         unsigned int done = 0;
696         int ret;
697
698         while (count) {
699                 size_t filled;
700
701                 if (count >= 4 && !(*ppos % 4)) {
702                         u32 val;
703
704                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
705                                         ppos, false);
706                         if (ret <= 0)
707                                 goto read_err;
708
709                         if (copy_to_user(buf, &val, sizeof(val)))
710                                 goto read_err;
711
712                         filled = 4;
713                 } else if (count >= 2 && !(*ppos % 2)) {
714                         u16 val;
715
716                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
717                                         ppos, false);
718                         if (ret <= 0)
719                                 goto read_err;
720
721                         if (copy_to_user(buf, &val, sizeof(val)))
722                                 goto read_err;
723
724                         filled = 2;
725                 } else {
726                         u8 val;
727
728                         ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
729                                         false);
730                         if (ret <= 0)
731                                 goto read_err;
732
733                         if (copy_to_user(buf, &val, sizeof(val)))
734                                 goto read_err;
735
736                         filled = 1;
737                 }
738
739                 count -= filled;
740                 done += filled;
741                 *ppos += filled;
742                 buf += filled;
743         }
744
745         return done;
746
747 read_err:
748         return -EFAULT;
749 }
750
751 static ssize_t intel_vgpu_write(struct mdev_device *mdev,
752                                 const char __user *buf,
753                                 size_t count, loff_t *ppos)
754 {
755         unsigned int done = 0;
756         int ret;
757
758         while (count) {
759                 size_t filled;
760
761                 if (count >= 4 && !(*ppos % 4)) {
762                         u32 val;
763
764                         if (copy_from_user(&val, buf, sizeof(val)))
765                                 goto write_err;
766
767                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
768                                         ppos, true);
769                         if (ret <= 0)
770                                 goto write_err;
771
772                         filled = 4;
773                 } else if (count >= 2 && !(*ppos % 2)) {
774                         u16 val;
775
776                         if (copy_from_user(&val, buf, sizeof(val)))
777                                 goto write_err;
778
779                         ret = intel_vgpu_rw(mdev, (char *)&val,
780                                         sizeof(val), ppos, true);
781                         if (ret <= 0)
782                                 goto write_err;
783
784                         filled = 2;
785                 } else {
786                         u8 val;
787
788                         if (copy_from_user(&val, buf, sizeof(val)))
789                                 goto write_err;
790
791                         ret = intel_vgpu_rw(mdev, &val, sizeof(val),
792                                         ppos, true);
793                         if (ret <= 0)
794                                 goto write_err;
795
796                         filled = 1;
797                 }
798
799                 count -= filled;
800                 done += filled;
801                 *ppos += filled;
802                 buf += filled;
803         }
804
805         return done;
806 write_err:
807         return -EFAULT;
808 }
809
810 static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu,
811                                           unsigned long off)
812 {
813         return off >= vgpu_aperture_offset(vgpu) &&
814                 off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
815 }
816
817 static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
818 {
819         unsigned int index;
820         u64 virtaddr;
821         unsigned long req_size, pgoff, req_start;
822         pgprot_t pg_prot;
823         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
824
825         index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
826         if (index >= VFIO_PCI_ROM_REGION_INDEX)
827                 return -EINVAL;
828
829         if (vma->vm_end < vma->vm_start)
830                 return -EINVAL;
831         if ((vma->vm_flags & VM_SHARED) == 0)
832                 return -EINVAL;
833         if (index != VFIO_PCI_BAR2_REGION_INDEX)
834                 return -EINVAL;
835
836         pg_prot = vma->vm_page_prot;
837         virtaddr = vma->vm_start;
838         req_size = vma->vm_end - vma->vm_start;
839         pgoff = vma->vm_pgoff &
840                 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
841         req_start = pgoff << PAGE_SHIFT;
842
843         if (!intel_vgpu_in_aperture(vgpu, req_start))
844                 return -EINVAL;
845         if (req_start + req_size >
846             vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
847                 return -EINVAL;
848
849         pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
850
851         return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
852 }
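/*
 * mmap uses the same fixed offset layout as read/write: userspace mmaps the
 * device fd at the offset reported for BAR2 (VFIO_PCI_INDEX_TO_OFFSET of
 * that index), and vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT) recovers
 * the region index here.  Only the BAR2 aperture region is backed by real
 * memory; the mapping is redirected to this vGPU's slice of the host
 * aperture via gvt_aperture_pa_base() and remap_pfn_range().
 */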
853
854 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
855 {
856         if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
857                 return 1;
858
859         return 0;
860 }
861
862 static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
863                         unsigned int index, unsigned int start,
864                         unsigned int count, uint32_t flags,
865                         void *data)
866 {
867         return 0;
868 }
869
870 static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
871                         unsigned int index, unsigned int start,
872                         unsigned int count, uint32_t flags, void *data)
873 {
874         return 0;
875 }
876
877 static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
878                 unsigned int index, unsigned int start, unsigned int count,
879                 uint32_t flags, void *data)
880 {
881         return 0;
882 }
883
884 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
885                 unsigned int index, unsigned int start, unsigned int count,
886                 uint32_t flags, void *data)
887 {
888         struct eventfd_ctx *trigger;
889
890         if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
891                 int fd = *(int *)data;
892
893                 trigger = eventfd_ctx_fdget(fd);
894                 if (IS_ERR(trigger)) {
895                         gvt_vgpu_err("eventfd_ctx_fdget failed\n");
896                         return PTR_ERR(trigger);
897                 }
898                 vgpu->vdev.msi_trigger = trigger;
899         }
900
901         return 0;
902 }
903
904 static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
905                 unsigned int index, unsigned int start, unsigned int count,
906                 void *data)
907 {
908         int (*func)(struct intel_vgpu *vgpu, unsigned int index,
909                         unsigned int start, unsigned int count, uint32_t flags,
910                         void *data) = NULL;
911
912         switch (index) {
913         case VFIO_PCI_INTX_IRQ_INDEX:
914                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
915                 case VFIO_IRQ_SET_ACTION_MASK:
916                         func = intel_vgpu_set_intx_mask;
917                         break;
918                 case VFIO_IRQ_SET_ACTION_UNMASK:
919                         func = intel_vgpu_set_intx_unmask;
920                         break;
921                 case VFIO_IRQ_SET_ACTION_TRIGGER:
922                         func = intel_vgpu_set_intx_trigger;
923                         break;
924                 }
925                 break;
926         case VFIO_PCI_MSI_IRQ_INDEX:
927                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
928                 case VFIO_IRQ_SET_ACTION_MASK:
929                 case VFIO_IRQ_SET_ACTION_UNMASK:
930                         /* XXX Need masking support exported */
931                         break;
932                 case VFIO_IRQ_SET_ACTION_TRIGGER:
933                         func = intel_vgpu_set_msi_trigger;
934                         break;
935                 }
936                 break;
937         }
938
939         if (!func)
940                 return -ENOTTY;
941
942         return func(vgpu, index, start, count, flags, data);
943 }
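/*
 * MSI delivery sketch: userspace (e.g. QEMU's vfio-pci backend) issues
 * VFIO_DEVICE_SET_IRQS with VFIO_IRQ_SET_DATA_EVENTFD and
 * VFIO_IRQ_SET_ACTION_TRIGGER on VFIO_PCI_MSI_IRQ_INDEX, which is routed to
 * intel_vgpu_set_msi_trigger() above and stashes the eventfd.  Later,
 * kvmgt_inject_msi() signals that eventfd to raise the interrupt in the
 * guest.
 */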
944
945 static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
946                              unsigned long arg)
947 {
948         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
949         unsigned long minsz;
950
951         gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
952
953         if (cmd == VFIO_DEVICE_GET_INFO) {
954                 struct vfio_device_info info;
955
956                 minsz = offsetofend(struct vfio_device_info, num_irqs);
957
958                 if (copy_from_user(&info, (void __user *)arg, minsz))
959                         return -EFAULT;
960
961                 if (info.argsz < minsz)
962                         return -EINVAL;
963
964                 info.flags = VFIO_DEVICE_FLAGS_PCI;
965                 info.flags |= VFIO_DEVICE_FLAGS_RESET;
966                 info.num_regions = VFIO_PCI_NUM_REGIONS;
967                 info.num_irqs = VFIO_PCI_NUM_IRQS;
968
969                 return copy_to_user((void __user *)arg, &info, minsz) ?
970                         -EFAULT : 0;
971
972         } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
973                 struct vfio_region_info info;
974                 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
975                 unsigned int i;
976                 int ret;
977                 struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
978                 size_t size;
979                 int nr_areas = 1;
980                 int cap_type_id;
981
982                 minsz = offsetofend(struct vfio_region_info, offset);
983
984                 if (copy_from_user(&info, (void __user *)arg, minsz))
985                         return -EFAULT;
986
987                 if (info.argsz < minsz)
988                         return -EINVAL;
989
990                 switch (info.index) {
991                 case VFIO_PCI_CONFIG_REGION_INDEX:
992                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
993                         info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
994                         info.flags = VFIO_REGION_INFO_FLAG_READ |
995                                      VFIO_REGION_INFO_FLAG_WRITE;
996                         break;
997                 case VFIO_PCI_BAR0_REGION_INDEX:
998                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
999                         info.size = vgpu->cfg_space.bar[info.index].size;
1000                         if (!info.size) {
1001                                 info.flags = 0;
1002                                 break;
1003                         }
1004
1005                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1006                                      VFIO_REGION_INFO_FLAG_WRITE;
1007                         break;
1008                 case VFIO_PCI_BAR1_REGION_INDEX:
1009                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1010                         info.size = 0;
1011                         info.flags = 0;
1012                         break;
1013                 case VFIO_PCI_BAR2_REGION_INDEX:
1014                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1015                         info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1016                                         VFIO_REGION_INFO_FLAG_MMAP |
1017                                         VFIO_REGION_INFO_FLAG_READ |
1018                                         VFIO_REGION_INFO_FLAG_WRITE;
1019                         info.size = gvt_aperture_sz(vgpu->gvt);
1020
1021                         size = sizeof(*sparse) +
1022                                         (nr_areas * sizeof(*sparse->areas));
1023                         sparse = kzalloc(size, GFP_KERNEL);
1024                         if (!sparse)
1025                                 return -ENOMEM;
1026
1027                         sparse->nr_areas = nr_areas;
1028                         cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1029                         sparse->areas[0].offset =
1030                                         PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1031                         sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1032                         break;
1033
1034                 case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1035                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1036                         info.size = 0;
1037
1038                         info.flags = 0;
1039                         gvt_dbg_core("get region info bar:%d\n", info.index);
1040                         break;
1041
1042                 case VFIO_PCI_ROM_REGION_INDEX:
1043                 case VFIO_PCI_VGA_REGION_INDEX:
1044                         gvt_dbg_core("get region info index:%d\n", info.index);
1045                         break;
1046                 default:
1047                         {
1048                                 struct vfio_region_info_cap_type cap_type;
1049
1050                                 if (info.index >= VFIO_PCI_NUM_REGIONS +
1051                                                 vgpu->vdev.num_regions)
1052                                         return -EINVAL;
1053                                 info.index =
1054                                         array_index_nospec(info.index,
1055                                                         VFIO_PCI_NUM_REGIONS +
1056                                                         vgpu->vdev.num_regions);
1057
1058                                 i = info.index - VFIO_PCI_NUM_REGIONS;
1059
1060                                 info.offset =
1061                                         VFIO_PCI_INDEX_TO_OFFSET(info.index);
1062                                 info.size = vgpu->vdev.region[i].size;
1063                                 info.flags = vgpu->vdev.region[i].flags;
1064
1065                                 cap_type.type = vgpu->vdev.region[i].type;
1066                                 cap_type.subtype = vgpu->vdev.region[i].subtype;
1067
1068                                 ret = vfio_info_add_capability(&caps,
1069                                                 VFIO_REGION_INFO_CAP_TYPE,
1070                                                 &cap_type);
1071                                 if (ret)
1072                                         return ret;
1073                         }
1074                 }
1075
1076                 if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1077                         switch (cap_type_id) {
1078                         case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1079                                 ret = vfio_info_add_capability(&caps,
1080                                         VFIO_REGION_INFO_CAP_SPARSE_MMAP,
1081                                         sparse);
1082                                 kfree(sparse);
1083                                 if (ret)
1084                                         return ret;
1085                                 break;
1086                         default:
1087                                 return -EINVAL;
1088                         }
1089                 }
1090
1091                 if (caps.size) {
1092                         if (info.argsz < sizeof(info) + caps.size) {
1093                                 info.argsz = sizeof(info) + caps.size;
1094                                 info.cap_offset = 0;
1095                         } else {
1096                                 vfio_info_cap_shift(&caps, sizeof(info));
1097                                 if (copy_to_user((void __user *)arg +
1098                                                   sizeof(info), caps.buf,
1099                                                   caps.size)) {
1100                                         kfree(caps.buf);
1101                                         return -EFAULT;
1102                                 }
1103                                 info.cap_offset = sizeof(info);
1104                         }
1105
1106                         kfree(caps.buf);
1107                 }
1108
1109                 return copy_to_user((void __user *)arg, &info, minsz) ?
1110                         -EFAULT : 0;
1111         } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1112                 struct vfio_irq_info info;
1113
1114                 minsz = offsetofend(struct vfio_irq_info, count);
1115
1116                 if (copy_from_user(&info, (void __user *)arg, minsz))
1117                         return -EFAULT;
1118
1119                 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1120                         return -EINVAL;
1121
1122                 switch (info.index) {
1123                 case VFIO_PCI_INTX_IRQ_INDEX:
1124                 case VFIO_PCI_MSI_IRQ_INDEX:
1125                         break;
1126                 default:
1127                         return -EINVAL;
1128                 }
1129
1130                 info.flags = VFIO_IRQ_INFO_EVENTFD;
1131
1132                 info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1133
1134                 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1135                         info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1136                                        VFIO_IRQ_INFO_AUTOMASKED);
1137                 else
1138                         info.flags |= VFIO_IRQ_INFO_NORESIZE;
1139
1140                 return copy_to_user((void __user *)arg, &info, minsz) ?
1141                         -EFAULT : 0;
1142         } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1143                 struct vfio_irq_set hdr;
1144                 u8 *data = NULL;
1145                 int ret = 0;
1146                 size_t data_size = 0;
1147
1148                 minsz = offsetofend(struct vfio_irq_set, count);
1149
1150                 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1151                         return -EFAULT;
1152
1153                 if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1154                         int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1155
1156                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1157                                                 VFIO_PCI_NUM_IRQS, &data_size);
1158                         if (ret) {
1159                                 gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
1160                                 return -EINVAL;
1161                         }
1162                         if (data_size) {
1163                                 data = memdup_user((void __user *)(arg + minsz),
1164                                                    data_size);
1165                                 if (IS_ERR(data))
1166                                         return PTR_ERR(data);
1167                         }
1168                 }
1169
1170                 ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1171                                         hdr.start, hdr.count, data);
1172                 kfree(data);
1173
1174                 return ret;
1175         } else if (cmd == VFIO_DEVICE_RESET) {
1176                 intel_gvt_ops->vgpu_reset(vgpu);
1177                 return 0;
1178         }
1179
1180         return -ENOTTY;
1181 }
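/*
 * For BAR2, VFIO_DEVICE_GET_REGION_INFO above advertises a sparse-mmap
 * capability whose single area covers only this vGPU's aperture slice, so
 * userspace maps just that window even though the region itself is sized to
 * the whole host aperture.
 */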
1182
1183 static ssize_t
1184 vgpu_id_show(struct device *dev, struct device_attribute *attr,
1185              char *buf)
1186 {
1187         struct mdev_device *mdev = mdev_from_dev(dev);
1188
1189         if (mdev) {
1190                 struct intel_vgpu *vgpu = (struct intel_vgpu *)
1191                         mdev_get_drvdata(mdev);
1192                 return sprintf(buf, "%d\n", vgpu->id);
1193         }
1194         return sprintf(buf, "\n");
1195 }
1196
1197 static ssize_t
1198 hw_id_show(struct device *dev, struct device_attribute *attr,
1199            char *buf)
1200 {
1201         struct mdev_device *mdev = mdev_from_dev(dev);
1202
1203         if (mdev) {
1204                 struct intel_vgpu *vgpu = (struct intel_vgpu *)
1205                         mdev_get_drvdata(mdev);
1206                 return sprintf(buf, "%u\n",
1207                                vgpu->shadow_ctx->hw_id);
1208         }
1209         return sprintf(buf, "\n");
1210 }
1211
1212 static DEVICE_ATTR_RO(vgpu_id);
1213 static DEVICE_ATTR_RO(hw_id);
1214
1215 static struct attribute *intel_vgpu_attrs[] = {
1216         &dev_attr_vgpu_id.attr,
1217         &dev_attr_hw_id.attr,
1218         NULL
1219 };
1220
1221 static const struct attribute_group intel_vgpu_group = {
1222         .name = "intel_vgpu",
1223         .attrs = intel_vgpu_attrs,
1224 };
1225
1226 static const struct attribute_group *intel_vgpu_groups[] = {
1227         &intel_vgpu_group,
1228         NULL,
1229 };
1230
1231 static const struct mdev_parent_ops intel_vgpu_ops = {
1232         .supported_type_groups  = intel_vgpu_type_groups,
1233         .mdev_attr_groups       = intel_vgpu_groups,
1234         .create                 = intel_vgpu_create,
1235         .remove                 = intel_vgpu_remove,
1236
1237         .open                   = intel_vgpu_open,
1238         .release                = intel_vgpu_release,
1239
1240         .read                   = intel_vgpu_read,
1241         .write                  = intel_vgpu_write,
1242         .mmap                   = intel_vgpu_mmap,
1243         .ioctl                  = intel_vgpu_ioctl,
1244 };
1245
1246 static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
1247 {
1248         if (!intel_gvt_init_vgpu_type_groups(gvt))
1249                 return -EFAULT;
1250
1251         intel_gvt_ops = ops;
1252
1253         return mdev_register_device(dev, &intel_vgpu_ops);
1254 }
1255
1256 static void kvmgt_host_exit(struct device *dev, void *gvt)
1257 {
1258         intel_gvt_cleanup_vgpu_type_groups(gvt);
1259         mdev_unregister_device(dev);
1260 }
1261
1262 static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
1263 {
1264         struct kvmgt_guest_info *info;
1265         struct kvm *kvm;
1266         struct kvm_memory_slot *slot;
1267         int idx;
1268
1269         if (!handle_valid(handle))
1270                 return -ESRCH;
1271
1272         info = (struct kvmgt_guest_info *)handle;
1273         kvm = info->kvm;
1274
1275         idx = srcu_read_lock(&kvm->srcu);
1276         slot = gfn_to_memslot(kvm, gfn);
1277         if (!slot) {
1278                 srcu_read_unlock(&kvm->srcu, idx);
1279                 return -EINVAL;
1280         }
1281
1282         spin_lock(&kvm->mmu_lock);
1283
1284         if (kvmgt_gfn_is_write_protected(info, gfn))
1285                 goto out;
1286
1287         kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1288         kvmgt_protect_table_add(info, gfn);
1289
1290 out:
1291         spin_unlock(&kvm->mmu_lock);
1292         srcu_read_unlock(&kvm->srcu, idx);
1293         return 0;
1294 }
1295
1296 static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
1297 {
1298         struct kvmgt_guest_info *info;
1299         struct kvm *kvm;
1300         struct kvm_memory_slot *slot;
1301         int idx;
1302
1303         if (!handle_valid(handle))
1304                 return 0;
1305
1306         info = (struct kvmgt_guest_info *)handle;
1307         kvm = info->kvm;
1308
1309         idx = srcu_read_lock(&kvm->srcu);
1310         slot = gfn_to_memslot(kvm, gfn);
1311         if (!slot) {
1312                 srcu_read_unlock(&kvm->srcu, idx);
1313                 return -EINVAL;
1314         }
1315
1316         spin_lock(&kvm->mmu_lock);
1317
1318         if (!kvmgt_gfn_is_write_protected(info, gfn))
1319                 goto out;
1320
1321         kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1322         kvmgt_protect_table_del(info, gfn);
1323
1324 out:
1325         spin_unlock(&kvm->mmu_lock);
1326         srcu_read_unlock(&kvm->srcu, idx);
1327         return 0;
1328 }
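/*
 * These two helpers back the MPT set_wp_page/unset_wp_page hooks: GVT
 * write-protects the guest pages that hold GTT/PPGTT entries so that guest
 * updates trap into KVM's page-track framework and reach
 * kvmgt_page_track_write() below, which forwards them to the emulation path.
 */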
1329
1330 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1331                 const u8 *val, int len,
1332                 struct kvm_page_track_notifier_node *node)
1333 {
1334         struct kvmgt_guest_info *info = container_of(node,
1335                                         struct kvmgt_guest_info, track_node);
1336
1337         if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1338                 intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
1339                                         (void *)val, len);
1340 }
1341
1342 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1343                 struct kvm_memory_slot *slot,
1344                 struct kvm_page_track_notifier_node *node)
1345 {
1346         int i;
1347         gfn_t gfn;
1348         struct kvmgt_guest_info *info = container_of(node,
1349                                         struct kvmgt_guest_info, track_node);
1350
1351         spin_lock(&kvm->mmu_lock);
1352         for (i = 0; i < slot->npages; i++) {
1353                 gfn = slot->base_gfn + i;
1354                 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1355                         kvm_slot_page_track_remove_page(kvm, slot, gfn,
1356                                                 KVM_PAGE_TRACK_WRITE);
1357                         kvmgt_protect_table_del(info, gfn);
1358                 }
1359         }
1360         spin_unlock(&kvm->mmu_lock);
1361 }
1362
1363 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
1364 {
1365         struct intel_vgpu *itr;
1366         struct kvmgt_guest_info *info;
1367         int id;
1368         bool ret = false;
1369
1370         mutex_lock(&vgpu->gvt->lock);
1371         for_each_active_vgpu(vgpu->gvt, itr, id) {
1372                 if (!handle_valid(itr->handle))
1373                         continue;
1374
1375                 info = (struct kvmgt_guest_info *)itr->handle;
1376                 if (kvm && kvm == info->kvm) {
1377                         ret = true;
1378                         goto out;
1379                 }
1380         }
1381 out:
1382         mutex_unlock(&vgpu->gvt->lock);
1383         return ret;
1384 }
1385
1386 static int kvmgt_guest_init(struct mdev_device *mdev)
1387 {
1388         struct kvmgt_guest_info *info;
1389         struct intel_vgpu *vgpu;
1390         struct kvm *kvm;
1391
1392         vgpu = mdev_get_drvdata(mdev);
1393         if (handle_valid(vgpu->handle))
1394                 return -EEXIST;
1395
1396         kvm = vgpu->vdev.kvm;
1397         if (!kvm || kvm->mm != current->mm) {
1398                 gvt_vgpu_err("KVM is required to use Intel vGPU\n");
1399                 return -ESRCH;
1400         }
1401
1402         if (__kvmgt_vgpu_exist(vgpu, kvm))
1403                 return -EEXIST;
1404
1405         info = vzalloc(sizeof(struct kvmgt_guest_info));
1406         if (!info)
1407                 return -ENOMEM;
1408
1409         vgpu->handle = (unsigned long)info;
1410         info->vgpu = vgpu;
1411         info->kvm = kvm;
1412         kvm_get_kvm(info->kvm);
1413
1414         kvmgt_protect_table_init(info);
1415         gvt_cache_init(vgpu);
1416
1417         info->track_node.track_write = kvmgt_page_track_write;
1418         info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
1419         kvm_page_track_register_notifier(kvm, &info->track_node);
1420
1421         return 0;
1422 }
1423
1424 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
1425 {
1426         kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
1427         kvm_put_kvm(info->kvm);
1428         kvmgt_protect_table_destroy(info);
1429         gvt_cache_destroy(info->vgpu);
1430         vfree(info);
1431
1432         return true;
1433 }
1434
1435 static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
1436 {
1437         /* nothing to do here */
1438         return 0;
1439 }
1440
1441 static void kvmgt_detach_vgpu(unsigned long handle)
1442 {
1443         /* nothing to do here */
1444 }
1445
1446 static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
1447 {
1448         struct kvmgt_guest_info *info;
1449         struct intel_vgpu *vgpu;
1450
1451         if (!handle_valid(handle))
1452                 return -ESRCH;
1453
1454         info = (struct kvmgt_guest_info *)handle;
1455         vgpu = info->vgpu;
1456
1457         if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
1458                 return 0;
1459
1460         return -EFAULT;
1461 }
1462
1463 static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
1464 {
1465         unsigned long iova, pfn;
1466         struct kvmgt_guest_info *info;
1467         struct device *dev;
1468         struct intel_vgpu *vgpu;
1469         int rc;
1470
1471         if (!handle_valid(handle))
1472                 return INTEL_GVT_INVALID_ADDR;
1473
1474         info = (struct kvmgt_guest_info *)handle;
1475         vgpu = info->vgpu;
1476         iova = gvt_cache_find(info->vgpu, gfn);
1477         if (iova != INTEL_GVT_INVALID_ADDR)
1478                 return iova;
1479
1480         pfn = INTEL_GVT_INVALID_ADDR;
1481         dev = mdev_dev(info->vgpu->vdev.mdev);
1482         rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
1483         if (rc != 1) {
1484                 gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
1485                         gfn, rc);
1486                 return INTEL_GVT_INVALID_ADDR;
1487         }
1488         /* transfer to host iova for GFX to use DMA */
1489         rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
1490         if (rc) {
1491                 gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
1492                 vfio_unpin_pages(dev, &gfn, 1);
1493                 return INTEL_GVT_INVALID_ADDR;
1494         }
1495
1496         gvt_cache_add(info->vgpu, gfn, iova);
1497         return iova;
1498 }
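/*
 * Summary of the gfn -> DMA address path above: hit the per-vGPU cache when
 * possible; otherwise pin the guest page through VFIO (which also yields the
 * host pfn), map that page for device DMA, and cache the result so the next
 * translation of the same gfn is only an rbtree lookup.
 */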
1499
1500 static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
1501                         void *buf, unsigned long len, bool write)
1502 {
1503         struct kvmgt_guest_info *info;
1504         struct kvm *kvm;
1505         int idx, ret;
1506         bool kthread = current->mm == NULL;
1507
1508         if (!handle_valid(handle))
1509                 return -ESRCH;
1510
1511         info = (struct kvmgt_guest_info *)handle;
1512         kvm = info->kvm;
1513
1514         if (kthread)
1515                 use_mm(kvm->mm);
1516
1517         idx = srcu_read_lock(&kvm->srcu);
1518         ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
1519                       kvm_read_guest(kvm, gpa, buf, len);
1520         srcu_read_unlock(&kvm->srcu, idx);
1521
1522         if (kthread)
1523                 unuse_mm(kvm->mm);
1524
1525         return ret;
1526 }
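/*
 * kvmgt_rw_gpa() may be called from GVT kernel threads, which have no mm of
 * their own; temporarily adopting kvm->mm with use_mm() lets
 * kvm_read_guest()/kvm_write_guest() resolve the guest memslots' userspace
 * mappings from that context.
 */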
1527
1528 static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
1529                         void *buf, unsigned long len)
1530 {
1531         return kvmgt_rw_gpa(handle, gpa, buf, len, false);
1532 }
1533
1534 static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
1535                         void *buf, unsigned long len)
1536 {
1537         return kvmgt_rw_gpa(handle, gpa, buf, len, true);
1538 }
1539
1540 static unsigned long kvmgt_virt_to_pfn(void *addr)
1541 {
1542         return PFN_DOWN(__pa(addr));
1543 }
1544
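/*
 * Mediated Pass-Through (MPT) hook table: the interface the GVT-g core uses
 * to reach the hypervisor-specific backend.  For KVM/VFIO these hooks pin
 * and map guest pages, inject MSIs through the VFIO eventfd, and
 * write-protect guest page-table pages via KVM page tracking.
 */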
1545 struct intel_gvt_mpt kvmgt_mpt = {
1546         .host_init = kvmgt_host_init,
1547         .host_exit = kvmgt_host_exit,
1548         .attach_vgpu = kvmgt_attach_vgpu,
1549         .detach_vgpu = kvmgt_detach_vgpu,
1550         .inject_msi = kvmgt_inject_msi,
1551         .from_virt_to_mfn = kvmgt_virt_to_pfn,
1552         .set_wp_page = kvmgt_write_protect_add,
1553         .unset_wp_page = kvmgt_write_protect_remove,
1554         .read_gpa = kvmgt_read_gpa,
1555         .write_gpa = kvmgt_write_gpa,
1556         .gfn_to_mfn = kvmgt_gfn_to_pfn,
1557 };
1558 EXPORT_SYMBOL_GPL(kvmgt_mpt);
1559
1560 static int __init kvmgt_init(void)
1561 {
1562         return 0;
1563 }
1564
1565 static void __exit kvmgt_exit(void)
1566 {
1567 }
1568
1569 module_init(kvmgt_init);
1570 module_exit(kvmgt_exit);
1571
1572 MODULE_LICENSE("GPL and additional rights");
1573 MODULE_AUTHOR("Intel Corporation");