GNU Linux-libre 4.9.337-gnu1
drivers/gpu/drm/i915/i915_gem_gtt.c
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/seq_file.h>
27 #include <linux/stop_machine.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_vgpu.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34
35 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
36
37 /**
38  * DOC: Global GTT views
39  *
40  * Background and previous state
41  *
42  * Historically objects could exist (be bound) in global GTT space only as
43  * singular instances with a view representing all of the object's backing pages
44  * in a linear fashion. This view is called the normal view.
45  *
46  * To support multiple views of the same object, where the number of mapped
47  * pages is not equal to the backing store, or where the layout of the pages
48  * is not linear, the concept of a GGTT view was added.
49  *
50  * One example of an alternative view is a stereo display driven by a single
51  * image. In this case we would have a framebuffer looking like this
52  * (2x2 pages):
53  *
54  *    12
55  *    34
56  *
57  * The above would represent the normal GGTT view as normally mapped for GPU or
58  * CPU rendering. In contrast, the display engine would be fed an alternative
59  * view which could look something like this:
60  *
61  *   1212
62  *   3434
63  *
64  * In this example both the size and layout of pages in the alternative view
65  * differ from the normal view.
66  *
67  * Implementation and usage
68  *
69  * GGTT views are implemented using VMAs and are distinguished via enum
70  * i915_ggtt_view_type and struct i915_ggtt_view.
71  *
72  * A new flavour of core GEM functions which work with GGTT bound objects was
73  * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
74  * renaming in large amounts of code. They take the struct i915_ggtt_view
75  * parameter encapsulating all metadata required to implement a view.
76  *
77  * As a helper for callers which are only interested in the normal view, a
78  * globally const i915_ggtt_view_normal singleton instance exists. All old core
79  * GEM API functions, the ones not taking the view parameter, operate on, or
80  * with, the normal GGTT view.
81  *
82  * Code wanting to add or use a new GGTT view needs to:
83  *
84  * 1. Add a new enum with a suitable name.
85  * 2. Extend the metadata in the i915_ggtt_view structure if required.
86  * 3. Add support to i915_get_vma_pages().
87  *
88  * New views are required to build a scatter-gather table from within the
89  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
90  * exists for the lifetime of a VMA.
91  *
92  * The core API is designed to have copy semantics, which means that the passed
93  * in struct i915_ggtt_view does not need to be persistent (left around after
94  * calling the core API functions); a minimal illustration follows this comment.
95  *
96  */
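
/*
 * Illustrative sketch only: because of the copy semantics described above, a
 * caller can build the view on the stack and let it go out of scope after the
 * call. The pin helper named below is a hypothetical stand-in for the _ggtt_/
 * _view flavoured GEM entry points; consult those for the real signatures.
 */
#if 0
static int example_bind_with_view(struct drm_i915_gem_object *obj)
{
	struct i915_ggtt_view view = {
		.type = I915_GGTT_VIEW_ROTATED,
		/* view-specific metadata union would be filled in here */
	};

	/* hypothetical helper, not a real driver function */
	return example_ggtt_pin_view(obj, &view);
}
#endif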
97
98 static inline struct i915_ggtt *
99 i915_vm_to_ggtt(struct i915_address_space *vm)
100 {
101         GEM_BUG_ON(!i915_is_ggtt(vm));
102         return container_of(vm, struct i915_ggtt, base);
103 }
104
105 static int
106 i915_get_ggtt_vma_pages(struct i915_vma *vma);
107
108 const struct i915_ggtt_view i915_ggtt_view_normal = {
109         .type = I915_GGTT_VIEW_NORMAL,
110 };
111 const struct i915_ggtt_view i915_ggtt_view_rotated = {
112         .type = I915_GGTT_VIEW_ROTATED,
113 };
114
115 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
116                                 int enable_ppgtt)
117 {
118         bool has_aliasing_ppgtt;
119         bool has_full_ppgtt;
120         bool has_full_48bit_ppgtt;
121
122         has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
123         has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
124         has_full_48bit_ppgtt =
125                 IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
126
127         if (intel_vgpu_active(dev_priv)) {
128                 /* emulation is too hard */
129                 has_full_ppgtt = false;
130                 has_full_48bit_ppgtt = false;
131         }
132
133         if (!has_aliasing_ppgtt)
134                 return 0;
135
136         /*
137          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
138          * execlists, the sole mechanism available to submit work.
139          */
140         if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
141                 return 0;
142
143         /* Full PPGTT is required by the Gen9 cmdparser */
144         if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9)
145                 return 1;
146
147         if (enable_ppgtt == 2 && has_full_ppgtt)
148                 return 2;
149
150         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
151                 return 3;
152
153 #ifdef CONFIG_INTEL_IOMMU
154         /* Disable ppgtt on SNB if VT-d is on. */
155         if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
156                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
157                 return 0;
158         }
159 #endif
160
161         /* Early (pre-B3) VLV steppings don't have working PPGTT */
162         if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
163                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
164                 return 0;
165         }
166
167         if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
168                 return has_full_48bit_ppgtt ? 3 : 2;
169         else
170                 return has_aliasing_ppgtt ? 1 : 0;
171 }
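
/*
 * Summary of the sanitized value returned above: 0 means PPGTT is disabled,
 * 1 selects aliasing PPGTT only, 2 selects full (32-bit) PPGTT and 3 selects
 * full 48-bit PPGTT, matching the meaning of the i915.enable_ppgtt module
 * parameter that is being sanitized.
 */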
172
173 static int ppgtt_bind_vma(struct i915_vma *vma,
174                           enum i915_cache_level cache_level,
175                           u32 unused)
176 {
177         u32 pte_flags = 0;
178
179         vma->pages = vma->obj->pages;
180
181         /* Applicable to VLV, and gen8+ */
182         if (i915_gem_object_is_readonly(vma->obj))
183                 pte_flags |= PTE_READ_ONLY;
184
185         vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
186                                 cache_level, pte_flags);
187
188         return 0;
189 }
190
191 static void ppgtt_unbind_vma(struct i915_vma *vma)
192 {
193         vma->vm->clear_range(vma->vm,
194                              vma->node.start,
195                              vma->size,
196                              true);
197 }
198
199 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
200                                   enum i915_cache_level level,
201                                   bool valid, u32 flags)
202 {
203         gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
204         pte |= addr;
205
206         if (unlikely(flags & PTE_READ_ONLY))
207                 pte &= ~_PAGE_RW;
208
209         switch (level) {
210         case I915_CACHE_NONE:
211                 pte |= PPAT_UNCACHED_INDEX;
212                 break;
213         case I915_CACHE_WT:
214                 pte |= PPAT_DISPLAY_ELLC_INDEX;
215                 break;
216         default:
217                 pte |= PPAT_CACHED_INDEX;
218                 break;
219         }
220
221         return pte;
222 }
223
224 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
225                                   const enum i915_cache_level level)
226 {
227         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
228         pde |= addr;
229         if (level != I915_CACHE_NONE)
230                 pde |= PPAT_CACHED_PDE_INDEX;
231         else
232                 pde |= PPAT_UNCACHED_INDEX;
233         return pde;
234 }
235
236 #define gen8_pdpe_encode gen8_pde_encode
237 #define gen8_pml4e_encode gen8_pde_encode
238
239 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
240                                  enum i915_cache_level level,
241                                  bool valid, u32 unused)
242 {
243         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
244         pte |= GEN6_PTE_ADDR_ENCODE(addr);
245
246         switch (level) {
247         case I915_CACHE_L3_LLC:
248         case I915_CACHE_LLC:
249                 pte |= GEN6_PTE_CACHE_LLC;
250                 break;
251         case I915_CACHE_NONE:
252                 pte |= GEN6_PTE_UNCACHED;
253                 break;
254         default:
255                 MISSING_CASE(level);
256         }
257
258         return pte;
259 }
260
261 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
262                                  enum i915_cache_level level,
263                                  bool valid, u32 unused)
264 {
265         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
266         pte |= GEN6_PTE_ADDR_ENCODE(addr);
267
268         switch (level) {
269         case I915_CACHE_L3_LLC:
270                 pte |= GEN7_PTE_CACHE_L3_LLC;
271                 break;
272         case I915_CACHE_LLC:
273                 pte |= GEN6_PTE_CACHE_LLC;
274                 break;
275         case I915_CACHE_NONE:
276                 pte |= GEN6_PTE_UNCACHED;
277                 break;
278         default:
279                 MISSING_CASE(level);
280         }
281
282         return pte;
283 }
284
285 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
286                                  enum i915_cache_level level,
287                                  bool valid, u32 flags)
288 {
289         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
290         pte |= GEN6_PTE_ADDR_ENCODE(addr);
291
292         if (!(flags & PTE_READ_ONLY))
293                 pte |= BYT_PTE_WRITEABLE;
294
295         if (level != I915_CACHE_NONE)
296                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
297
298         return pte;
299 }
300
301 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
302                                  enum i915_cache_level level,
303                                  bool valid, u32 unused)
304 {
305         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
306         pte |= HSW_PTE_ADDR_ENCODE(addr);
307
308         if (level != I915_CACHE_NONE)
309                 pte |= HSW_WB_LLC_AGE3;
310
311         return pte;
312 }
313
314 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
315                                   enum i915_cache_level level,
316                                   bool valid, u32 unused)
317 {
318         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
319         pte |= HSW_PTE_ADDR_ENCODE(addr);
320
321         switch (level) {
322         case I915_CACHE_NONE:
323                 break;
324         case I915_CACHE_WT:
325                 pte |= HSW_WT_ELLC_LLC_AGE3;
326                 break;
327         default:
328                 pte |= HSW_WB_ELLC_LLC_AGE3;
329                 break;
330         }
331
332         return pte;
333 }
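
/*
 * Note on the gen6/gen7 PTE encoders above: snb_pte_encode, ivb_pte_encode,
 * byt_pte_encode, hsw_pte_encode and iris_pte_encode share one signature and
 * differ only in how the cache level (and, for byt, the read-only flag) maps
 * onto platform-specific PTE bits. One of them ends up installed as the
 * per-address-space pte_encode callback (see the vm->pte_encode uses below).
 */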
334
335 static int __setup_page_dma(struct drm_device *dev,
336                             struct i915_page_dma *p, gfp_t flags)
337 {
338         struct device *kdev = &dev->pdev->dev;
339
340         p->page = alloc_page(flags);
341         if (!p->page)
342                 return -ENOMEM;
343
344         p->daddr = dma_map_page(kdev,
345                                 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
346
347         if (dma_mapping_error(kdev, p->daddr)) {
348                 __free_page(p->page);
349                 return -EINVAL;
350         }
351
352         return 0;
353 }
354
355 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
356 {
357         return __setup_page_dma(dev, p, I915_GFP_DMA);
358 }
359
360 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
361 {
362         struct pci_dev *pdev = dev->pdev;
363
364         if (WARN_ON(!p->page))
365                 return;
366
367         dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
368         __free_page(p->page);
369         memset(p, 0, sizeof(*p));
370 }
371
372 static void *kmap_page_dma(struct i915_page_dma *p)
373 {
374         return kmap_atomic(p->page);
375 }
376
377 /* We use the flushing unmap only with ppgtt structures:
378  * page directories, page tables and scratch pages.
379  */
380 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
381 {
382         /* There are only a few exceptions for gen >= 6: chv and bxt.
383          * And we are not sure about the latter, so play safe for now.
384          */
385         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
386                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
387
388         kunmap_atomic(vaddr);
389 }
390
391 #define kmap_px(px) kmap_page_dma(px_base(px))
392 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
393
394 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
395 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
396 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
397 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
398
399 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
400                           const uint64_t val)
401 {
402         int i;
403         uint64_t * const vaddr = kmap_page_dma(p);
404
405         for (i = 0; i < 512; i++)
406                 vaddr[i] = val;
407
408         kunmap_page_dma(dev, vaddr);
409 }
410
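/*
 * The helper below replicates its 32-bit value into both halves of a 64-bit
 * word (e.g. 0xdeadbeef becomes 0xdeadbeefdeadbeef) so that the 64-bit
 * fill_page_dma() above can also be used for gen6-style 32-bit PTEs.
 */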
411 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
412                              const uint32_t val32)
413 {
414         uint64_t v = val32;
415
416         v = v << 32 | val32;
417
418         fill_page_dma(dev, p, v);
419 }
420
421 static int
422 setup_scratch_page(struct drm_device *dev,
423                    struct i915_page_dma *scratch,
424                    gfp_t gfp)
425 {
426         return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO);
427 }
428
429 static void cleanup_scratch_page(struct drm_device *dev,
430                                  struct i915_page_dma *scratch)
431 {
432         cleanup_page_dma(dev, scratch);
433 }
434
435 static struct i915_page_table *alloc_pt(struct drm_device *dev)
436 {
437         struct i915_page_table *pt;
438         const size_t count = INTEL_INFO(dev)->gen >= 8 ?
439                 GEN8_PTES : GEN6_PTES;
440         int ret = -ENOMEM;
441
442         pt = kzalloc(sizeof(*pt), GFP_KERNEL);
443         if (!pt)
444                 return ERR_PTR(-ENOMEM);
445
446         pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
447                                 GFP_KERNEL);
448
449         if (!pt->used_ptes)
450                 goto fail_bitmap;
451
452         ret = setup_px(dev, pt);
453         if (ret)
454                 goto fail_page_m;
455
456         return pt;
457
458 fail_page_m:
459         kfree(pt->used_ptes);
460 fail_bitmap:
461         kfree(pt);
462
463         return ERR_PTR(ret);
464 }
465
466 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
467 {
468         cleanup_px(dev, pt);
469         kfree(pt->used_ptes);
470         kfree(pt);
471 }
472
473 static void gen8_initialize_pt(struct i915_address_space *vm,
474                                struct i915_page_table *pt)
475 {
476         gen8_pte_t scratch_pte;
477
478         scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
479                                       I915_CACHE_LLC, true, 0);
480
481         fill_px(vm->dev, pt, scratch_pte);
482 }
483
484 static void gen6_initialize_pt(struct i915_address_space *vm,
485                                struct i915_page_table *pt)
486 {
487         gen6_pte_t scratch_pte;
488
489         WARN_ON(vm->scratch_page.daddr == 0);
490
491         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
492                                      I915_CACHE_LLC, true, 0);
493
494         fill32_px(vm->dev, pt, scratch_pte);
495 }
496
497 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
498 {
499         struct i915_page_directory *pd;
500         int ret = -ENOMEM;
501
502         pd = kzalloc(sizeof(*pd), GFP_KERNEL);
503         if (!pd)
504                 return ERR_PTR(-ENOMEM);
505
506         pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
507                                 sizeof(*pd->used_pdes), GFP_KERNEL);
508         if (!pd->used_pdes)
509                 goto fail_bitmap;
510
511         ret = setup_px(dev, pd);
512         if (ret)
513                 goto fail_page_m;
514
515         return pd;
516
517 fail_page_m:
518         kfree(pd->used_pdes);
519 fail_bitmap:
520         kfree(pd);
521
522         return ERR_PTR(ret);
523 }
524
525 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
526 {
527         if (px_page(pd)) {
528                 cleanup_px(dev, pd);
529                 kfree(pd->used_pdes);
530                 kfree(pd);
531         }
532 }
533
534 static void gen8_initialize_pd(struct i915_address_space *vm,
535                                struct i915_page_directory *pd)
536 {
537         gen8_pde_t scratch_pde;
538
539         scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
540
541         fill_px(vm->dev, pd, scratch_pde);
542 }
543
544 static int __pdp_init(struct drm_device *dev,
545                       struct i915_page_directory_pointer *pdp)
546 {
547         size_t pdpes = I915_PDPES_PER_PDP(dev);
548
549         pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
550                                   sizeof(unsigned long),
551                                   GFP_KERNEL);
552         if (!pdp->used_pdpes)
553                 return -ENOMEM;
554
555         pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
556                                       GFP_KERNEL);
557         if (!pdp->page_directory) {
558                 kfree(pdp->used_pdpes);
559                 /* the PDP might be the statically allocated top level. Keep it
560                  * as clean as possible */
561                 pdp->used_pdpes = NULL;
562                 return -ENOMEM;
563         }
564
565         return 0;
566 }
567
568 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
569 {
570         kfree(pdp->used_pdpes);
571         kfree(pdp->page_directory);
572         pdp->page_directory = NULL;
573 }
574
575 static struct
576 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
577 {
578         struct i915_page_directory_pointer *pdp;
579         int ret = -ENOMEM;
580
581         WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
582
583         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
584         if (!pdp)
585                 return ERR_PTR(-ENOMEM);
586
587         ret = __pdp_init(dev, pdp);
588         if (ret)
589                 goto fail_bitmap;
590
591         ret = setup_px(dev, pdp);
592         if (ret)
593                 goto fail_page_m;
594
595         return pdp;
596
597 fail_page_m:
598         __pdp_fini(pdp);
599 fail_bitmap:
600         kfree(pdp);
601
602         return ERR_PTR(ret);
603 }
604
605 static void free_pdp(struct drm_device *dev,
606                      struct i915_page_directory_pointer *pdp)
607 {
608         __pdp_fini(pdp);
609         if (USES_FULL_48BIT_PPGTT(dev)) {
610                 cleanup_px(dev, pdp);
611                 kfree(pdp);
612         }
613 }
614
615 static void gen8_initialize_pdp(struct i915_address_space *vm,
616                                 struct i915_page_directory_pointer *pdp)
617 {
618         gen8_ppgtt_pdpe_t scratch_pdpe;
619
620         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
621
622         fill_px(vm->dev, pdp, scratch_pdpe);
623 }
624
625 static void gen8_initialize_pml4(struct i915_address_space *vm,
626                                  struct i915_pml4 *pml4)
627 {
628         gen8_ppgtt_pml4e_t scratch_pml4e;
629
630         scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
631                                           I915_CACHE_LLC);
632
633         fill_px(vm->dev, pml4, scratch_pml4e);
634 }
635
636 static void
637 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
638                           struct i915_page_directory_pointer *pdp,
639                           struct i915_page_directory *pd,
640                           int index)
641 {
642         gen8_ppgtt_pdpe_t *page_directorypo;
643
644         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
645                 return;
646
647         page_directorypo = kmap_px(pdp);
648         page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
649         kunmap_px(ppgtt, page_directorypo);
650 }
651
652 static void
653 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
654                                   struct i915_pml4 *pml4,
655                                   struct i915_page_directory_pointer *pdp,
656                                   int index)
657 {
658         gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
659
660         WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
661         pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
662         kunmap_px(ppgtt, pagemap);
663 }
664
665 /* Broadwell Page Directory Pointer Descriptors */
666 static int gen8_write_pdp(struct drm_i915_gem_request *req,
667                           unsigned entry,
668                           dma_addr_t addr)
669 {
670         struct intel_ring *ring = req->ring;
671         struct intel_engine_cs *engine = req->engine;
672         int ret;
673
674         BUG_ON(entry >= 4);
675
676         ret = intel_ring_begin(req, 6);
677         if (ret)
678                 return ret;
679
680         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
681         intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry));
682         intel_ring_emit(ring, upper_32_bits(addr));
683         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
684         intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry));
685         intel_ring_emit(ring, lower_32_bits(addr));
686         intel_ring_advance(ring);
687
688         return 0;
689 }
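
/*
 * gen8_write_pdp() above emits two MI_LOAD_REGISTER_IMM pairs per entry
 * (upper dword, then lower dword, of the page directory address), which is
 * why six dwords of ring space are reserved with intel_ring_begin(req, 6).
 */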
690
691 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
692                                  struct drm_i915_gem_request *req)
693 {
694         int i, ret;
695
696         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
697                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
698
699                 ret = gen8_write_pdp(req, i, pd_daddr);
700                 if (ret)
701                         return ret;
702         }
703
704         return 0;
705 }
706
707 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
708                               struct drm_i915_gem_request *req)
709 {
710         return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
711 }
712
713 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
714                                        struct i915_page_directory_pointer *pdp,
715                                        uint64_t start,
716                                        uint64_t length,
717                                        gen8_pte_t scratch_pte)
718 {
719         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
720         gen8_pte_t *pt_vaddr;
721         unsigned pdpe = gen8_pdpe_index(start);
722         unsigned pde = gen8_pde_index(start);
723         unsigned pte = gen8_pte_index(start);
724         unsigned num_entries = length >> PAGE_SHIFT;
725         unsigned last_pte, i;
726
727         if (WARN_ON(!pdp))
728                 return;
729
730         while (num_entries) {
731                 struct i915_page_directory *pd;
732                 struct i915_page_table *pt;
733
734                 if (WARN_ON(!pdp->page_directory[pdpe]))
735                         break;
736
737                 pd = pdp->page_directory[pdpe];
738
739                 if (WARN_ON(!pd->page_table[pde]))
740                         break;
741
742                 pt = pd->page_table[pde];
743
744                 if (WARN_ON(!px_page(pt)))
745                         break;
746
747                 last_pte = pte + num_entries;
748                 if (last_pte > GEN8_PTES)
749                         last_pte = GEN8_PTES;
750
751                 pt_vaddr = kmap_px(pt);
752
753                 for (i = pte; i < last_pte; i++) {
754                         pt_vaddr[i] = scratch_pte;
755                         num_entries--;
756                 }
757
758                 kunmap_px(ppgtt, pt_vaddr);
759
760                 pte = 0;
761                 if (++pde == I915_PDES) {
762                         if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
763                                 break;
764                         pde = 0;
765                 }
766         }
767 }
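
/*
 * Illustrative sketch only: how a 48-bit gen8 virtual address splits into the
 * pml4e/pdpe/pde/pte indices walked in this file. The shift values mirror
 * GEN8_PML4E_SHIFT, GEN8_PDPE_SHIFT, GEN8_PDE_SHIFT and the 4KiB page size;
 * each level holds 512 entries (9 index bits). The helper is hypothetical and
 * not used anywhere in the driver.
 */
#if 0
static u32 example_decompose_gen8_va(u64 va)
{
	const u64 index_mask = 0x1ff;		/* 512 entries per level */
	u32 pml4e = (va >> 39) & index_mask;	/* selects a PDP, 512GiB each */
	u32 pdpe  = (va >> 30) & index_mask;	/* selects a PD, 1GiB each */
	u32 pde   = (va >> 21) & index_mask;	/* selects a PT, 2MiB each */
	u32 pte   = (va >> 12) & index_mask;	/* selects a 4KiB page */

	(void)pml4e; (void)pdpe; (void)pde;	/* only pte is returned here */
	return pte;
}
#endif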
768
769 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
770                                    uint64_t start,
771                                    uint64_t length,
772                                    bool use_scratch)
773 {
774         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
775         gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
776                                                  I915_CACHE_LLC, use_scratch, 0);
777
778         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
779                 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
780                                            scratch_pte);
781         } else {
782                 uint64_t pml4e;
783                 struct i915_page_directory_pointer *pdp;
784
785                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
786                         gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
787                                                    scratch_pte);
788                 }
789         }
790 }
791
792 static void
793 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
794                               struct i915_page_directory_pointer *pdp,
795                               struct sg_page_iter *sg_iter,
796                               uint64_t start,
797                               enum i915_cache_level cache_level,
798                               u32 flags)
799 {
800         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
801         gen8_pte_t *pt_vaddr;
802         unsigned pdpe = gen8_pdpe_index(start);
803         unsigned pde = gen8_pde_index(start);
804         unsigned pte = gen8_pte_index(start);
805
806         pt_vaddr = NULL;
807
808         while (__sg_page_iter_next(sg_iter)) {
809                 if (pt_vaddr == NULL) {
810                         struct i915_page_directory *pd = pdp->page_directory[pdpe];
811                         struct i915_page_table *pt = pd->page_table[pde];
812                         pt_vaddr = kmap_px(pt);
813                 }
814
815                 pt_vaddr[pte] =
816                         gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
817                                         cache_level, true, flags);
818                 if (++pte == GEN8_PTES) {
819                         kunmap_px(ppgtt, pt_vaddr);
820                         pt_vaddr = NULL;
821                         if (++pde == I915_PDES) {
822                                 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
823                                         break;
824                                 pde = 0;
825                         }
826                         pte = 0;
827                 }
828         }
829
830         if (pt_vaddr)
831                 kunmap_px(ppgtt, pt_vaddr);
832 }
833
834 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
835                                       struct sg_table *pages,
836                                       uint64_t start,
837                                       enum i915_cache_level cache_level,
838                                       u32 flags)
839 {
840         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
841         struct sg_page_iter sg_iter;
842
843         __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
844
845         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
846                 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
847                                               cache_level, flags);
848         } else {
849                 struct i915_page_directory_pointer *pdp;
850                 uint64_t pml4e;
851                 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
852
853                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
854                         gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
855                                                       start, cache_level, flags);
856                 }
857         }
858 }
859
860 static void gen8_free_page_tables(struct drm_device *dev,
861                                   struct i915_page_directory *pd)
862 {
863         int i;
864
865         if (!px_page(pd))
866                 return;
867
868         for_each_set_bit(i, pd->used_pdes, I915_PDES) {
869                 if (WARN_ON(!pd->page_table[i]))
870                         continue;
871
872                 free_pt(dev, pd->page_table[i]);
873                 pd->page_table[i] = NULL;
874         }
875 }
876
877 static int gen8_init_scratch(struct i915_address_space *vm)
878 {
879         struct drm_device *dev = vm->dev;
880         int ret;
881
882         ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
883         if (ret)
884                 return ret;
885
886         vm->scratch_pt = alloc_pt(dev);
887         if (IS_ERR(vm->scratch_pt)) {
888                 ret = PTR_ERR(vm->scratch_pt);
889                 goto free_scratch_page;
890         }
891
892         vm->scratch_pd = alloc_pd(dev);
893         if (IS_ERR(vm->scratch_pd)) {
894                 ret = PTR_ERR(vm->scratch_pd);
895                 goto free_pt;
896         }
897
898         if (USES_FULL_48BIT_PPGTT(dev)) {
899                 vm->scratch_pdp = alloc_pdp(dev);
900                 if (IS_ERR(vm->scratch_pdp)) {
901                         ret = PTR_ERR(vm->scratch_pdp);
902                         goto free_pd;
903                 }
904         }
905
906         gen8_initialize_pt(vm, vm->scratch_pt);
907         gen8_initialize_pd(vm, vm->scratch_pd);
908         if (USES_FULL_48BIT_PPGTT(dev))
909                 gen8_initialize_pdp(vm, vm->scratch_pdp);
910
911         return 0;
912
913 free_pd:
914         free_pd(dev, vm->scratch_pd);
915 free_pt:
916         free_pt(dev, vm->scratch_pt);
917 free_scratch_page:
918         cleanup_scratch_page(dev, &vm->scratch_page);
919
920         return ret;
921 }
922
923 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
924 {
925         enum vgt_g2v_type msg;
926         struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
927         int i;
928
929         if (USES_FULL_48BIT_PPGTT(dev_priv)) {
930                 u64 daddr = px_dma(&ppgtt->pml4);
931
932                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
933                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
934
935                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
936                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
937         } else {
938                 for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
939                         u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
940
941                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
942                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
943                 }
944
945                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
946                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
947         }
948
949         I915_WRITE(vgtif_reg(g2v_notify), msg);
950
951         return 0;
952 }
953
954 static void gen8_free_scratch(struct i915_address_space *vm)
955 {
956         struct drm_device *dev = vm->dev;
957
958         if (USES_FULL_48BIT_PPGTT(dev))
959                 free_pdp(dev, vm->scratch_pdp);
960         free_pd(dev, vm->scratch_pd);
961         free_pt(dev, vm->scratch_pt);
962         cleanup_scratch_page(dev, &vm->scratch_page);
963 }
964
965 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
966                                     struct i915_page_directory_pointer *pdp)
967 {
968         int i;
969
970         for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
971                 if (WARN_ON(!pdp->page_directory[i]))
972                         continue;
973
974                 gen8_free_page_tables(dev, pdp->page_directory[i]);
975                 free_pd(dev, pdp->page_directory[i]);
976         }
977
978         free_pdp(dev, pdp);
979 }
980
981 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
982 {
983         int i;
984
985         for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
986                 if (WARN_ON(!ppgtt->pml4.pdps[i]))
987                         continue;
988
989                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
990         }
991
992         cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
993 }
994
995 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
996 {
997         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
998
999         if (intel_vgpu_active(to_i915(vm->dev)))
1000                 gen8_ppgtt_notify_vgt(ppgtt, false);
1001
1002         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1003                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1004         else
1005                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1006
1007         gen8_free_scratch(vm);
1008 }
1009
1010 /**
1011  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1012  * @vm: Master vm structure.
1013  * @pd: Page directory for this address range.
1014  * @start:      Starting virtual address to begin allocations.
1015  * @length:     Size of the allocations.
1016  * @new_pts:    Bitmap set by function with new allocations. Likely used by the
1017  *              caller to free on error.
1018  *
1019  * Allocate the required number of page tables. Extremely similar to
1020  * gen8_ppgtt_alloc_page_directories(). The main difference is that here we are
1021  * limited by the page directory boundary (instead of the page directory
1022  * pointer). That boundary is 1GB virtual. Therefore, unlike
1023  * gen8_ppgtt_alloc_page_directories(), it is possible, and likely, that the caller
1024  * will need to use multiple calls of this function to achieve the appropriate allocation.
1025  *
1026  * Return: 0 if success; negative error code otherwise.
1027  */
1028 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1029                                      struct i915_page_directory *pd,
1030                                      uint64_t start,
1031                                      uint64_t length,
1032                                      unsigned long *new_pts)
1033 {
1034         struct drm_device *dev = vm->dev;
1035         struct i915_page_table *pt;
1036         uint32_t pde;
1037
1038         gen8_for_each_pde(pt, pd, start, length, pde) {
1039                 /* Don't reallocate page tables */
1040                 if (test_bit(pde, pd->used_pdes)) {
1041                         /* Scratch is never allocated this way */
1042                         WARN_ON(pt == vm->scratch_pt);
1043                         continue;
1044                 }
1045
1046                 pt = alloc_pt(dev);
1047                 if (IS_ERR(pt))
1048                         goto unwind_out;
1049
1050                 gen8_initialize_pt(vm, pt);
1051                 pd->page_table[pde] = pt;
1052                 __set_bit(pde, new_pts);
1053                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1054         }
1055
1056         return 0;
1057
1058 unwind_out:
1059         for_each_set_bit(pde, new_pts, I915_PDES)
1060                 free_pt(dev, pd->page_table[pde]);
1061
1062         return -ENOMEM;
1063 }
1064
1065 /**
1066  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1067  * @vm: Master vm structure.
1068  * @pdp:        Page directory pointer for this address range.
1069  * @start:      Starting virtual address to begin allocations.
1070  * @length:     Size of the allocations.
1071  * @new_pds:    Bitmap set by function with new allocations. Likely used by the
1072  *              caller to free on error.
1073  *
1074  * Allocate the required number of page directories starting at the pdpe index of
1075  * @start, and ending at the pdpe index of @start + @length. This function will skip
1076  * over already allocated page directories within the range, and only allocate
1077  * new ones, setting the appropriate pointer within the pdp as well as the
1078  * correct position in the bitmap @new_pds.
1079  *
1080  * The function will only allocate the pages within the range for a given page
1081  * directory pointer. In other words, if @start + @length straddles a virtually
1082  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1083  * required by the caller. This is not currently possible, and the BUG in the
1084  * code will prevent it.
1085  *
1086  * Return: 0 if success; negative error code otherwise.
1087  */
1088 static int
1089 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1090                                   struct i915_page_directory_pointer *pdp,
1091                                   uint64_t start,
1092                                   uint64_t length,
1093                                   unsigned long *new_pds)
1094 {
1095         struct drm_device *dev = vm->dev;
1096         struct i915_page_directory *pd;
1097         uint32_t pdpe;
1098         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1099
1100         WARN_ON(!bitmap_empty(new_pds, pdpes));
1101
1102         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1103                 if (test_bit(pdpe, pdp->used_pdpes))
1104                         continue;
1105
1106                 pd = alloc_pd(dev);
1107                 if (IS_ERR(pd))
1108                         goto unwind_out;
1109
1110                 gen8_initialize_pd(vm, pd);
1111                 pdp->page_directory[pdpe] = pd;
1112                 __set_bit(pdpe, new_pds);
1113                 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1114         }
1115
1116         return 0;
1117
1118 unwind_out:
1119         for_each_set_bit(pdpe, new_pds, pdpes)
1120                 free_pd(dev, pdp->page_directory[pdpe]);
1121
1122         return -ENOMEM;
1123 }
1124
1125 /**
1126  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1127  * @vm: Master vm structure.
1128  * @pml4:       Page map level 4 for this address range.
1129  * @start:      Starting virtual address to begin allocations.
1130  * @length:     Size of the allocations.
1131  * @new_pdps:   Bitmap set by function with new allocations. Likely used by the
1132  *              caller to free on error.
1133  *
1134  * Allocate the required number of page directory pointers. Extremely similar to
1135  * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1136  * The main difference is that here we are limited by the pml4 boundary (instead of
1137  * the page directory pointer).
1138  *
1139  * Return: 0 if success; negative error code otherwise.
1140  */
1141 static int
1142 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1143                                   struct i915_pml4 *pml4,
1144                                   uint64_t start,
1145                                   uint64_t length,
1146                                   unsigned long *new_pdps)
1147 {
1148         struct drm_device *dev = vm->dev;
1149         struct i915_page_directory_pointer *pdp;
1150         uint32_t pml4e;
1151
1152         WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1153
1154         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1155                 if (!test_bit(pml4e, pml4->used_pml4es)) {
1156                         pdp = alloc_pdp(dev);
1157                         if (IS_ERR(pdp))
1158                                 goto unwind_out;
1159
1160                         gen8_initialize_pdp(vm, pdp);
1161                         pml4->pdps[pml4e] = pdp;
1162                         __set_bit(pml4e, new_pdps);
1163                         trace_i915_page_directory_pointer_entry_alloc(vm,
1164                                                                       pml4e,
1165                                                                       start,
1166                                                                       GEN8_PML4E_SHIFT);
1167                 }
1168         }
1169
1170         return 0;
1171
1172 unwind_out:
1173         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1174                 free_pdp(dev, pml4->pdps[pml4e]);
1175
1176         return -ENOMEM;
1177 }
1178
1179 static void
1180 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1181 {
1182         kfree(new_pts);
1183         kfree(new_pds);
1184 }
1185
1186 /* Allocates the page directory bitmap and the array of page table bitmaps.
1187  * Both of these are sized based on the number of PDPEs in the system.
1188  */
1189 static
1190 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1191                                          unsigned long **new_pts,
1192                                          uint32_t pdpes)
1193 {
1194         unsigned long *pds;
1195         unsigned long *pts;
1196
1197         pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1198         if (!pds)
1199                 return -ENOMEM;
1200
1201         pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1202                       GFP_TEMPORARY);
1203         if (!pts)
1204                 goto err_out;
1205
1206         *new_pds = pds;
1207         *new_pts = pts;
1208
1209         return 0;
1210
1211 err_out:
1212         free_gen8_temp_bitmaps(pds, pts);
1213         return -ENOMEM;
1214 }
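
/*
 * Layout of the temporary bitmaps allocated above: *new_pds holds one bit per
 * possible page directory (pdpes bits in total), while *new_pts is an array
 * of pdpes consecutive bitmaps, each I915_PDES bits wide, indexed later as
 * new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES).
 */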
1215
1216 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1217  * the page table structures, we mark them dirty so that
1218  * context switching/execlist queuing code takes extra steps
1219  * to ensure that tlbs are flushed.
1220  */
1221 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1222 {
1223         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1224 }
1225
1226 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1227                                     struct i915_page_directory_pointer *pdp,
1228                                     uint64_t start,
1229                                     uint64_t length)
1230 {
1231         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1232         unsigned long *new_page_dirs, *new_page_tables;
1233         struct drm_device *dev = vm->dev;
1234         struct i915_page_directory *pd;
1235         const uint64_t orig_start = start;
1236         const uint64_t orig_length = length;
1237         uint32_t pdpe;
1238         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1239         int ret;
1240
1241         /* Wrap is never okay since we can only represent 48b, and we don't
1242          * actually use the other side of the canonical address space.
1243          */
1244         if (WARN_ON(start + length < start))
1245                 return -ENODEV;
1246
1247         if (WARN_ON(start + length > vm->total))
1248                 return -ENODEV;
1249
1250         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1251         if (ret)
1252                 return ret;
1253
1254         /* Do the allocations first so we can easily bail out */
1255         ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1256                                                 new_page_dirs);
1257         if (ret) {
1258                 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1259                 return ret;
1260         }
1261
1262         /* For every page directory referenced, allocate page tables */
1263         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1264                 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1265                                                 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1266                 if (ret)
1267                         goto err_out;
1268         }
1269
1270         start = orig_start;
1271         length = orig_length;
1272
1273         /* Allocations have completed successfully, so set the bitmaps, and do
1274          * the mappings. */
1275         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1276                 gen8_pde_t *const page_directory = kmap_px(pd);
1277                 struct i915_page_table *pt;
1278                 uint64_t pd_len = length;
1279                 uint64_t pd_start = start;
1280                 uint32_t pde;
1281
1282                 /* Every pd should be allocated; we just did that above. */
1283                 WARN_ON(!pd);
1284
1285                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1286                         /* Same reasoning as pd */
1287                         WARN_ON(!pt);
1288                         WARN_ON(!pd_len);
1289                         WARN_ON(!gen8_pte_count(pd_start, pd_len));
1290
1291                         /* Set our used ptes within the page table */
1292                         bitmap_set(pt->used_ptes,
1293                                    gen8_pte_index(pd_start),
1294                                    gen8_pte_count(pd_start, pd_len));
1295
1296                         /* Our pde is now pointing to the pagetable, pt */
1297                         __set_bit(pde, pd->used_pdes);
1298
1299                         /* Map the PDE to the page table */
1300                         page_directory[pde] = gen8_pde_encode(px_dma(pt),
1301                                                               I915_CACHE_LLC);
1302                         trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1303                                                         gen8_pte_index(start),
1304                                                         gen8_pte_count(start, length),
1305                                                         GEN8_PTES);
1306
1307                         /* NB: We haven't yet mapped ptes to pages. At this
1308                          * point we're still relying on insert_entries() */
1309                 }
1310
1311                 kunmap_px(ppgtt, page_directory);
1312                 __set_bit(pdpe, pdp->used_pdpes);
1313                 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1314         }
1315
1316         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1317         mark_tlbs_dirty(ppgtt);
1318         return 0;
1319
1320 err_out:
1321         while (pdpe--) {
1322                 unsigned long temp;
1323
1324                 for_each_set_bit(temp, new_page_tables + pdpe *
1325                                 BITS_TO_LONGS(I915_PDES), I915_PDES)
1326                         free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1327         }
1328
1329         for_each_set_bit(pdpe, new_page_dirs, pdpes)
1330                 free_pd(dev, pdp->page_directory[pdpe]);
1331
1332         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1333         mark_tlbs_dirty(ppgtt);
1334         return ret;
1335 }
1336
1337 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1338                                     struct i915_pml4 *pml4,
1339                                     uint64_t start,
1340                                     uint64_t length)
1341 {
1342         DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1343         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1344         struct i915_page_directory_pointer *pdp;
1345         uint64_t pml4e;
1346         int ret = 0;
1347
1348         /* Do the pml4 allocations first, so we don't need to track the newly
1349          * allocated tables below the pdp */
1350         bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1351
1352         /* The page directory and page table allocations are done in the shared
1353          * 3 and 4 level code. Just allocate the pdps.
1354          */
1355         ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1356                                                 new_pdps);
1357         if (ret)
1358                 return ret;
1359
1360         WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1361              "The allocation has spanned more than 512GB. "
1362              "It is highly likely this is incorrect.");
1363
1364         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1365                 WARN_ON(!pdp);
1366
1367                 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1368                 if (ret)
1369                         goto err_out;
1370
1371                 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1372         }
1373
1374         bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1375                   GEN8_PML4ES_PER_PML4);
1376
1377         return 0;
1378
1379 err_out:
1380         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1381                 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1382
1383         return ret;
1384 }
1385
1386 static int gen8_alloc_va_range(struct i915_address_space *vm,
1387                                uint64_t start, uint64_t length)
1388 {
1389         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1390
1391         if (USES_FULL_48BIT_PPGTT(vm->dev))
1392                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1393         else
1394                 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1395 }
1396
1397 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1398                           uint64_t start, uint64_t length,
1399                           gen8_pte_t scratch_pte,
1400                           struct seq_file *m)
1401 {
1402         struct i915_page_directory *pd;
1403         uint32_t pdpe;
1404
1405         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1406                 struct i915_page_table *pt;
1407                 uint64_t pd_len = length;
1408                 uint64_t pd_start = start;
1409                 uint32_t pde;
1410
1411                 if (!test_bit(pdpe, pdp->used_pdpes))
1412                         continue;
1413
1414                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1415                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1416                         uint32_t  pte;
1417                         gen8_pte_t *pt_vaddr;
1418
1419                         if (!test_bit(pde, pd->used_pdes))
1420                                 continue;
1421
1422                         pt_vaddr = kmap_px(pt);
1423                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1424                                 uint64_t va =
1425                                         (pdpe << GEN8_PDPE_SHIFT) |
1426                                         (pde << GEN8_PDE_SHIFT) |
1427                                         (pte << GEN8_PTE_SHIFT);
1428                                 int i;
1429                                 bool found = false;
1430
1431                                 for (i = 0; i < 4; i++)
1432                                         if (pt_vaddr[pte + i] != scratch_pte)
1433                                                 found = true;
1434                                 if (!found)
1435                                         continue;
1436
1437                                 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1438                                 for (i = 0; i < 4; i++) {
1439                                         if (pt_vaddr[pte + i] != scratch_pte)
1440                                                 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1441                                         else
1442                                                 seq_puts(m, "  SCRATCH ");
1443                                 }
1444                                 seq_puts(m, "\n");
1445                         }
1446                         /* Don't use kunmap_px here; it could trigger
1447                          * an unnecessary flush.
1448                          */
1449                         kunmap_atomic(pt_vaddr);
1450                 }
1451         }
1452 }
1453
1454 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1455 {
1456         struct i915_address_space *vm = &ppgtt->base;
1457         uint64_t start = ppgtt->base.start;
1458         uint64_t length = ppgtt->base.total;
1459         gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
1460                                                  I915_CACHE_LLC, true, 0);
1461
1462         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1463                 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1464         } else {
1465                 uint64_t pml4e;
1466                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1467                 struct i915_page_directory_pointer *pdp;
1468
1469                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1470                         if (!test_bit(pml4e, pml4->used_pml4es))
1471                                 continue;
1472
1473                         seq_printf(m, "    PML4E #%llu\n", pml4e);
1474                         gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1475                 }
1476         }
1477 }
1478
1479 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1480 {
1481         unsigned long *new_page_dirs, *new_page_tables;
1482         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1483         int ret;
1484
1485         /* We allocate temp bitmaps for the page tables for no gain,
1486          * but as this is for init only, let's keep things simple.
1487          */
1488         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1489         if (ret)
1490                 return ret;
1491
1492         /* Allocate for all pdps regardless of how the ppgtt
1493          * was defined.
1494          */
1495         ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1496                                                 0, 1ULL << 32,
1497                                                 new_page_dirs);
1498         if (!ret)
1499                 *ppgtt->pdp.used_pdpes = *new_page_dirs;
1500
1501         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1502
1503         return ret;
1504 }
1505
1506 /*
1507  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1508  * registers, with a net effect resembling a 2-level page table in normal x86
1509  * terms. Each PDP entry represents 1GB of memory: 4 * 512 * 512 * 4096 bytes =
1510  * 4GB of legacy 32-bit address space.
1511  *
1512  */
1513 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1514 {
1515         int ret;
1516
1517         ret = gen8_init_scratch(&ppgtt->base);
1518         if (ret)
1519                 return ret;
1520
1521         ppgtt->base.start = 0;
1522         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1523         ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1524         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1525         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1526         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1527         ppgtt->base.bind_vma = ppgtt_bind_vma;
1528
1529         /*
1530          * From bdw, there is support for read-only pages in the PPGTT.
1531          *
1532          * XXX GVT is not honouring the lack of RW in the PTE bits.
1533          */
1534         ppgtt->base.has_read_only = !intel_vgpu_active(to_i915(ppgtt->base.dev));
1535
1536         ppgtt->debug_dump = gen8_dump_ppgtt;
1537
1538         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1539                 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1540                 if (ret)
1541                         goto free_scratch;
1542
1543                 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1544
1545                 ppgtt->base.total = 1ULL << 48;
1546                 ppgtt->switch_mm = gen8_48b_mm_switch;
1547         } else {
1548                 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1549                 if (ret)
1550                         goto free_scratch;
1551
1552                 ppgtt->base.total = 1ULL << 32;
1553                 ppgtt->switch_mm = gen8_legacy_mm_switch;
1554                 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1555                                                               0, 0,
1556                                                               GEN8_PML4E_SHIFT);
1557
1558                 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) {
1559                         ret = gen8_preallocate_top_level_pdps(ppgtt);
1560                         if (ret)
1561                                 goto free_scratch;
1562                 }
1563         }
1564
1565         if (intel_vgpu_active(to_i915(ppgtt->base.dev)))
1566                 gen8_ppgtt_notify_vgt(ppgtt, true);
1567
1568         return 0;
1569
1570 free_scratch:
1571         gen8_free_scratch(&ppgtt->base);
1572         return ret;
1573 }
1574
1575 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1576 {
1577         struct i915_address_space *vm = &ppgtt->base;
1578         struct i915_page_table *unused;
1579         gen6_pte_t scratch_pte;
1580         uint32_t pd_entry;
1581         uint32_t  pte, pde;
1582         uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1583
1584         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1585                                      I915_CACHE_LLC, true, 0);
1586
1587         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1588                 u32 expected;
1589                 gen6_pte_t *pt_vaddr;
1590                 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1591                 pd_entry = readl(ppgtt->pd_addr + pde);
1592                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1593
1594                 if (pd_entry != expected)
1595                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1596                                    pde,
1597                                    pd_entry,
1598                                    expected);
1599                 seq_printf(m, "\tPDE: %x\n", pd_entry);
1600
1601                 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1602
1603                 for (pte = 0; pte < GEN6_PTES; pte += 4) {
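                        /* Each PDE spans GEN6_PTES * PAGE_SIZE bytes of GPU
                         * address space; walk its PTEs four at a time and skip
                         * groups that are entirely scratch-backed.
                         */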
1604                         unsigned long va =
1605                                 (pde * PAGE_SIZE * GEN6_PTES) +
1606                                 (pte * PAGE_SIZE);
1607                         int i;
1608                         bool found = false;
1609                         for (i = 0; i < 4; i++)
1610                                 if (pt_vaddr[pte + i] != scratch_pte)
1611                                         found = true;
1612                         if (!found)
1613                                 continue;
1614
1615                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1616                         for (i = 0; i < 4; i++) {
1617                                 if (pt_vaddr[pte + i] != scratch_pte)
1618                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1619                                 else
1620                                         seq_puts(m, "  SCRATCH ");
1621                         }
1622                         seq_puts(m, "\n");
1623                 }
1624                 kunmap_px(ppgtt, pt_vaddr);
1625         }
1626 }
1627
1628 /* Point the PDE at index @pde in the page directory @pd at page table @pt */
1629 static void gen6_write_pde(struct i915_page_directory *pd,
1630                             const int pde, struct i915_page_table *pt)
1631 {
1632         /* Caller needs to make sure the write completes if necessary */
1633         struct i915_hw_ppgtt *ppgtt =
1634                 container_of(pd, struct i915_hw_ppgtt, pd);
1635         u32 pd_entry;
1636
1637         pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1638         pd_entry |= GEN6_PDE_VALID;
1639
1640         writel(pd_entry, ppgtt->pd_addr + pde);
1641 }
1642
1643 /* Write out the page-directory entries for all page tables in the given
1644  * range, in increasing PDE order. */
1645 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1646                                   struct i915_page_directory *pd,
1647                                   uint32_t start, uint32_t length)
1648 {
1649         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1650         struct i915_page_table *pt;
1651         uint32_t pde;
1652
1653         gen6_for_each_pde(pt, pd, start, length, pde)
1654                 gen6_write_pde(pd, pde, pt);
1655
1656         /* Make sure the write is complete before other code can use this
1657          * page table. Also required for WC mapped PTEs */
1658         readl(ggtt->gsm);
1659 }
1660
1661 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1662 {
1663         BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1664
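        /* The page directory lives in the GGTT and must be 64-byte aligned
         * (checked above); return its offset in 64-byte units, shifted into
         * the upper 16 bits of the value written to RING_PP_DIR_BASE.
         */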
1665         return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1666 }
1667
1668 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1669                          struct drm_i915_gem_request *req)
1670 {
1671         struct intel_ring *ring = req->ring;
1672         struct intel_engine_cs *engine = req->engine;
1673         int ret;
1674
1675         /* NB: TLBs must be flushed and invalidated before a switch */
1676         ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1677         if (ret)
1678                 return ret;
1679
1680         ret = intel_ring_begin(req, 6);
1681         if (ret)
1682                 return ret;
1683
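        /* One MI_LOAD_REGISTER_IMM header, two (reg, value) pairs and a
         * padding MI_NOOP: 6 dwords, matching intel_ring_begin(req, 6) above.
         */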
1684         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1685         intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
1686         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1687         intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
1688         intel_ring_emit(ring, get_pd_offset(ppgtt));
1689         intel_ring_emit(ring, MI_NOOP);
1690         intel_ring_advance(ring);
1691
1692         return 0;
1693 }
1694
1695 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1696                           struct drm_i915_gem_request *req)
1697 {
1698         struct intel_ring *ring = req->ring;
1699         struct intel_engine_cs *engine = req->engine;
1700         int ret;
1701
1702         /* NB: TLBs must be flushed and invalidated before a switch */
1703         ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1704         if (ret)
1705                 return ret;
1706
1707         ret = intel_ring_begin(req, 6);
1708         if (ret)
1709                 return ret;
1710
1711         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1712         intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
1713         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1714         intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
1715         intel_ring_emit(ring, get_pd_offset(ppgtt));
1716         intel_ring_emit(ring, MI_NOOP);
1717         intel_ring_advance(ring);
1718
1719         /* XXX: RCS is the only one to auto invalidate the TLBs? */
1720         if (engine->id != RCS) {
1721                 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1722                 if (ret)
1723                         return ret;
1724         }
1725
1726         return 0;
1727 }
1728
1729 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1730                           struct drm_i915_gem_request *req)
1731 {
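        /* gen6 (and vGPU, see gen6_ppgtt_init) programs PP_DIR_DCLV and
         * PP_DIR_BASE directly via MMIO rather than emitting LRI commands
         * from the ring.
         */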
1732         struct intel_engine_cs *engine = req->engine;
1733         struct drm_i915_private *dev_priv = req->i915;
1734
1735         I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1736         I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1737         return 0;
1738 }
1739
1740 static void gen8_ppgtt_enable(struct drm_device *dev)
1741 {
1742         struct drm_i915_private *dev_priv = to_i915(dev);
1743         struct intel_engine_cs *engine;
1744
1745         for_each_engine(engine, dev_priv) {
1746                 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1747                 I915_WRITE(RING_MODE_GEN7(engine),
1748                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1749         }
1750 }
1751
1752 static void gen7_ppgtt_enable(struct drm_device *dev)
1753 {
1754         struct drm_i915_private *dev_priv = to_i915(dev);
1755         struct intel_engine_cs *engine;
1756         uint32_t ecochk, ecobits;
1757
1758         ecobits = I915_READ(GAC_ECO_BITS);
1759         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1760
1761         ecochk = I915_READ(GAM_ECOCHK);
1762         if (IS_HASWELL(dev)) {
1763                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1764         } else {
1765                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1766                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1767         }
1768         I915_WRITE(GAM_ECOCHK, ecochk);
1769
1770         for_each_engine(engine, dev_priv) {
1771                 /* GFX_MODE is per-ring on gen7+ */
1772                 I915_WRITE(RING_MODE_GEN7(engine),
1773                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1774         }
1775 }
1776
1777 static void gen6_ppgtt_enable(struct drm_device *dev)
1778 {
1779         struct drm_i915_private *dev_priv = to_i915(dev);
1780         uint32_t ecochk, gab_ctl, ecobits;
1781
1782         ecobits = I915_READ(GAC_ECO_BITS);
1783         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1784                    ECOBITS_PPGTT_CACHE64B);
1785
1786         gab_ctl = I915_READ(GAB_CTL);
1787         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1788
1789         ecochk = I915_READ(GAM_ECOCHK);
1790         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1791
1792         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1793 }
1794
1795 /* PPGTT support for Sandybridge/Gen6 and later */
1796 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1797                                    uint64_t start,
1798                                    uint64_t length,
1799                                    bool use_scratch)
1800 {
1801         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1802         gen6_pte_t *pt_vaddr, scratch_pte;
1803         unsigned first_entry = start >> PAGE_SHIFT;
1804         unsigned num_entries = length >> PAGE_SHIFT;
1805         unsigned act_pt = first_entry / GEN6_PTES;
1806         unsigned first_pte = first_entry % GEN6_PTES;
1807         unsigned last_pte, i;
1808
1809         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1810                                      I915_CACHE_LLC, true, 0);
1811
1812         while (num_entries) {
1813                 last_pte = first_pte + num_entries;
1814                 if (last_pte > GEN6_PTES)
1815                         last_pte = GEN6_PTES;
1816
1817                 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1818
1819                 for (i = first_pte; i < last_pte; i++)
1820                         pt_vaddr[i] = scratch_pte;
1821
1822                 kunmap_px(ppgtt, pt_vaddr);
1823
1824                 num_entries -= last_pte - first_pte;
1825                 first_pte = 0;
1826                 act_pt++;
1827         }
1828 }
1829
1830 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1831                                       struct sg_table *pages,
1832                                       uint64_t start,
1833                                       enum i915_cache_level cache_level, u32 flags)
1834 {
1835         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1836         unsigned first_entry = start >> PAGE_SHIFT;
1837         unsigned act_pt = first_entry / GEN6_PTES;
1838         unsigned act_pte = first_entry % GEN6_PTES;
1839         gen6_pte_t *pt_vaddr = NULL;
1840         struct sgt_iter sgt_iter;
1841         dma_addr_t addr;
1842
1843         for_each_sgt_dma(addr, sgt_iter, pages) {
1844                 if (pt_vaddr == NULL)
1845                         pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1846
1847                 pt_vaddr[act_pte] =
1848                         vm->pte_encode(addr, cache_level, true, flags);
1849
1850                 if (++act_pte == GEN6_PTES) {
1851                         kunmap_px(ppgtt, pt_vaddr);
1852                         pt_vaddr = NULL;
1853                         act_pt++;
1854                         act_pte = 0;
1855                 }
1856         }
1857
1858         if (pt_vaddr)
1859                 kunmap_px(ppgtt, pt_vaddr);
1860 }
1861
1862 static int gen6_alloc_va_range(struct i915_address_space *vm,
1863                                uint64_t start_in, uint64_t length_in)
1864 {
1865         DECLARE_BITMAP(new_page_tables, I915_PDES);
1866         struct drm_device *dev = vm->dev;
1867         struct drm_i915_private *dev_priv = to_i915(dev);
1868         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1869         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1870         struct i915_page_table *pt;
1871         uint32_t start, length, start_save, length_save;
1872         uint32_t pde;
1873         int ret;
1874
1875         if (WARN_ON(start_in + length_in > ppgtt->base.total))
1876                 return -ENODEV;
1877
1878         start = start_save = start_in;
1879         length = length_save = length_in;
1880
1881         bitmap_zero(new_page_tables, I915_PDES);
1882
1883         /* The allocation is done in two stages so that we can bail out with
1884          * a minimal amount of pain. The first stage finds new page tables that
1885          * need allocation. The second stage marks the PTEs in use within those
1886          * page tables.
1887          */
1888         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1889                 if (pt != vm->scratch_pt) {
1890                         WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1891                         continue;
1892                 }
1893
1894                 /* We've already allocated a page table */
1895                 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1896
1897                 pt = alloc_pt(dev);
1898                 if (IS_ERR(pt)) {
1899                         ret = PTR_ERR(pt);
1900                         goto unwind_out;
1901                 }
1902
1903                 gen6_initialize_pt(vm, pt);
1904
1905                 ppgtt->pd.page_table[pde] = pt;
1906                 __set_bit(pde, new_page_tables);
1907                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1908         }
1909
1910         start = start_save;
1911         length = length_save;
1912
1913         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1914                 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1915
1916                 bitmap_zero(tmp_bitmap, GEN6_PTES);
1917                 bitmap_set(tmp_bitmap, gen6_pte_index(start),
1918                            gen6_pte_count(start, length));
1919
1920                 if (__test_and_clear_bit(pde, new_page_tables))
1921                         gen6_write_pde(&ppgtt->pd, pde, pt);
1922
1923                 trace_i915_page_table_entry_map(vm, pde, pt,
1924                                          gen6_pte_index(start),
1925                                          gen6_pte_count(start, length),
1926                                          GEN6_PTES);
1927                 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1928                                 GEN6_PTES);
1929         }
1930
1931         WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1932
1933         /* Make sure the write is complete before other code can use this
1934          * page table. Also required for WC mapped PTEs */
1935         readl(ggtt->gsm);
1936
1937         mark_tlbs_dirty(ppgtt);
1938         return 0;
1939
1940 unwind_out:
1941         for_each_set_bit(pde, new_page_tables, I915_PDES) {
1942                 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1943
1944                 ppgtt->pd.page_table[pde] = vm->scratch_pt;
1945                 free_pt(vm->dev, pt);
1946         }
1947
1948         mark_tlbs_dirty(ppgtt);
1949         return ret;
1950 }
1951
1952 static int gen6_init_scratch(struct i915_address_space *vm)
1953 {
1954         struct drm_device *dev = vm->dev;
1955         int ret;
1956
1957         ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
1958         if (ret)
1959                 return ret;
1960
1961         vm->scratch_pt = alloc_pt(dev);
1962         if (IS_ERR(vm->scratch_pt)) {
1963                 cleanup_scratch_page(dev, &vm->scratch_page);
1964                 return PTR_ERR(vm->scratch_pt);
1965         }
1966
1967         gen6_initialize_pt(vm, vm->scratch_pt);
1968
1969         return 0;
1970 }
1971
1972 static void gen6_free_scratch(struct i915_address_space *vm)
1973 {
1974         struct drm_device *dev = vm->dev;
1975
1976         free_pt(dev, vm->scratch_pt);
1977         cleanup_scratch_page(dev, &vm->scratch_page);
1978 }
1979
1980 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1981 {
1982         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1983         struct i915_page_directory *pd = &ppgtt->pd;
1984         struct drm_device *dev = vm->dev;
1985         struct i915_page_table *pt;
1986         uint32_t pde;
1987
1988         drm_mm_remove_node(&ppgtt->node);
1989
1990         gen6_for_all_pdes(pt, pd, pde)
1991                 if (pt != vm->scratch_pt)
1992                         free_pt(dev, pt);
1993
1994         gen6_free_scratch(vm);
1995 }
1996
1997 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1998 {
1999         struct i915_address_space *vm = &ppgtt->base;
2000         struct drm_device *dev = ppgtt->base.dev;
2001         struct drm_i915_private *dev_priv = to_i915(dev);
2002         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2003         bool retried = false;
2004         int ret;
2005
2006         /* The PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2007          * allocator works in address-space sizes, so the size is multiplied by
2008          * the page size. We allocate at the top of the GTT to avoid fragmentation.
2009          */
2010         BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2011
2012         ret = gen6_init_scratch(vm);
2013         if (ret)
2014                 return ret;
2015
2016 alloc:
2017         ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
2018                                                   &ppgtt->node, GEN6_PD_SIZE,
2019                                                   GEN6_PD_ALIGN, 0,
2020                                                   0, ggtt->base.total,
2021                                                   DRM_MM_TOPDOWN);
2022         if (ret == -ENOSPC && !retried) {
2023                 ret = i915_gem_evict_something(&ggtt->base,
2024                                                GEN6_PD_SIZE, GEN6_PD_ALIGN,
2025                                                I915_CACHE_NONE,
2026                                                0, ggtt->base.total,
2027                                                0);
2028                 if (ret)
2029                         goto err_out;
2030
2031                 retried = true;
2032                 goto alloc;
2033         }
2034
2035         if (ret)
2036                 goto err_out;
2037
2038
2039         if (ppgtt->node.start < ggtt->mappable_end)
2040                 DRM_DEBUG("Forced to use aperture for PDEs\n");
2041
2042         return 0;
2043
2044 err_out:
2045         gen6_free_scratch(vm);
2046         return ret;
2047 }
2048
2049 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2050 {
2051         return gen6_ppgtt_allocate_page_directories(ppgtt);
2052 }
2053
2054 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2055                                   uint64_t start, uint64_t length)
2056 {
2057         struct i915_page_table *unused;
2058         uint32_t pde;
2059
2060         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
2061                 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2062 }
2063
2064 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2065 {
2066         struct drm_device *dev = ppgtt->base.dev;
2067         struct drm_i915_private *dev_priv = to_i915(dev);
2068         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2069         int ret;
2070
2071         ppgtt->base.pte_encode = ggtt->base.pte_encode;
2072         if (intel_vgpu_active(dev_priv) || IS_GEN6(dev))
2073                 ppgtt->switch_mm = gen6_mm_switch;
2074         else if (IS_HASWELL(dev))
2075                 ppgtt->switch_mm = hsw_mm_switch;
2076         else if (IS_GEN7(dev))
2077                 ppgtt->switch_mm = gen7_mm_switch;
2078         else
2079                 BUG();
2080
2081         ret = gen6_ppgtt_alloc(ppgtt);
2082         if (ret)
2083                 return ret;
2084
2085         ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2086         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2087         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2088         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2089         ppgtt->base.bind_vma = ppgtt_bind_vma;
2090         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2091         ppgtt->base.start = 0;
2092         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
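        /* I915_PDES page-directory entries, each mapping GEN6_PTES 4KiB pages:
         * the 2GiB of address space reflected by PP_DIR_DCLV_2G in the
         * switch_mm paths above.
         */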
2093         ppgtt->debug_dump = gen6_dump_ppgtt;
2094
2095         ppgtt->pd.base.ggtt_offset =
2096                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2097
2098         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2099                 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2100
2101         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2102
2103         gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2104
2105         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2106                          ppgtt->node.size >> 20,
2107                          ppgtt->node.start / PAGE_SIZE);
2108
2109         DRM_DEBUG("Adding PPGTT at offset %x\n",
2110                   ppgtt->pd.base.ggtt_offset << 10);
2111
2112         return 0;
2113 }
2114
2115 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2116                            struct drm_i915_private *dev_priv)
2117 {
2118         ppgtt->base.dev = &dev_priv->drm;
2119
2120         if (INTEL_INFO(dev_priv)->gen < 8)
2121                 return gen6_ppgtt_init(ppgtt);
2122         else
2123                 return gen8_ppgtt_init(ppgtt);
2124 }
2125
2126 static void i915_address_space_init(struct i915_address_space *vm,
2127                                     struct drm_i915_private *dev_priv)
2128 {
2129         drm_mm_init(&vm->mm, vm->start, vm->total);
2130         INIT_LIST_HEAD(&vm->active_list);
2131         INIT_LIST_HEAD(&vm->inactive_list);
2132         INIT_LIST_HEAD(&vm->unbound_list);
2133         list_add_tail(&vm->global_link, &dev_priv->vm_list);
2134 }
2135
2136 static void gtt_write_workarounds(struct drm_device *dev)
2137 {
2138         struct drm_i915_private *dev_priv = to_i915(dev);
2139
2140         /* This function applies GTT-related workarounds. It is called on
2141          * driver load and after a GPU reset, so workarounds placed here are
2142          * reapplied even if a GPU reset overwrites them.
2143          */
2144         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2145         if (IS_BROADWELL(dev))
2146                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2147         else if (IS_CHERRYVIEW(dev))
2148                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2149         else if (IS_SKYLAKE(dev))
2150                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2151         else if (IS_BROXTON(dev))
2152                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2153 }
2154
2155 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2156                            struct drm_i915_private *dev_priv,
2157                            struct drm_i915_file_private *file_priv)
2158 {
2159         int ret;
2160
2161         ret = __hw_ppgtt_init(ppgtt, dev_priv);
2162         if (ret == 0) {
2163                 kref_init(&ppgtt->ref);
2164                 i915_address_space_init(&ppgtt->base, dev_priv);
2165                 ppgtt->base.file = file_priv;
2166         }
2167
2168         return ret;
2169 }
2170
2171 int i915_ppgtt_init_hw(struct drm_device *dev)
2172 {
2173         gtt_write_workarounds(dev);
2174
2175         /* In the case of execlists, PPGTT is enabled by the context descriptor
2176          * and the PDPs are contained within the context itself.  We don't
2177          * need to do anything here. */
2178         if (i915.enable_execlists)
2179                 return 0;
2180
2181         if (!USES_PPGTT(dev))
2182                 return 0;
2183
2184         if (IS_GEN6(dev))
2185                 gen6_ppgtt_enable(dev);
2186         else if (IS_GEN7(dev))
2187                 gen7_ppgtt_enable(dev);
2188         else if (INTEL_INFO(dev)->gen >= 8)
2189                 gen8_ppgtt_enable(dev);
2190         else
2191                 MISSING_CASE(INTEL_INFO(dev)->gen);
2192
2193         return 0;
2194 }
2195
2196 struct i915_hw_ppgtt *
2197 i915_ppgtt_create(struct drm_i915_private *dev_priv,
2198                   struct drm_i915_file_private *fpriv)
2199 {
2200         struct i915_hw_ppgtt *ppgtt;
2201         int ret;
2202
2203         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2204         if (!ppgtt)
2205                 return ERR_PTR(-ENOMEM);
2206
2207         ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
2208         if (ret) {
2209                 kfree(ppgtt);
2210                 return ERR_PTR(ret);
2211         }
2212
2213         trace_i915_ppgtt_create(&ppgtt->base);
2214
2215         return ppgtt;
2216 }
2217
2218 void i915_ppgtt_release(struct kref *kref)
2219 {
2220         struct i915_hw_ppgtt *ppgtt =
2221                 container_of(kref, struct i915_hw_ppgtt, ref);
2222
2223         trace_i915_ppgtt_release(&ppgtt->base);
2224
2225         /* vmas should already be unbound and destroyed */
2226         WARN_ON(!list_empty(&ppgtt->base.active_list));
2227         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2228         WARN_ON(!list_empty(&ppgtt->base.unbound_list));
2229
2230         list_del(&ppgtt->base.global_link);
2231         drm_mm_takedown(&ppgtt->base.mm);
2232
2233         ppgtt->base.cleanup(&ppgtt->base);
2234         kfree(ppgtt);
2235 }
2236
2237 /* Certain Gen5 chipsets require idling the GPU before
2238  * unmapping anything from the GTT when VT-d is enabled.
2239  */
2240 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2241 {
2242 #ifdef CONFIG_INTEL_IOMMU
2243         /* Query intel_iommu to see if we need the workaround. Presumably that
2244          * was loaded first.
2245          */
2246         if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped)
2247                 return true;
2248 #endif
2249         return false;
2250 }
2251
2252 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2253 {
2254         struct intel_engine_cs *engine;
2255
2256         if (INTEL_INFO(dev_priv)->gen < 6)
2257                 return;
2258
2259         for_each_engine(engine, dev_priv) {
2260                 u32 fault_reg;
2261                 fault_reg = I915_READ(RING_FAULT_REG(engine));
2262                 if (fault_reg & RING_FAULT_VALID) {
2263                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2264                                          "\tAddr: 0x%08lx\n"
2265                                          "\tAddress space: %s\n"
2266                                          "\tSource ID: %d\n"
2267                                          "\tType: %d\n",
2268                                          fault_reg & PAGE_MASK,
2269                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2270                                          RING_FAULT_SRCID(fault_reg),
2271                                          RING_FAULT_FAULT_TYPE(fault_reg));
2272                         I915_WRITE(RING_FAULT_REG(engine),
2273                                    fault_reg & ~RING_FAULT_VALID);
2274                 }
2275         }
2276         POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS]));
2277 }
2278
2279 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2280 {
2281         if (INTEL_INFO(dev_priv)->gen < 6) {
2282                 intel_gtt_chipset_flush();
2283         } else {
2284                 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2285                 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2286         }
2287 }
2288
2289 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2290 {
2291         struct drm_i915_private *dev_priv = to_i915(dev);
2292         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2293
2294         /* Don't bother messing with faults pre GEN6 as we have little
2295          * documentation supporting that it's a good idea.
2296          */
2297         if (INTEL_INFO(dev)->gen < 6)
2298                 return;
2299
2300         i915_check_and_clear_faults(dev_priv);
2301
2302         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
2303                              true);
2304
2305         i915_ggtt_flush(dev_priv);
2306 }
2307
2308 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2309 {
2310         if (!dma_map_sg(&obj->base.dev->pdev->dev,
2311                         obj->pages->sgl, obj->pages->nents,
2312                         PCI_DMA_BIDIRECTIONAL))
2313                 return -ENOSPC;
2314
2315         return 0;
2316 }
2317
2318 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2319 {
2320         writeq(pte, addr);
2321 }
2322
2323 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2324                                   dma_addr_t addr,
2325                                   uint64_t offset,
2326                                   enum i915_cache_level level,
2327                                   u32 unused)
2328 {
2329         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2330         gen8_pte_t __iomem *pte =
2331                 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
2332                 (offset >> PAGE_SHIFT);
2333         int rpm_atomic_seq;
2334
2335         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2336
2337         gen8_set_pte(pte, gen8_pte_encode(addr, level, true, 0));
2338
2339         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2340         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2341
2342         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2343 }
2344
2345 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2346                                      struct sg_table *st,
2347                                      uint64_t start,
2348                                      enum i915_cache_level level, u32 flags)
2349 {
2350         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2351         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2352         struct sgt_iter sgt_iter;
2353         gen8_pte_t __iomem *gtt_entries;
2354         gen8_pte_t gtt_entry;
2355         dma_addr_t addr;
2356         int rpm_atomic_seq;
2357         int i = 0;
2358
2359         /* The GTT does not support read-only mappings */
2360         GEM_BUG_ON(flags & PTE_READ_ONLY);
2361
2362         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2363
2364         /*
2365          * Note that we ignore PTE_READ_ONLY here. The caller must be careful
2366          * not to allow the user to override access to a read only page.
2367          */
2368
2369         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2370
2371         for_each_sgt_dma(addr, sgt_iter, st) {
2372                 gtt_entry = gen8_pte_encode(addr, level, true, 0);
2373                 gen8_set_pte(&gtt_entries[i++], gtt_entry);
2374         }
2375
2376         /*
2377          * XXX: This serves as a posting read to make sure that the PTE has
2378          * actually been updated. There is some concern that, even though the
2379          * registers and PTEs are within the same BAR, they may be subject to
2380          * different (NUMA-like) access patterns. Therefore, even with the way
2381          * the hardware is assumed to work, we keep this posting read out of paranoia.
2382          */
2383         if (i != 0)
2384                 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
2385
2386         /* This next bit makes the above posting read even more important. We
2387          * want to flush the TLBs only after we're certain all the PTE updates
2388          * have finished.
2389          */
2390         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2391         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2392
2393         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2394 }
2395
2396 struct insert_entries {
2397         struct i915_address_space *vm;
2398         struct sg_table *st;
2399         uint64_t start;
2400         enum i915_cache_level level;
2401         u32 flags;
2402 };
2403
2404 static int gen8_ggtt_insert_entries__cb(void *_arg)
2405 {
2406         struct insert_entries *arg = _arg;
2407         gen8_ggtt_insert_entries(arg->vm, arg->st,
2408                                  arg->start, arg->level, arg->flags);
2409         return 0;
2410 }
2411
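/*
 * Run the GGTT update under stop_machine(): all other CPUs are quiesced while
 * the PTEs are rewritten, so nothing can race with, or observe, a partially
 * updated GGTT on platforms that require fully serialised updates.
 */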
2412 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2413                                           struct sg_table *st,
2414                                           uint64_t start,
2415                                           enum i915_cache_level level,
2416                                           u32 flags)
2417 {
2418         struct insert_entries arg = { vm, st, start, level, flags };
2419         stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2420 }
2421
2422 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2423                                   dma_addr_t addr,
2424                                   uint64_t offset,
2425                                   enum i915_cache_level level,
2426                                   u32 flags)
2427 {
2428         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2429         gen6_pte_t __iomem *pte =
2430                 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
2431                 (offset >> PAGE_SHIFT);
2432         int rpm_atomic_seq;
2433
2434         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2435
2436         iowrite32(vm->pte_encode(addr, level, true, flags), pte);
2437
2438         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2439         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2440
2441         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2442 }
2443
2444 /*
2445  * Binds an object into the global gtt with the specified cache level. The object
2446  * will be accessible to the GPU via commands whose operands reference offsets
2447  * within the global GTT as well as accessible by the GPU through the GMADR
2448  * mapped BAR (dev_priv->mm.gtt->gtt).
2449  */
2450 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2451                                      struct sg_table *st,
2452                                      uint64_t start,
2453                                      enum i915_cache_level level, u32 flags)
2454 {
2455         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2456         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2457         struct sgt_iter sgt_iter;
2458         gen6_pte_t __iomem *gtt_entries;
2459         gen6_pte_t gtt_entry;
2460         dma_addr_t addr;
2461         int rpm_atomic_seq;
2462         int i = 0;
2463
2464         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2465
2466         gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2467
2468         for_each_sgt_dma(addr, sgt_iter, st) {
2469                 gtt_entry = vm->pte_encode(addr, level, true, flags);
2470                 iowrite32(gtt_entry, &gtt_entries[i++]);
2471         }
2472
2473         /* XXX: This serves as a posting read to make sure that the PTE has
2474          * actually been updated. There is some concern that, even though the
2475          * registers and PTEs are within the same BAR, they may be subject to
2476          * different (NUMA-like) access patterns. Therefore, even with the way
2477          * the hardware is assumed to work, we keep this posting read out of paranoia.
2478          */
2479         if (i != 0)
2480                 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
2481
2482         /* This next bit makes the above posting read even more important. We
2483          * want to flush the TLBs only after we're certain all the PTE updates
2484          * have finished.
2485          */
2486         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2487         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2488
2489         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2490 }
2491
2492 static void nop_clear_range(struct i915_address_space *vm,
2493                             uint64_t start,
2494                             uint64_t length,
2495                             bool use_scratch)
2496 {
2497 }
2498
2499 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2500                                   uint64_t start,
2501                                   uint64_t length,
2502                                   bool use_scratch)
2503 {
2504         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2505         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2506         unsigned first_entry = start >> PAGE_SHIFT;
2507         unsigned num_entries = length >> PAGE_SHIFT;
2508         gen8_pte_t scratch_pte, __iomem *gtt_base =
2509                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2510         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2511         int i;
2512         int rpm_atomic_seq;
2513
2514         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2515
2516         if (WARN(num_entries > max_entries,
2517                  "First entry = %d; Num entries = %d (max=%d)\n",
2518                  first_entry, num_entries, max_entries))
2519                 num_entries = max_entries;
2520
2521         scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
2522                                       I915_CACHE_LLC,
2523                                       use_scratch, 0);
2524         for (i = 0; i < num_entries; i++)
2525                 gen8_set_pte(&gtt_base[i], scratch_pte);
2526         readl(gtt_base);
2527
2528         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2529 }
2530
2531 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2532                                   uint64_t start,
2533                                   uint64_t length,
2534                                   bool use_scratch)
2535 {
2536         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2537         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2538         unsigned first_entry = start >> PAGE_SHIFT;
2539         unsigned num_entries = length >> PAGE_SHIFT;
2540         gen6_pte_t scratch_pte, __iomem *gtt_base =
2541                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2542         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2543         int i;
2544         int rpm_atomic_seq;
2545
2546         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2547
2548         if (WARN(num_entries > max_entries,
2549                  "First entry = %d; Num entries = %d (max=%d)\n",
2550                  first_entry, num_entries, max_entries))
2551                 num_entries = max_entries;
2552
2553         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2554                                      I915_CACHE_LLC, use_scratch, 0);
2555
2556         for (i = 0; i < num_entries; i++)
2557                 iowrite32(scratch_pte, &gtt_base[i]);
2558         readl(gtt_base);
2559
2560         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2561 }
2562
2563 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2564                                   dma_addr_t addr,
2565                                   uint64_t offset,
2566                                   enum i915_cache_level cache_level,
2567                                   u32 unused)
2568 {
2569         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2570         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2571                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2572         int rpm_atomic_seq;
2573
2574         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2575
2576         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2577
2578         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2579 }
2580
2581 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2582                                      struct sg_table *pages,
2583                                      uint64_t start,
2584                                      enum i915_cache_level cache_level, u32 unused)
2585 {
2586         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2587         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2588                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2589         int rpm_atomic_seq;
2590
2591         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2592
2593         intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2594
2595         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2596
2597 }
2598
2599 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2600                                   uint64_t start,
2601                                   uint64_t length,
2602                                   bool unused)
2603 {
2604         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2605         unsigned first_entry = start >> PAGE_SHIFT;
2606         unsigned num_entries = length >> PAGE_SHIFT;
2607         int rpm_atomic_seq;
2608
2609         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2610
2611         intel_gtt_clear_range(first_entry, num_entries);
2612
2613         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2614 }
2615
2616 static int ggtt_bind_vma(struct i915_vma *vma,
2617                          enum i915_cache_level cache_level,
2618                          u32 flags)
2619 {
2620         struct drm_i915_gem_object *obj = vma->obj;
2621         u32 pte_flags = 0;
2622         int ret;
2623
2624         ret = i915_get_ggtt_vma_pages(vma);
2625         if (ret)
2626                 return ret;
2627
2628         /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
2629         if (i915_gem_object_is_readonly(obj))
2630                 pte_flags |= PTE_READ_ONLY;
2631
2632         vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
2633                                 cache_level, pte_flags);
2634
2635         /*
2636          * Without aliasing PPGTT there's no difference between
2637          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2638          * upgrade to both bound if we bind either to avoid double-binding.
2639          */
2640         vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2641
2642         return 0;
2643 }
2644
2645 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2646                                  enum i915_cache_level cache_level,
2647                                  u32 flags)
2648 {
2649         u32 pte_flags;
2650         int ret;
2651
2652         ret = i915_get_ggtt_vma_pages(vma);
2653         if (ret)
2654                 return ret;
2655
2656         /* Currently applicable only to VLV */
2657         pte_flags = 0;
2658         if (i915_gem_object_is_readonly(vma->obj))
2659                 pte_flags |= PTE_READ_ONLY;
2660
2661
2662         if (flags & I915_VMA_GLOBAL_BIND) {
2663                 vma->vm->insert_entries(vma->vm,
2664                                         vma->pages, vma->node.start,
2665                                         cache_level, pte_flags);
2666         }
2667
2668         if (flags & I915_VMA_LOCAL_BIND) {
2669                 struct i915_hw_ppgtt *appgtt =
2670                         to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2671                 appgtt->base.insert_entries(&appgtt->base,
2672                                             vma->pages, vma->node.start,
2673                                             cache_level, pte_flags);
2674         }
2675
2676         return 0;
2677 }
2678
2679 static void ggtt_unbind_vma(struct i915_vma *vma)
2680 {
2681         struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2682         const u64 size = min(vma->size, vma->node.size);
2683
2684         if (vma->flags & I915_VMA_GLOBAL_BIND)
2685                 vma->vm->clear_range(vma->vm,
2686                                      vma->node.start, size,
2687                                      true);
2688
2689         if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
2690                 appgtt->base.clear_range(&appgtt->base,
2691                                          vma->node.start, size,
2692                                          true);
2693 }
2694
2695 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2696 {
2697         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2698         struct device *kdev = &dev_priv->drm.pdev->dev;
2699         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2700
2701         if (unlikely(ggtt->do_idle_maps)) {
2702                 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
2703                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2704                         /* Wait a bit, in hopes it avoids the hang */
2705                         udelay(10);
2706                 }
2707         }
2708
2709         dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents,
2710                      PCI_DMA_BIDIRECTIONAL);
2711 }
2712
2713 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2714                                   unsigned long color,
2715                                   u64 *start,
2716                                   u64 *end)
2717 {
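        /* Keep a one-page (4096 byte) guard between nodes of differing cache
         * "color": nudge the start past a differently coloured predecessor and
         * pull the end in ahead of a differently coloured successor.
         */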
2718         if (node->color != color)
2719                 *start += 4096;
2720
2721         node = list_first_entry_or_null(&node->node_list,
2722                                         struct drm_mm_node,
2723                                         node_list);
2724         if (node && node->allocated && node->color != color)
2725                 *end -= 4096;
2726 }
2727
2728 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2729 {
2730         /* Let GEM manage all of the aperture.
2731          *
2732          * However, leave one page at the end still bound to the scratch page.
2733          * There are a number of places where the hardware apparently prefetches
2734          * past the end of the object, and we've seen multiple hangs with the
2735          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2736          * aperture.  One page should be enough to keep any prefetching inside
2737          * of the aperture.
2738          */
2739         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2740         unsigned long hole_start, hole_end;
2741         struct drm_mm_node *entry;
2742         int ret;
2743
2744         ret = intel_vgt_balloon(dev_priv);
2745         if (ret)
2746                 return ret;
2747
2748         /* Clear any non-preallocated blocks */
2749         drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2750                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2751                               hole_start, hole_end);
2752                 ggtt->base.clear_range(&ggtt->base, hole_start,
2753                                      hole_end - hole_start, true);
2754         }
2755
2756         /* And finally clear the reserved guard page */
2757         ggtt->base.clear_range(&ggtt->base,
2758                                ggtt->base.total - PAGE_SIZE, PAGE_SIZE,
2759                                true);
2760
2761         if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2762                 struct i915_hw_ppgtt *ppgtt;
2763
2764                 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2765                 if (!ppgtt)
2766                         return -ENOMEM;
2767
2768                 ret = __hw_ppgtt_init(ppgtt, dev_priv);
2769                 if (ret) {
2770                         kfree(ppgtt);
2771                         return ret;
2772                 }
2773
2774                 if (ppgtt->base.allocate_va_range)
2775                         ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2776                                                             ppgtt->base.total);
2777                 if (ret) {
2778                         ppgtt->base.cleanup(&ppgtt->base);
2779                         kfree(ppgtt);
2780                         return ret;
2781                 }
2782
2783                 ppgtt->base.clear_range(&ppgtt->base,
2784                                         ppgtt->base.start,
2785                                         ppgtt->base.total,
2786                                         true);
2787
2788                 dev_priv->mm.aliasing_ppgtt = ppgtt;
2789                 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2790                 ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2791         }
2792
2793         return 0;
2794 }
2795
2796 /**
2797  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2798  * @dev_priv: i915 device
2799  */
2800 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2801 {
2802         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2803
2804         if (dev_priv->mm.aliasing_ppgtt) {
2805                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2806                 ppgtt->base.cleanup(&ppgtt->base);
2807                 kfree(ppgtt);
2808         }
2809
2810         i915_gem_cleanup_stolen(&dev_priv->drm);
2811
2812         if (drm_mm_initialized(&ggtt->base.mm)) {
2813                 intel_vgt_deballoon(dev_priv);
2814
2815                 drm_mm_takedown(&ggtt->base.mm);
2816                 list_del(&ggtt->base.global_link);
2817         }
2818
2819         ggtt->base.cleanup(&ggtt->base);
2820
2821         arch_phys_wc_del(ggtt->mtrr);
2822         io_mapping_fini(&ggtt->mappable);
2823 }
2824
2825 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2826 {
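        /* GGMS encodes the GTT size in MiB: e.g. a field value of 2 yields
         * 2 << 20 = 2MiB of PTE space.
         */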
2827         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2828         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2829         return snb_gmch_ctl << 20;
2830 }
2831
2832 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2833 {
2834         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2835         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2836         if (bdw_gmch_ctl)
2837                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2838
2839 #ifdef CONFIG_X86_32
2840         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2841         if (bdw_gmch_ctl > 4)
2842                 bdw_gmch_ctl = 4;
2843 #endif
2844
2845         return bdw_gmch_ctl << 20;
2846 }
2847
2848 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2849 {
2850         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2851         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2852
2853         if (gmch_ctrl)
2854                 return 1 << (20 + gmch_ctrl);
2855
2856         return 0;
2857 }
2858
2859 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2860 {
2861         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2862         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2863         return snb_gmch_ctl << 25; /* 32 MB units */
2864 }
2865
2866 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2867 {
2868         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2869         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2870         return bdw_gmch_ctl << 25; /* 32 MB units */
2871 }
2872
2873 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2874 {
2875         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2876         gmch_ctrl &= SNB_GMCH_GMS_MASK;
2877
2878         /*
2879          * 0x0  to 0x10: 32MB increments starting at 0MB
2880          * 0x11 to 0x16: 4MB increments starting at 8MB
2881          * 0x17 to 0x1d: 4MB increments starting at 36MB
2882          */
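        /* e.g. 0x11 -> (0x11 - 0x11 + 2) << 22 = 8MB,
         *      0x17 -> (0x17 - 0x17 + 9) << 22 = 36MB.
         */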
2883         if (gmch_ctrl < 0x11)
2884                 return gmch_ctrl << 25;
2885         else if (gmch_ctrl < 0x17)
2886                 return (gmch_ctrl - 0x11 + 2) << 22;
2887         else
2888                 return (gmch_ctrl - 0x17 + 9) << 22;
2889 }
2890
2891 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2892 {
2893         gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2894         gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2895
2896         if (gen9_gmch_ctl < 0xf0)
2897                 return gen9_gmch_ctl << 25; /* 32 MB units */
2898         else
2899                 /* 0xf0 and up: 4MB increments starting at 4MB */
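                /* e.g. 0xf0 -> 4MB, 0xf1 -> 8MB. */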
2900                 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2901 }
2902
2903 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2904 {
2905         struct pci_dev *pdev = ggtt->base.dev->pdev;
2906         phys_addr_t phys_addr;
2907         int ret;
2908
2909         /* For modern GENs the PTEs and register space are split in the BAR */
2910         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2911
2912         /*
2913          * On BXT writes larger than 64 bit to the GTT pagetable range will be
2914          * dropped. For WC mappings in general we have 64 byte burst writes
2915          * when the WC buffer is flushed, so we can't use it, but have to
2916          * resort to an uncached mapping. The WC issue is easily caught by the
2917          * readback check when writing GTT PTE entries.
2918          */
2919         if (IS_BROXTON(ggtt->base.dev))
2920                 ggtt->gsm = ioremap_nocache(phys_addr, size);
2921         else
2922                 ggtt->gsm = ioremap_wc(phys_addr, size);
2923         if (!ggtt->gsm) {
2924                 DRM_ERROR("Failed to map the ggtt page table\n");
2925                 return -ENOMEM;
2926         }
2927
2928         ret = setup_scratch_page(ggtt->base.dev,
2929                                  &ggtt->base.scratch_page,
2930                                  GFP_DMA32);
2931         if (ret) {
2932                 DRM_ERROR("Scratch setup failed\n");
2933                 /* iounmap will also get called at remove, but meh */
2934                 iounmap(ggtt->gsm);
2935                 return ret;
2936         }
2937
2938         return 0;
2939 }
2940
2941 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2942  * bits. When using advanced contexts each context stores its own PAT, but
2943  * writing this data shouldn't be harmful even in those cases. */
2944 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
2945 {
2946         uint64_t pat;
2947
2948         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2949               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2950               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2951               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2952               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2953               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2954               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2955               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2956
2957         if (!USES_PPGTT(dev_priv))
2958                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
2959                  * so RTL will always use the value corresponding to
2960                  * pat_sel = 000".
2961                  * So let's disable cache for GGTT to avoid screen corruption.
2962                  * MOCS still can be used though.
2963                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
2964                  * before this patch, i.e. the same uncached + snooping access
2965                  * as on gen6/7 seems to be in effect.
2966                  * - So this just fixes blitter/render access. Again it looks
2967                  * like it's not just uncached access, but uncached + snooping.
2968                  * So we can still hold onto all our assumptions wrt cpu
2969                  * clflushing on LLC machines.
2970                  */
2971                 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
2972
2973         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2974          * write would work. */
2975         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
2976         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
2977 }
2978
2979 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
2980 {
2981         uint64_t pat;
2982
2983         /*
2984          * Map WB on BDW to snooped on CHV.
2985          *
2986          * Only the snoop bit has meaning for CHV, the rest is
2987          * ignored.
2988          *
2989          * The hardware will never snoop for certain types of accesses:
2990          * - CPU GTT (GMADR->GGTT->no snoop->memory)
2991          * - PPGTT page tables
2992          * - some other special cycles
2993          *
2994          * As with BDW, we also need to consider the following for GT accesses:
2995          * "For GGTT, there is NO pat_sel[2:0] from the entry,
2996          * so RTL will always use the value corresponding to
2997          * pat_sel = 000".
2998          * Which means we must set the snoop bit in PAT entry 0
2999          * in order to keep the global status page working.
3000          */
3001         pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3002               GEN8_PPAT(1, 0) |
3003               GEN8_PPAT(2, 0) |
3004               GEN8_PPAT(3, 0) |
3005               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3006               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3007               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3008               GEN8_PPAT(7, CHV_PPAT_SNOOP);
3009
3010         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3011         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3012 }
3013
3014 static void gen6_gmch_remove(struct i915_address_space *vm)
3015 {
3016         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3017
3018         iounmap(ggtt->gsm);
3019         cleanup_scratch_page(vm->dev, &vm->scratch_page);
3020 }
3021
3022 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3023 {
3024         struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3025         struct pci_dev *pdev = dev_priv->drm.pdev;
3026         unsigned int size;
3027         u16 snb_gmch_ctl;
3028
3029         /* TODO: We're not aware of mappable constraints on gen8 yet */
3030         ggtt->mappable_base = pci_resource_start(pdev, 2);
3031         ggtt->mappable_end = pci_resource_len(pdev, 2);
3032
3033         if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39)))
3034                 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3035
3036         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3037
3038         if (INTEL_GEN(dev_priv) >= 9) {
3039                 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3040                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3041         } else if (IS_CHERRYVIEW(dev_priv)) {
3042                 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3043                 size = chv_get_total_gtt_size(snb_gmch_ctl);
3044         } else {
3045                 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3046                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3047         }
3048
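             /*
              * Each 8-byte gen8 PTE maps one page, so e.g. an 8MiB GTT holds
              * 1M entries, i.e. 4GiB of GGTT address space (worked example,
              * assuming 4KiB pages, PAGE_SHIFT == 12).
              */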
3049         ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3050
3051         if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
3052                 chv_setup_private_ppat(dev_priv);
3053         else
3054                 bdw_setup_private_ppat(dev_priv);
3055
3056         ggtt->base.cleanup = gen6_gmch_remove;
3057         ggtt->base.bind_vma = ggtt_bind_vma;
3058         ggtt->base.unbind_vma = ggtt_unbind_vma;
3059         ggtt->base.insert_page = gen8_ggtt_insert_page;
3060         ggtt->base.clear_range = nop_clear_range;
3061         if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3062                 ggtt->base.clear_range = gen8_ggtt_clear_range;
3063
3064         ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3065         if (IS_CHERRYVIEW(dev_priv))
3066                 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
3067
3068         return ggtt_probe_common(ggtt, size);
3069 }
3070
3071 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3072 {
3073         struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3074         struct pci_dev *pdev = dev_priv->drm.pdev;
3075         unsigned int size;
3076         u16 snb_gmch_ctl;
3077
3078         ggtt->mappable_base = pci_resource_start(pdev, 2);
3079         ggtt->mappable_end = pci_resource_len(pdev, 2);
3080
3081         /* 64/512MB is the current min/max we actually know of, but this is just
3082          * a coarse sanity check.
3083          */
3084         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3085                 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3086                 return -ENXIO;
3087         }
3088
3089         if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
3090                 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3091         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3092
3093         ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3094
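             /*
              * Each 4-byte gen6 PTE maps one page, so e.g. a 2MiB GTT holds
              * 512K entries, i.e. 2GiB of GGTT address space (worked example,
              * assuming 4KiB pages, PAGE_SHIFT == 12).
              */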
3095         size = gen6_get_total_gtt_size(snb_gmch_ctl);
3096         ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3097
3098         ggtt->base.clear_range = gen6_ggtt_clear_range;
3099         ggtt->base.insert_page = gen6_ggtt_insert_page;
3100         ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3101         ggtt->base.bind_vma = ggtt_bind_vma;
3102         ggtt->base.unbind_vma = ggtt_unbind_vma;
3103         ggtt->base.cleanup = gen6_gmch_remove;
3104
3105         if (HAS_EDRAM(dev_priv))
3106                 ggtt->base.pte_encode = iris_pte_encode;
3107         else if (IS_HASWELL(dev_priv))
3108                 ggtt->base.pte_encode = hsw_pte_encode;
3109         else if (IS_VALLEYVIEW(dev_priv))
3110                 ggtt->base.pte_encode = byt_pte_encode;
3111         else if (INTEL_GEN(dev_priv) >= 7)
3112                 ggtt->base.pte_encode = ivb_pte_encode;
3113         else
3114                 ggtt->base.pte_encode = snb_pte_encode;
3115
3116         return ggtt_probe_common(ggtt, size);
3117 }
3118
3119 static void i915_gmch_remove(struct i915_address_space *vm)
3120 {
3121         intel_gmch_remove();
3122 }
3123
3124 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3125 {
3126         struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3127         int ret;
3128
3129         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3130         if (!ret) {
3131                 DRM_ERROR("failed to set up gmch\n");
3132                 return -EIO;
3133         }
3134
3135         intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
3136                       &ggtt->mappable_base, &ggtt->mappable_end);
3137
3138         ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3139         ggtt->base.insert_page = i915_ggtt_insert_page;
3140         ggtt->base.insert_entries = i915_ggtt_insert_entries;
3141         ggtt->base.clear_range = i915_ggtt_clear_range;
3142         ggtt->base.bind_vma = ggtt_bind_vma;
3143         ggtt->base.unbind_vma = ggtt_unbind_vma;
3144         ggtt->base.cleanup = i915_gmch_remove;
3145
3146         if (unlikely(ggtt->do_idle_maps))
3147                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3148
3149         return 0;
3150 }
3151
3152 /**
3153  * i915_ggtt_probe_hw - Probe GGTT hardware location
3154  * @dev_priv: i915 device
3155  */
3156 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3157 {
3158         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3159         int ret;
3160
3161         ggtt->base.dev = &dev_priv->drm;
3162
3163         if (INTEL_GEN(dev_priv) <= 5)
3164                 ret = i915_gmch_probe(ggtt);
3165         else if (INTEL_GEN(dev_priv) < 8)
3166                 ret = gen6_gmch_probe(ggtt);
3167         else
3168                 ret = gen8_gmch_probe(ggtt);
3169         if (ret)
3170                 return ret;
3171
3172         if ((ggtt->base.total - 1) >> 32) {
3173                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3174                           " of address space! Found %lldM!\n",
3175                           ggtt->base.total >> 20);
3176                 ggtt->base.total = 1ULL << 32;
3177                 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3178         }
3179
3180         if (ggtt->mappable_end > ggtt->base.total) {
3181                 DRM_ERROR("mappable aperture extends past end of GGTT,"
3182                           " aperture=%llx, total=%llx\n",
3183                           ggtt->mappable_end, ggtt->base.total);
3184                 ggtt->mappable_end = ggtt->base.total;
3185         }
3186
3187         /* GMADR is the PCI mmio aperture into the global GTT. */
3188         DRM_INFO("Memory usable by graphics device = %lluM\n",
3189                  ggtt->base.total >> 20);
3190         DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3191         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20);
3192 #ifdef CONFIG_INTEL_IOMMU
3193         if (intel_iommu_gfx_mapped)
3194                 DRM_INFO("VT-d active for gfx access\n");
3195 #endif
3196
3197         return 0;
3198 }
3199
3200 /**
3201  * i915_ggtt_init_hw - Initialize GGTT hardware
3202  * @dev_priv: i915 device
3203  */
3204 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3205 {
3206         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3207         int ret;
3208
3209         INIT_LIST_HEAD(&dev_priv->vm_list);
3210
3211         /* Subtract the guard page before address space initialization to
3212          * shrink the range used by drm_mm.
3213          */
3214         ggtt->base.total -= PAGE_SIZE;
3215         i915_address_space_init(&ggtt->base, dev_priv);
3216         ggtt->base.total += PAGE_SIZE;
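             /*
              * Restoring total keeps the reported GGTT size equal to the
              * hardware size; only the drm_mm range initialized above excludes
              * the final (guard) page, so it is never handed out.
              */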
3217
3218         /* Only VLV supports read-only GGTT mappings */
3219         ggtt->base.has_read_only = IS_VALLEYVIEW(dev_priv);
3220
3221         if (!HAS_LLC(dev_priv))
3222                 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3223
3224         if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
3225                                 dev_priv->ggtt.mappable_base,
3226                                 dev_priv->ggtt.mappable_end)) {
3227                 ret = -EIO;
3228                 goto out_gtt_cleanup;
3229         }
3230
3231         ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);
3232
3233         /*
3234          * Initialise stolen early so that we may reserve preallocated
3235          * objects for the BIOS to KMS transition.
3236          */
3237         ret = i915_gem_init_stolen(&dev_priv->drm);
3238         if (ret)
3239                 goto out_gtt_cleanup;
3240
3241         return 0;
3242
3243 out_gtt_cleanup:
3244         ggtt->base.cleanup(&ggtt->base);
3245         return ret;
3246 }
3247
3248 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3249 {
3250         if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3251                 return -EIO;
3252
3253         return 0;
3254 }
3255
3256 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3257 {
3258         struct drm_i915_private *dev_priv = to_i915(dev);
3259         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3260         struct drm_i915_gem_object *obj, *on;
3261
3262         i915_check_and_clear_faults(dev_priv);
3263
3264         /* First fill our portion of the GTT with scratch pages */
3265         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
3266                                true);
3267
3268         ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
3269
3270         /* clflush objects bound into the GGTT and rebind them. */
3271         list_for_each_entry_safe(obj, on,
3272                                  &dev_priv->mm.bound_list, global_list) {
3273                 bool ggtt_bound = false;
3274                 struct i915_vma *vma;
3275
3276                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3277                         if (vma->vm != &ggtt->base)
3278                                 continue;
3279
3280                         if (!i915_vma_unbind(vma))
3281                                 continue;
3282
3283                         WARN_ON(i915_vma_bind(vma, obj->cache_level,
3284                                               PIN_UPDATE));
3285                         ggtt_bound = true;
3286                 }
3287
3288                 if (ggtt_bound)
3289                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3290         }
3291
3292         ggtt->base.closed = false;
3293
3294         if (INTEL_INFO(dev)->gen >= 8) {
3295                 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3296                         chv_setup_private_ppat(dev_priv);
3297                 else
3298                         bdw_setup_private_ppat(dev_priv);
3299
3300                 return;
3301         }
3302
3303         if (USES_PPGTT(dev)) {
3304                 struct i915_address_space *vm;
3305
3306                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3307                         /* TODO: Perhaps it shouldn't be gen6 specific */
3308
3309                         struct i915_hw_ppgtt *ppgtt;
3310
3311                         if (i915_is_ggtt(vm))
3312                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
3313                         else
3314                                 ppgtt = i915_vm_to_ppgtt(vm);
3315
3316                         gen6_write_page_range(dev_priv, &ppgtt->pd,
3317                                               0, ppgtt->base.total);
3318                 }
3319         }
3320
3321         i915_ggtt_flush(dev_priv);
3322 }
3323
3324 static void
3325 i915_vma_retire(struct i915_gem_active *active,
3326                 struct drm_i915_gem_request *rq)
3327 {
3328         const unsigned int idx = rq->engine->id;
3329         struct i915_vma *vma =
3330                 container_of(active, struct i915_vma, last_read[idx]);
3331
3332         GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
3333
3334         i915_vma_clear_active(vma, idx);
3335         if (i915_vma_is_active(vma))
3336                 return;
3337
3338         list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3339         if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
3340                 WARN_ON(i915_vma_unbind(vma));
3341 }
3342
3343 void i915_vma_destroy(struct i915_vma *vma)
3344 {
3345         GEM_BUG_ON(vma->node.allocated);
3346         GEM_BUG_ON(i915_vma_is_active(vma));
3347         GEM_BUG_ON(!i915_vma_is_closed(vma));
3348         GEM_BUG_ON(vma->fence);
3349
3350         list_del(&vma->vm_link);
3351         if (!i915_vma_is_ggtt(vma))
3352                 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
3353
3354         kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
3355 }
3356
3357 void i915_vma_close(struct i915_vma *vma)
3358 {
3359         GEM_BUG_ON(i915_vma_is_closed(vma));
3360         vma->flags |= I915_VMA_CLOSED;
3361
3362         list_del_init(&vma->obj_link);
3363         if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
3364                 WARN_ON(i915_vma_unbind(vma));
3365 }
3366
3367 static struct i915_vma *
3368 __i915_vma_create(struct drm_i915_gem_object *obj,
3369                   struct i915_address_space *vm,
3370                   const struct i915_ggtt_view *view)
3371 {
3372         struct i915_vma *vma;
3373         int i;
3374
3375         GEM_BUG_ON(vm->closed);
3376
3377         vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3378         if (vma == NULL)
3379                 return ERR_PTR(-ENOMEM);
3380
3381         INIT_LIST_HEAD(&vma->exec_list);
3382         for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
3383                 init_request_active(&vma->last_read[i], i915_vma_retire);
3384         init_request_active(&vma->last_fence, NULL);
3385         list_add(&vma->vm_link, &vm->unbound_list);
3386         vma->vm = vm;
3387         vma->obj = obj;
3388         vma->size = obj->base.size;
3389
3390         if (view) {
3391                 vma->ggtt_view = *view;
3392                 if (view->type == I915_GGTT_VIEW_PARTIAL) {
3393                         vma->size = view->params.partial.size;
3394                         vma->size <<= PAGE_SHIFT;
3395                 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3396                         vma->size =
3397                                 intel_rotation_info_size(&view->params.rotated);
3398                         vma->size <<= PAGE_SHIFT;
3399                 }
3400         }
3401
3402         if (i915_is_ggtt(vm)) {
3403                 vma->flags |= I915_VMA_GGTT;
3404         } else {
3405                 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3406         }
3407
3408         list_add_tail(&vma->obj_link, &obj->vma_list);
3409         return vma;
3410 }
3411
3412 static inline bool vma_matches(struct i915_vma *vma,
3413                                struct i915_address_space *vm,
3414                                const struct i915_ggtt_view *view)
3415 {
3416         if (vma->vm != vm)
3417                 return false;
3418
3419         if (!i915_vma_is_ggtt(vma))
3420                 return true;
3421
3422         if (!view)
3423                 return vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL;
3424
3425         if (vma->ggtt_view.type != view->type)
3426                 return false;
3427
3428         return memcmp(&vma->ggtt_view.params,
3429                       &view->params,
3430                       sizeof(view->params)) == 0;
3431 }
3432
3433 struct i915_vma *
3434 i915_vma_create(struct drm_i915_gem_object *obj,
3435                 struct i915_address_space *vm,
3436                 const struct i915_ggtt_view *view)
3437 {
3438         GEM_BUG_ON(view && !i915_is_ggtt(vm));
3439         GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
3440
3441         return __i915_vma_create(obj, vm, view);
3442 }
3443
3444 struct i915_vma *
3445 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
3446                     struct i915_address_space *vm,
3447                     const struct i915_ggtt_view *view)
3448 {
3449         struct i915_vma *vma;
3450
3451         list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
3452                 if (vma_matches(vma, vm, view))
3453                         return vma;
3454
3455         return NULL;
3456 }
3457
3458 struct i915_vma *
3459 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3460                                   struct i915_address_space *vm,
3461                                   const struct i915_ggtt_view *view)
3462 {
3463         struct i915_vma *vma;
3464
3465         GEM_BUG_ON(view && !i915_is_ggtt(vm));
3466
3467         vma = i915_gem_obj_to_vma(obj, vm, view);
3468         if (!vma)
3469                 vma = __i915_vma_create(obj, vm, view);
3470
3471         GEM_BUG_ON(i915_vma_is_closed(vma));
3472         return vma;
3473 }
3474
3475 static struct scatterlist *
3476 rotate_pages(const dma_addr_t *in, unsigned int offset,
3477              unsigned int width, unsigned int height,
3478              unsigned int stride,
3479              struct sg_table *st, struct scatterlist *sg)
3480 {
3481         unsigned int column, row;
3482         unsigned int src_idx;
3483
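             /*
              * The walk below emits pages column by column, each column
              * bottom-up; e.g. for a 2x2 view with stride 2 the source pages
              * come out in the order 2, 0, 3, 1, i.e. the layout rotated by
              * 90 degrees (illustrative example).
              */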
3484         for (column = 0; column < width; column++) {
3485                 src_idx = stride * (height - 1) + column;
3486                 for (row = 0; row < height; row++) {
3487                         st->nents++;
3488                         /* We don't need the pages, but need to initialize
3489                          * the entries so the sg list can be happily traversed.
3490                          * The only things we need are the DMA addresses.
3491                          */
3492                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3493                         sg_dma_address(sg) = in[offset + src_idx];
3494                         sg_dma_len(sg) = PAGE_SIZE;
3495                         sg = sg_next(sg);
3496                         src_idx -= stride;
3497                 }
3498         }
3499
3500         return sg;
3501 }
3502
3503 static struct sg_table *
3504 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
3505                           struct drm_i915_gem_object *obj)
3506 {
3507         const size_t n_pages = obj->base.size / PAGE_SIZE;
3508         unsigned int size = intel_rotation_info_size(rot_info);
3509         struct sgt_iter sgt_iter;
3510         dma_addr_t dma_addr;
3511         unsigned long i;
3512         dma_addr_t *page_addr_list;
3513         struct sg_table *st;
3514         struct scatterlist *sg;
3515         int ret = -ENOMEM;
3516
3517         /* Allocate a temporary list of source pages for random access. */
3518         page_addr_list = drm_malloc_gfp(n_pages,
3519                                         sizeof(dma_addr_t),
3520                                         GFP_TEMPORARY);
3521         if (!page_addr_list)
3522                 return ERR_PTR(ret);
3523
3524         /* Allocate target SG list. */
3525         st = kmalloc(sizeof(*st), GFP_KERNEL);
3526         if (!st)
3527                 goto err_st_alloc;
3528
3529         ret = sg_alloc_table(st, size, GFP_KERNEL);
3530         if (ret)
3531                 goto err_sg_alloc;
3532
3533         /* Populate source page list from the object. */
3534         i = 0;
3535         for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
3536                 page_addr_list[i++] = dma_addr;
3537
3538         GEM_BUG_ON(i != n_pages);
3539         st->nents = 0;
3540         sg = st->sgl;
3541
3542         for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) {
3543                 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3544                                   rot_info->plane[i].width, rot_info->plane[i].height,
3545                                   rot_info->plane[i].stride, st, sg);
3546         }
3547
3548         DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
3549                       obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3550
3551         drm_free_large(page_addr_list);
3552
3553         return st;
3554
3555 err_sg_alloc:
3556         kfree(st);
3557 err_st_alloc:
3558         drm_free_large(page_addr_list);
3559
3560         DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3561                       obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3562
3563         return ERR_PTR(ret);
3564 }
3565
3566 static struct sg_table *
3567 intel_partial_pages(const struct i915_ggtt_view *view,
3568                     struct drm_i915_gem_object *obj)
3569 {
3570         struct sg_table *st;
3571         struct scatterlist *sg;
3572         struct sg_page_iter obj_sg_iter;
3573         int ret = -ENOMEM;
3574
3575         st = kmalloc(sizeof(*st), GFP_KERNEL);
3576         if (!st)
3577                 goto err_st_alloc;
3578
3579         ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3580         if (ret)
3581                 goto err_sg_alloc;
3582
3583         sg = st->sgl;
3584         st->nents = 0;
3585         for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3586                 view->params.partial.offset)
3587         {
3588                 if (st->nents >= view->params.partial.size)
3589                         break;
3590
3591                 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3592                 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3593                 sg_dma_len(sg) = PAGE_SIZE;
3594
3595                 sg = sg_next(sg);
3596                 st->nents++;
3597         }
3598
3599         return st;
3600
3601 err_sg_alloc:
3602         kfree(st);
3603 err_st_alloc:
3604         return ERR_PTR(ret);
3605 }
3606
3607 static int
3608 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3609 {
3610         int ret = 0;
3611
3612         if (vma->pages)
3613                 return 0;
3614
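             /*
              * The normal view reuses the object's own sg table; rotated and
              * partial views build a fresh sg table carrying only the DMA
              * addresses needed for that view.
              */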
3615         if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3616                 vma->pages = vma->obj->pages;
3617         else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3618                 vma->pages =
3619                         intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3620         else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3621                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3622         else
3623                 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3624                           vma->ggtt_view.type);
3625
3626         if (!vma->pages) {
3627                 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3628                           vma->ggtt_view.type);
3629                 ret = -EINVAL;
3630         } else if (IS_ERR(vma->pages)) {
3631                 ret = PTR_ERR(vma->pages);
3632                 vma->pages = NULL;
3633                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3634                           vma->ggtt_view.type, ret);
3635         }
3636
3637         return ret;
3638 }
3639
3640 /**
3641  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3642  * @vma: VMA to map
3643  * @cache_level: mapping cache level
3644  * @flags: flags like global or local mapping
3645  *
3646  * DMA addresses are taken from the scatter-gather table of this object (or of
3647  * this VMA in the case of non-default GGTT views) and the PTEs are set up.
3648  * Note that DMA addresses are also the only part of the SG table we care about.
3649  */
3650 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3651                   u32 flags)
3652 {
3653         u32 bind_flags;
3654         u32 vma_flags;
3655         int ret;
3656
3657         if (WARN_ON(flags == 0))
3658                 return -EINVAL;
3659
3660         bind_flags = 0;
3661         if (flags & PIN_GLOBAL)
3662                 bind_flags |= I915_VMA_GLOBAL_BIND;
3663         if (flags & PIN_USER)
3664                 bind_flags |= I915_VMA_LOCAL_BIND;
3665
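             /*
              * Skip binding levels that are already in place unless PIN_UPDATE
              * forces a rewrite; e.g. PIN_GLOBAL on a vma that already has
              * I915_VMA_GLOBAL_BIND set returns early below.
              */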
3666         vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
3667         if (flags & PIN_UPDATE)
3668                 bind_flags |= vma_flags;
3669         else
3670                 bind_flags &= ~vma_flags;
3671         if (bind_flags == 0)
3672                 return 0;
3673
3674         if (vma_flags == 0 && vma->vm->allocate_va_range) {
3675                 trace_i915_va_alloc(vma);
3676                 ret = vma->vm->allocate_va_range(vma->vm,
3677                                                  vma->node.start,
3678                                                  vma->node.size);
3679                 if (ret)
3680                         return ret;
3681         }
3682
3683         ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3684         if (ret)
3685                 return ret;
3686
3687         vma->flags |= bind_flags;
3688
3689         if (vma->obj)
3690                 set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
3691
3692         return 0;
3693 }
3694
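     /*
      * Maps the vma's GGTT range through the mappable aperture
      * (write-combined), caches the pointer in vma->iomap and pins the vma so
      * the binding stays valid while the mapping is in use.  A minimal usage
      * sketch (hypothetical caller, error handling elided):
      *
      *	ptr = i915_vma_pin_iomap(vma);
      *	if (!IS_ERR(ptr)) {
      *		writel(value, ptr + offset);
      *		i915_vma_unpin_iomap(vma);
      *	}
      */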
3695 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3696 {
3697         void __iomem *ptr;
3698
3699         /* Access through the GTT requires the device to be awake. */
3700         assert_rpm_wakelock_held(to_i915(vma->vm->dev));
3701
3702         lockdep_assert_held(&vma->vm->dev->struct_mutex);
3703         if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
3704                 return IO_ERR_PTR(-ENODEV);
3705
3706         GEM_BUG_ON(!i915_vma_is_ggtt(vma));
3707         GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
3708
3709         ptr = vma->iomap;
3710         if (ptr == NULL) {
3711                 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
3712                                         vma->node.start,
3713                                         vma->node.size);
3714                 if (ptr == NULL)
3715                         return IO_ERR_PTR(-ENOMEM);
3716
3717                 vma->iomap = ptr;
3718         }
3719
3720         __i915_vma_pin(vma);
3721         return ptr;
3722 }
3723
3724 void i915_vma_unpin_and_release(struct i915_vma **p_vma)
3725 {
3726         struct i915_vma *vma;
3727
3728         vma = fetch_and_zero(p_vma);
3729         if (!vma)
3730                 return;
3731
3732         i915_vma_unpin(vma);
3733         i915_vma_put(vma);
3734 }