1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/seq_file.h>
27 #include <linux/stop_machine.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_vgpu.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34
35 /**
36  * DOC: Global GTT views
37  *
38  * Background and previous state
39  *
40  * Historically objects could exist (be bound) in global GTT space only as
41  * singular instances with a view representing all of the object's backing pages
42  * in a linear fashion. This view is called a normal view.
43  *
44  * To support multiple views of the same object, where the number of mapped
45  * pages is not equal to the backing store, or where the layout of the pages
46  * is not linear, the concept of a GGTT view was added.
47  *
48  * One example of an alternative view is a stereo display driven by a single
49  * image. In this case we would have a framebuffer looking like this
50  * (2x2 pages):
51  *
52  *    12
53  *    34
54  *
55  * The above represents a normal GGTT view as normally mapped for GPU or CPU
56  * rendering. In contrast, the display engine would be fed an alternative
57  * view which could look something like this:
58  *
59  *   1212
60  *   3434
61  *
62  * In this example both the size and layout of pages in the alternative view
63  * are different from the normal view.
64  *
65  * Implementation and usage
66  *
67  * GGTT views are implemented using VMAs and are distinguished via enum
68  * i915_ggtt_view_type and struct i915_ggtt_view.
69  *
70  * A new flavour of core GEM functions which work with GGTT bound objects was
71  * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
72  * renaming in large amounts of code. They take the struct i915_ggtt_view
73  * parameter encapsulating all metadata required to implement a view.
74  *
75  * As a helper for callers which are only interested in the normal view, a
76  * globally const i915_ggtt_view_normal singleton instance exists. All old core
77  * GEM API functions, the ones not taking the view parameter, operate on, or
78  * with, the normal GGTT view.
79  *
80  * Code wanting to add or use a new GGTT view needs to:
81  *
82  * 1. Add a new enum with a suitable name.
83  * 2. Extend the metadata in the i915_ggtt_view structure if required.
84  * 3. Add support to i915_get_ggtt_vma_pages().
85  *
86  * New views are required to build a scatter-gather table from within the
87  * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
88  * and exists for the lifetime of a VMA.
89  *
90  * The core API is designed to have copy semantics, which means that the
91  * passed in struct i915_ggtt_view does not need to be persistent (left around
92  * after calling the core API functions).
93  *
94  */
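
/*
 * Illustrative sketch (not part of the original file): a caller wanting an
 * alternative view of an object passes the corresponding view descriptor
 * through the _view flavour of the GEM API, while legacy callers implicitly
 * get i915_ggtt_view_normal. The pin helper named below is assumed to be the
 * one declared in i915_drv.h; rotation metadata inside the view is elided.
 *
 *	const struct i915_ggtt_view *view = &i915_ggtt_view_rotated;
 *	int ret;
 *
 *	ret = i915_gem_object_ggtt_pin(obj, view, 0, PIN_MAPPABLE);
 *	if (ret)
 *		return ret;
 *
 * The view struct itself may be discarded afterwards (copy semantics).
 */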
95
96 static int
97 i915_get_ggtt_vma_pages(struct i915_vma *vma);
98
99 const struct i915_ggtt_view i915_ggtt_view_normal = {
100         .type = I915_GGTT_VIEW_NORMAL,
101 };
102 const struct i915_ggtt_view i915_ggtt_view_rotated = {
103         .type = I915_GGTT_VIEW_ROTATED,
104 };
105
106 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
107 {
108         bool has_aliasing_ppgtt;
109         bool has_full_ppgtt;
110         bool has_full_48bit_ppgtt;
111
112         has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
113         has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
114         has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;
115
116         if (intel_vgpu_active(dev))
117                 has_full_ppgtt = false; /* emulation is too hard */
118
119         /*
120          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
121          * execlists, the sole mechanism available to submit work.
122          */
123         if (INTEL_INFO(dev)->gen < 9 &&
124             (enable_ppgtt == 0 || !has_aliasing_ppgtt))
125                 return 0;
126
127         if (enable_ppgtt == 1)
128                 return 1;
129
130         if (enable_ppgtt == 2 && has_full_ppgtt)
131                 return 2;
132
133         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
134                 return 3;
135
136 #ifdef CONFIG_INTEL_IOMMU
137         /* Disable ppgtt on SNB if VT-d is on. */
138         if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
139                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
140                 return 0;
141         }
142 #endif
143
144         /* Early VLV (pre-B3 stepping) doesn't have working PPGTT */
145         if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) {
146                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
147                 return 0;
148         }
149
150         if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
151                 return has_full_48bit_ppgtt ? 3 : 2;
152         else
153                 return has_aliasing_ppgtt ? 1 : 0;
154 }
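
/*
 * Illustrative sketch (not in the original source): the sanitized value is
 * typically written back to i915.enable_ppgtt and read by the rest of the
 * driver through the USES_PPGTT()/USES_FULL_PPGTT()/USES_FULL_48BIT_PPGTT()
 * feature macros. Roughly:
 *
 *	switch (sanitize_enable_ppgtt(dev, i915.enable_ppgtt)) {
 *	case 0:	(GGTT only, no per-process address space)
 *	case 1:	(aliasing PPGTT, mirrors the global GTT)
 *	case 2:	(full PPGTT, 3-level / 32b per context)
 *	case 3:	(full PPGTT, 4-level / 48b per context)
 *	}
 */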
155
156 static int ppgtt_bind_vma(struct i915_vma *vma,
157                           enum i915_cache_level cache_level,
158                           u32 unused)
159 {
160         u32 pte_flags = 0;
161
162         /* Currently applicable only to VLV */
163         if (vma->obj->gt_ro)
164                 pte_flags |= PTE_READ_ONLY;
165
166         vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
167                                 cache_level, pte_flags);
168
169         return 0;
170 }
171
172 static void ppgtt_unbind_vma(struct i915_vma *vma)
173 {
174         vma->vm->clear_range(vma->vm,
175                              vma->node.start,
176                              vma->obj->base.size,
177                              true);
178 }
179
180 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
181                                   enum i915_cache_level level,
182                                   bool valid)
183 {
184         gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
185         pte |= addr;
186
187         switch (level) {
188         case I915_CACHE_NONE:
189                 pte |= PPAT_UNCACHED_INDEX;
190                 break;
191         case I915_CACHE_WT:
192                 pte |= PPAT_DISPLAY_ELLC_INDEX;
193                 break;
194         default:
195                 pte |= PPAT_CACHED_INDEX;
196                 break;
197         }
198
199         return pte;
200 }
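
/*
 * Illustrative example (not part of the driver): encoding a page-aligned
 * address as a valid, uncached PTE ORs the PPAT index into the low bits of
 * the address, so for a hypothetical address 0x1000000:
 *
 *	gen8_pte_t pte = gen8_pte_encode(0x1000000, I915_CACHE_NONE, true);
 *	(pte == 0x1000000 | _PAGE_PRESENT | _PAGE_RW | PPAT_UNCACHED_INDEX)
 */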
201
202 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
203                                   const enum i915_cache_level level)
204 {
205         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
206         pde |= addr;
207         if (level != I915_CACHE_NONE)
208                 pde |= PPAT_CACHED_PDE_INDEX;
209         else
210                 pde |= PPAT_UNCACHED_INDEX;
211         return pde;
212 }
213
214 #define gen8_pdpe_encode gen8_pde_encode
215 #define gen8_pml4e_encode gen8_pde_encode
216
217 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
218                                  enum i915_cache_level level,
219                                  bool valid, u32 unused)
220 {
221         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
222         pte |= GEN6_PTE_ADDR_ENCODE(addr);
223
224         switch (level) {
225         case I915_CACHE_L3_LLC:
226         case I915_CACHE_LLC:
227                 pte |= GEN6_PTE_CACHE_LLC;
228                 break;
229         case I915_CACHE_NONE:
230                 pte |= GEN6_PTE_UNCACHED;
231                 break;
232         default:
233                 MISSING_CASE(level);
234         }
235
236         return pte;
237 }
238
239 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
240                                  enum i915_cache_level level,
241                                  bool valid, u32 unused)
242 {
243         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
244         pte |= GEN6_PTE_ADDR_ENCODE(addr);
245
246         switch (level) {
247         case I915_CACHE_L3_LLC:
248                 pte |= GEN7_PTE_CACHE_L3_LLC;
249                 break;
250         case I915_CACHE_LLC:
251                 pte |= GEN6_PTE_CACHE_LLC;
252                 break;
253         case I915_CACHE_NONE:
254                 pte |= GEN6_PTE_UNCACHED;
255                 break;
256         default:
257                 MISSING_CASE(level);
258         }
259
260         return pte;
261 }
262
263 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
264                                  enum i915_cache_level level,
265                                  bool valid, u32 flags)
266 {
267         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
268         pte |= GEN6_PTE_ADDR_ENCODE(addr);
269
270         if (!(flags & PTE_READ_ONLY))
271                 pte |= BYT_PTE_WRITEABLE;
272
273         if (level != I915_CACHE_NONE)
274                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
275
276         return pte;
277 }
278
279 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
280                                  enum i915_cache_level level,
281                                  bool valid, u32 unused)
282 {
283         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
284         pte |= HSW_PTE_ADDR_ENCODE(addr);
285
286         if (level != I915_CACHE_NONE)
287                 pte |= HSW_WB_LLC_AGE3;
288
289         return pte;
290 }
291
292 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
293                                   enum i915_cache_level level,
294                                   bool valid, u32 unused)
295 {
296         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
297         pte |= HSW_PTE_ADDR_ENCODE(addr);
298
299         switch (level) {
300         case I915_CACHE_NONE:
301                 break;
302         case I915_CACHE_WT:
303                 pte |= HSW_WT_ELLC_LLC_AGE3;
304                 break;
305         default:
306                 pte |= HSW_WB_ELLC_LLC_AGE3;
307                 break;
308         }
309
310         return pte;
311 }
312
313 static int __setup_page_dma(struct drm_device *dev,
314                             struct i915_page_dma *p, gfp_t flags)
315 {
316         struct device *device = &dev->pdev->dev;
317
318         p->page = alloc_page(flags);
319         if (!p->page)
320                 return -ENOMEM;
321
322         p->daddr = dma_map_page(device,
323                                 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
324
325         if (dma_mapping_error(device, p->daddr)) {
326                 __free_page(p->page);
327                 return -EINVAL;
328         }
329
330         return 0;
331 }
332
333 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
334 {
335         return __setup_page_dma(dev, p, GFP_KERNEL);
336 }
337
338 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
339 {
340         if (WARN_ON(!p->page))
341                 return;
342
343         dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
344         __free_page(p->page);
345         memset(p, 0, sizeof(*p));
346 }
347
348 static void *kmap_page_dma(struct i915_page_dma *p)
349 {
350         return kmap_atomic(p->page);
351 }
352
353 /* We use the flushing unmap only with ppgtt structures:
354  * page directories, page tables and scratch pages.
355  */
356 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
357 {
358         /* There are only a few exceptions for gen >= 6: chv and bxt.
359          * And we are not sure about the latter, so play safe for now.
360          */
361         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
362                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
363
364         kunmap_atomic(vaddr);
365 }
366
367 #define kmap_px(px) kmap_page_dma(px_base(px))
368 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
369
370 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
371 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
372 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
373 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
374
375 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
376                           const uint64_t val)
377 {
378         int i;
379         uint64_t * const vaddr = kmap_page_dma(p);
380
381         for (i = 0; i < 512; i++)
382                 vaddr[i] = val;
383
384         kunmap_page_dma(dev, vaddr);
385 }
386
387 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
388                              const uint32_t val32)
389 {
390         uint64_t v = val32;
391
392         v = v << 32 | val32;
393
394         fill_page_dma(dev, p, v);
395 }
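
/*
 * Worked example (not in the original source): fill_page_dma_32() replicates
 * the 32b pattern into both halves of a 64b word so the 64b loop in
 * fill_page_dma() can be reused:
 *
 *	val32 = 0x12345678  ->  v = 0x1234567812345678
 *
 * i.e. 512 64b stores still produce 1024 identical 32b entries per 4K page.
 */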
396
397 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
398 {
399         struct i915_page_scratch *sp;
400         int ret;
401
402         sp = kzalloc(sizeof(*sp), GFP_KERNEL);
403         if (sp == NULL)
404                 return ERR_PTR(-ENOMEM);
405
406         ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
407         if (ret) {
408                 kfree(sp);
409                 return ERR_PTR(ret);
410         }
411
412         set_pages_uc(px_page(sp), 1);
413
414         return sp;
415 }
416
417 static void free_scratch_page(struct drm_device *dev,
418                               struct i915_page_scratch *sp)
419 {
420         set_pages_wb(px_page(sp), 1);
421
422         cleanup_px(dev, sp);
423         kfree(sp);
424 }
425
426 static struct i915_page_table *alloc_pt(struct drm_device *dev)
427 {
428         struct i915_page_table *pt;
429         const size_t count = INTEL_INFO(dev)->gen >= 8 ?
430                 GEN8_PTES : GEN6_PTES;
431         int ret = -ENOMEM;
432
433         pt = kzalloc(sizeof(*pt), GFP_KERNEL);
434         if (!pt)
435                 return ERR_PTR(-ENOMEM);
436
437         pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
438                                 GFP_KERNEL);
439
440         if (!pt->used_ptes)
441                 goto fail_bitmap;
442
443         ret = setup_px(dev, pt);
444         if (ret)
445                 goto fail_page_m;
446
447         return pt;
448
449 fail_page_m:
450         kfree(pt->used_ptes);
451 fail_bitmap:
452         kfree(pt);
453
454         return ERR_PTR(ret);
455 }
456
457 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
458 {
459         cleanup_px(dev, pt);
460         kfree(pt->used_ptes);
461         kfree(pt);
462 }
463
464 static void gen8_initialize_pt(struct i915_address_space *vm,
465                                struct i915_page_table *pt)
466 {
467         gen8_pte_t scratch_pte;
468
469         scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
470                                       I915_CACHE_LLC, true);
471
472         fill_px(vm->dev, pt, scratch_pte);
473 }
474
475 static void gen6_initialize_pt(struct i915_address_space *vm,
476                                struct i915_page_table *pt)
477 {
478         gen6_pte_t scratch_pte;
479
480         WARN_ON(px_dma(vm->scratch_page) == 0);
481
482         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
483                                      I915_CACHE_LLC, true, 0);
484
485         fill32_px(vm->dev, pt, scratch_pte);
486 }
487
488 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
489 {
490         struct i915_page_directory *pd;
491         int ret = -ENOMEM;
492
493         pd = kzalloc(sizeof(*pd), GFP_KERNEL);
494         if (!pd)
495                 return ERR_PTR(-ENOMEM);
496
497         pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
498                                 sizeof(*pd->used_pdes), GFP_KERNEL);
499         if (!pd->used_pdes)
500                 goto fail_bitmap;
501
502         ret = setup_px(dev, pd);
503         if (ret)
504                 goto fail_page_m;
505
506         return pd;
507
508 fail_page_m:
509         kfree(pd->used_pdes);
510 fail_bitmap:
511         kfree(pd);
512
513         return ERR_PTR(ret);
514 }
515
516 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
517 {
518         if (px_page(pd)) {
519                 cleanup_px(dev, pd);
520                 kfree(pd->used_pdes);
521                 kfree(pd);
522         }
523 }
524
525 static void gen8_initialize_pd(struct i915_address_space *vm,
526                                struct i915_page_directory *pd)
527 {
528         gen8_pde_t scratch_pde;
529
530         scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
531
532         fill_px(vm->dev, pd, scratch_pde);
533 }
534
535 static int __pdp_init(struct drm_device *dev,
536                       struct i915_page_directory_pointer *pdp)
537 {
538         size_t pdpes = I915_PDPES_PER_PDP(dev);
539
540         pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
541                                   sizeof(unsigned long),
542                                   GFP_KERNEL);
543         if (!pdp->used_pdpes)
544                 return -ENOMEM;
545
546         pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
547                                       GFP_KERNEL);
548         if (!pdp->page_directory) {
549                 kfree(pdp->used_pdpes);
550                 /* the PDP might be the statically allocated top level. Keep it
551                  * as clean as possible */
552                 pdp->used_pdpes = NULL;
553                 return -ENOMEM;
554         }
555
556         return 0;
557 }
558
559 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
560 {
561         kfree(pdp->used_pdpes);
562         kfree(pdp->page_directory);
563         pdp->page_directory = NULL;
564 }
565
566 static struct
567 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
568 {
569         struct i915_page_directory_pointer *pdp;
570         int ret = -ENOMEM;
571
572         WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
573
574         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
575         if (!pdp)
576                 return ERR_PTR(-ENOMEM);
577
578         ret = __pdp_init(dev, pdp);
579         if (ret)
580                 goto fail_bitmap;
581
582         ret = setup_px(dev, pdp);
583         if (ret)
584                 goto fail_page_m;
585
586         return pdp;
587
588 fail_page_m:
589         __pdp_fini(pdp);
590 fail_bitmap:
591         kfree(pdp);
592
593         return ERR_PTR(ret);
594 }
595
596 static void free_pdp(struct drm_device *dev,
597                      struct i915_page_directory_pointer *pdp)
598 {
599         __pdp_fini(pdp);
600         if (USES_FULL_48BIT_PPGTT(dev)) {
601                 cleanup_px(dev, pdp);
602                 kfree(pdp);
603         }
604 }
605
606 static void gen8_initialize_pdp(struct i915_address_space *vm,
607                                 struct i915_page_directory_pointer *pdp)
608 {
609         gen8_ppgtt_pdpe_t scratch_pdpe;
610
611         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
612
613         fill_px(vm->dev, pdp, scratch_pdpe);
614 }
615
616 static void gen8_initialize_pml4(struct i915_address_space *vm,
617                                  struct i915_pml4 *pml4)
618 {
619         gen8_ppgtt_pml4e_t scratch_pml4e;
620
621         scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
622                                           I915_CACHE_LLC);
623
624         fill_px(vm->dev, pml4, scratch_pml4e);
625 }
626
627 static void
628 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
629                           struct i915_page_directory_pointer *pdp,
630                           struct i915_page_directory *pd,
631                           int index)
632 {
633         gen8_ppgtt_pdpe_t *page_directorypo;
634
635         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
636                 return;
637
638         page_directorypo = kmap_px(pdp);
639         page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
640         kunmap_px(ppgtt, page_directorypo);
641 }
642
643 static void
644 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
645                                   struct i915_pml4 *pml4,
646                                   struct i915_page_directory_pointer *pdp,
647                                   int index)
648 {
649         gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
650
651         WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
652         pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
653         kunmap_px(ppgtt, pagemap);
654 }
655
656 /* Broadwell Page Directory Pointer Descriptors */
657 static int gen8_write_pdp(struct drm_i915_gem_request *req,
658                           unsigned entry,
659                           dma_addr_t addr)
660 {
661         struct intel_engine_cs *ring = req->ring;
662         int ret;
663
664         BUG_ON(entry >= 4);
665
666         ret = intel_ring_begin(req, 6);
667         if (ret)
668                 return ret;
669
670         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
671         intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry));
672         intel_ring_emit(ring, upper_32_bits(addr));
673         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
674         intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry));
675         intel_ring_emit(ring, lower_32_bits(addr));
676         intel_ring_advance(ring);
677
678         return 0;
679 }
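
/*
 * Illustrative sketch (not in the original source): for PDP entry N the six
 * dwords emitted above form two LRI register writes in the ring:
 *
 *	MI_LOAD_REGISTER_IMM(1), GEN8_RING_PDP_UDW(ring, N), upper_32_bits(addr)
 *	MI_LOAD_REGISTER_IMM(1), GEN8_RING_PDP_LDW(ring, N), lower_32_bits(addr)
 *
 * so the page directory pointer is loaded from the command stream rather than
 * poked via MMIO.
 */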
680
681 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
682                                  struct drm_i915_gem_request *req)
683 {
684         int i, ret;
685
686         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
687                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
688
689                 ret = gen8_write_pdp(req, i, pd_daddr);
690                 if (ret)
691                         return ret;
692         }
693
694         return 0;
695 }
696
697 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
698                               struct drm_i915_gem_request *req)
699 {
700         return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
701 }
702
703 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
704                                        struct i915_page_directory_pointer *pdp,
705                                        uint64_t start,
706                                        uint64_t length,
707                                        gen8_pte_t scratch_pte)
708 {
709         struct i915_hw_ppgtt *ppgtt =
710                 container_of(vm, struct i915_hw_ppgtt, base);
711         gen8_pte_t *pt_vaddr;
712         unsigned pdpe = gen8_pdpe_index(start);
713         unsigned pde = gen8_pde_index(start);
714         unsigned pte = gen8_pte_index(start);
715         unsigned num_entries = length >> PAGE_SHIFT;
716         unsigned last_pte, i;
717
718         if (WARN_ON(!pdp))
719                 return;
720
721         while (num_entries) {
722                 struct i915_page_directory *pd;
723                 struct i915_page_table *pt;
724
725                 if (WARN_ON(!pdp->page_directory[pdpe]))
726                         break;
727
728                 pd = pdp->page_directory[pdpe];
729
730                 if (WARN_ON(!pd->page_table[pde]))
731                         break;
732
733                 pt = pd->page_table[pde];
734
735                 if (WARN_ON(!px_page(pt)))
736                         break;
737
738                 last_pte = pte + num_entries;
739                 if (last_pte > GEN8_PTES)
740                         last_pte = GEN8_PTES;
741
742                 pt_vaddr = kmap_px(pt);
743
744                 for (i = pte; i < last_pte; i++) {
745                         pt_vaddr[i] = scratch_pte;
746                         num_entries--;
747                 }
748
749                 kunmap_px(ppgtt, pt);
750
751                 pte = 0;
752                 if (++pde == I915_PDES) {
753                         if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
754                                 break;
755                         pde = 0;
756                 }
757         }
758 }
759
760 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
761                                    uint64_t start,
762                                    uint64_t length,
763                                    bool use_scratch)
764 {
765         struct i915_hw_ppgtt *ppgtt =
766                 container_of(vm, struct i915_hw_ppgtt, base);
767         gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
768                                                  I915_CACHE_LLC, use_scratch);
769
770         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
771                 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
772                                            scratch_pte);
773         } else {
774                 uint64_t pml4e;
775                 struct i915_page_directory_pointer *pdp;
776
777                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
778                         gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
779                                                    scratch_pte);
780                 }
781         }
782 }
783
784 static void
785 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
786                               struct i915_page_directory_pointer *pdp,
787                               struct sg_page_iter *sg_iter,
788                               uint64_t start,
789                               enum i915_cache_level cache_level)
790 {
791         struct i915_hw_ppgtt *ppgtt =
792                 container_of(vm, struct i915_hw_ppgtt, base);
793         gen8_pte_t *pt_vaddr;
794         unsigned pdpe = gen8_pdpe_index(start);
795         unsigned pde = gen8_pde_index(start);
796         unsigned pte = gen8_pte_index(start);
797
798         pt_vaddr = NULL;
799
800         while (__sg_page_iter_next(sg_iter)) {
801                 if (pt_vaddr == NULL) {
802                         struct i915_page_directory *pd = pdp->page_directory[pdpe];
803                         struct i915_page_table *pt = pd->page_table[pde];
804                         pt_vaddr = kmap_px(pt);
805                 }
806
807                 pt_vaddr[pte] =
808                         gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
809                                         cache_level, true);
810                 if (++pte == GEN8_PTES) {
811                         kunmap_px(ppgtt, pt_vaddr);
812                         pt_vaddr = NULL;
813                         if (++pde == I915_PDES) {
814                                 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
815                                         break;
816                                 pde = 0;
817                         }
818                         pte = 0;
819                 }
820         }
821
822         if (pt_vaddr)
823                 kunmap_px(ppgtt, pt_vaddr);
824 }
825
826 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
827                                       struct sg_table *pages,
828                                       uint64_t start,
829                                       enum i915_cache_level cache_level,
830                                       u32 unused)
831 {
832         struct i915_hw_ppgtt *ppgtt =
833                 container_of(vm, struct i915_hw_ppgtt, base);
834         struct sg_page_iter sg_iter;
835
836         __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
837
838         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
839                 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
840                                               cache_level);
841         } else {
842                 struct i915_page_directory_pointer *pdp;
843                 uint64_t pml4e;
844                 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
845
846                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
847                         gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
848                                                       start, cache_level);
849                 }
850         }
851 }
852
853 static void gen8_free_page_tables(struct drm_device *dev,
854                                   struct i915_page_directory *pd)
855 {
856         int i;
857
858         if (!px_page(pd))
859                 return;
860
861         for_each_set_bit(i, pd->used_pdes, I915_PDES) {
862                 if (WARN_ON(!pd->page_table[i]))
863                         continue;
864
865                 free_pt(dev, pd->page_table[i]);
866                 pd->page_table[i] = NULL;
867         }
868 }
869
870 static int gen8_init_scratch(struct i915_address_space *vm)
871 {
872         struct drm_device *dev = vm->dev;
873
874         vm->scratch_page = alloc_scratch_page(dev);
875         if (IS_ERR(vm->scratch_page))
876                 return PTR_ERR(vm->scratch_page);
877
878         vm->scratch_pt = alloc_pt(dev);
879         if (IS_ERR(vm->scratch_pt)) {
880                 free_scratch_page(dev, vm->scratch_page);
881                 return PTR_ERR(vm->scratch_pt);
882         }
883
884         vm->scratch_pd = alloc_pd(dev);
885         if (IS_ERR(vm->scratch_pd)) {
886                 free_pt(dev, vm->scratch_pt);
887                 free_scratch_page(dev, vm->scratch_page);
888                 return PTR_ERR(vm->scratch_pd);
889         }
890
891         if (USES_FULL_48BIT_PPGTT(dev)) {
892                 vm->scratch_pdp = alloc_pdp(dev);
893                 if (IS_ERR(vm->scratch_pdp)) {
894                         free_pd(dev, vm->scratch_pd);
895                         free_pt(dev, vm->scratch_pt);
896                         free_scratch_page(dev, vm->scratch_page);
897                         return PTR_ERR(vm->scratch_pdp);
898                 }
899         }
900
901         gen8_initialize_pt(vm, vm->scratch_pt);
902         gen8_initialize_pd(vm, vm->scratch_pd);
903         if (USES_FULL_48BIT_PPGTT(dev))
904                 gen8_initialize_pdp(vm, vm->scratch_pdp);
905
906         return 0;
907 }
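
/*
 * Illustrative sketch (not part of the original file): after gen8_init_scratch()
 * every level of otherwise unused address space points one level down at a
 * scratch structure, ending at the single scratch page:
 *
 *	scratch_pdp -> scratch_pd -> scratch_pt -> scratch_page
 *
 * (the scratch_pdp level exists only for 48b PPGTT), so stray GPU reads of
 * unmapped addresses land in scratch_page instead of faulting.
 */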
908
909 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
910 {
911         enum vgt_g2v_type msg;
912         struct drm_device *dev = ppgtt->base.dev;
913         struct drm_i915_private *dev_priv = dev->dev_private;
914         int i;
915
916         if (USES_FULL_48BIT_PPGTT(dev)) {
917                 u64 daddr = px_dma(&ppgtt->pml4);
918
919                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
920                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
921
922                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
923                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
924         } else {
925                 for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
926                         u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
927
928                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
929                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
930                 }
931
932                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
933                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
934         }
935
936         I915_WRITE(vgtif_reg(g2v_notify), msg);
937
938         return 0;
939 }
940
941 static void gen8_free_scratch(struct i915_address_space *vm)
942 {
943         struct drm_device *dev = vm->dev;
944
945         if (USES_FULL_48BIT_PPGTT(dev))
946                 free_pdp(dev, vm->scratch_pdp);
947         free_pd(dev, vm->scratch_pd);
948         free_pt(dev, vm->scratch_pt);
949         free_scratch_page(dev, vm->scratch_page);
950 }
951
952 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
953                                     struct i915_page_directory_pointer *pdp)
954 {
955         int i;
956
957         for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
958                 if (WARN_ON(!pdp->page_directory[i]))
959                         continue;
960
961                 gen8_free_page_tables(dev, pdp->page_directory[i]);
962                 free_pd(dev, pdp->page_directory[i]);
963         }
964
965         free_pdp(dev, pdp);
966 }
967
968 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
969 {
970         int i;
971
972         for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
973                 if (WARN_ON(!ppgtt->pml4.pdps[i]))
974                         continue;
975
976                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
977         }
978
979         cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
980 }
981
982 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
983 {
984         struct i915_hw_ppgtt *ppgtt =
985                 container_of(vm, struct i915_hw_ppgtt, base);
986
987         if (intel_vgpu_active(vm->dev))
988                 gen8_ppgtt_notify_vgt(ppgtt, false);
989
990         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
991                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
992         else
993                 gen8_ppgtt_cleanup_4lvl(ppgtt);
994
995         gen8_free_scratch(vm);
996 }
997
998 /**
999  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1000  * @vm: Master vm structure.
1001  * @pd: Page directory for this address range.
1002  * @start:      Starting virtual address to begin allocations.
1003  * @length:     Size of the allocations.
1004  * @new_pts:    Bitmap set by function with new allocations. Likely used by the
1005  *              caller to free on error.
1006  *
1007  * Allocate the required number of page tables. Extremely similar to
1008  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1009  * the page directory boundary (instead of the page directory pointer). That
1010  * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1011  * possible, and likely that the caller will need to use multiple calls of this
1012  * function to achieve the appropriate allocation.
1013  *
1014  * Return: 0 if success; negative error code otherwise.
1015  */
1016 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1017                                      struct i915_page_directory *pd,
1018                                      uint64_t start,
1019                                      uint64_t length,
1020                                      unsigned long *new_pts)
1021 {
1022         struct drm_device *dev = vm->dev;
1023         struct i915_page_table *pt;
1024         uint32_t pde;
1025
1026         gen8_for_each_pde(pt, pd, start, length, pde) {
1027                 /* Don't reallocate page tables */
1028                 if (test_bit(pde, pd->used_pdes)) {
1029                         /* Scratch is never allocated this way */
1030                         WARN_ON(pt == vm->scratch_pt);
1031                         continue;
1032                 }
1033
1034                 pt = alloc_pt(dev);
1035                 if (IS_ERR(pt))
1036                         goto unwind_out;
1037
1038                 gen8_initialize_pt(vm, pt);
1039                 pd->page_table[pde] = pt;
1040                 __set_bit(pde, new_pts);
1041                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1042         }
1043
1044         return 0;
1045
1046 unwind_out:
1047         for_each_set_bit(pde, new_pts, I915_PDES)
1048                 free_pt(dev, pd->page_table[pde]);
1049
1050         return -ENOMEM;
1051 }
1052
1053 /**
1054  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1055  * @vm: Master vm structure.
1056  * @pdp:        Page directory pointer for this address range.
1057  * @start:      Starting virtual address to begin allocations.
1058  * @length:     Size of the allocations.
1059  * @new_pds:    Bitmap set by function with new allocations. Likely used by the
1060  *              caller to free on error.
1061  *
1062  * Allocate the required number of page directories starting at the pdpe index
1063  * of @start, and ending at the pdpe index of @start + @length. This function will skip
1064  * over already allocated page directories within the range, and only allocate
1065  * new ones, setting the appropriate pointer within the pdp as well as the
1066  * correct position in the bitmap @new_pds.
1067  *
1068  * The function will only allocate the pages within the range for a given page
1069  * directory pointer. In other words, if @start + @length straddles a virtually
1070  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1071  * required by the caller. This is not currently possible, and the BUG in the
1072  * code will prevent it.
1073  *
1074  * Return: 0 if success; negative error code otherwise.
1075  */
1076 static int
1077 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1078                                   struct i915_page_directory_pointer *pdp,
1079                                   uint64_t start,
1080                                   uint64_t length,
1081                                   unsigned long *new_pds)
1082 {
1083         struct drm_device *dev = vm->dev;
1084         struct i915_page_directory *pd;
1085         uint32_t pdpe;
1086         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1087
1088         WARN_ON(!bitmap_empty(new_pds, pdpes));
1089
1090         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1091                 if (test_bit(pdpe, pdp->used_pdpes))
1092                         continue;
1093
1094                 pd = alloc_pd(dev);
1095                 if (IS_ERR(pd))
1096                         goto unwind_out;
1097
1098                 gen8_initialize_pd(vm, pd);
1099                 pdp->page_directory[pdpe] = pd;
1100                 __set_bit(pdpe, new_pds);
1101                 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1102         }
1103
1104         return 0;
1105
1106 unwind_out:
1107         for_each_set_bit(pdpe, new_pds, pdpes)
1108                 free_pd(dev, pdp->page_directory[pdpe]);
1109
1110         return -ENOMEM;
1111 }
1112
1113 /**
1114  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1115  * @vm: Master vm structure.
1116  * @pml4:       Page map level 4 for this address range.
1117  * @start:      Starting virtual address to begin allocations.
1118  * @length:     Size of the allocations.
1119  * @new_pdps:   Bitmap set by function with new allocations. Likely used by the
1120  *              caller to free on error.
1121  *
1122  * Allocate the required number of page directory pointers. Extremely similar to
1123  * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1124  * The main difference is here we are limited by the pml4 boundary (instead of
1125  * the page directory pointer).
1126  *
1127  * Return: 0 if success; negative error code otherwise.
1128  */
1129 static int
1130 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1131                                   struct i915_pml4 *pml4,
1132                                   uint64_t start,
1133                                   uint64_t length,
1134                                   unsigned long *new_pdps)
1135 {
1136         struct drm_device *dev = vm->dev;
1137         struct i915_page_directory_pointer *pdp;
1138         uint32_t pml4e;
1139
1140         WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1141
1142         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1143                 if (!test_bit(pml4e, pml4->used_pml4es)) {
1144                         pdp = alloc_pdp(dev);
1145                         if (IS_ERR(pdp))
1146                                 goto unwind_out;
1147
1148                         gen8_initialize_pdp(vm, pdp);
1149                         pml4->pdps[pml4e] = pdp;
1150                         __set_bit(pml4e, new_pdps);
1151                         trace_i915_page_directory_pointer_entry_alloc(vm,
1152                                                                       pml4e,
1153                                                                       start,
1154                                                                       GEN8_PML4E_SHIFT);
1155                 }
1156         }
1157
1158         return 0;
1159
1160 unwind_out:
1161         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1162                 free_pdp(dev, pml4->pdps[pml4e]);
1163
1164         return -ENOMEM;
1165 }
1166
1167 static void
1168 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1169 {
1170         kfree(new_pts);
1171         kfree(new_pds);
1172 }
1173
1174 /* Fills in the page directory bitmap, and the array of page table bitmaps. Both
1175  * of these are sized based on the number of PDPEs in the system.
1176  */
1177 static
1178 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1179                                          unsigned long **new_pts,
1180                                          uint32_t pdpes)
1181 {
1182         unsigned long *pds;
1183         unsigned long *pts;
1184
1185         pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1186         if (!pds)
1187                 return -ENOMEM;
1188
1189         pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1190                       GFP_TEMPORARY);
1191         if (!pts)
1192                 goto err_out;
1193
1194         *new_pds = pds;
1195         *new_pts = pts;
1196
1197         return 0;
1198
1199 err_out:
1200         free_gen8_temp_bitmaps(pds, pts);
1201         return -ENOMEM;
1202 }
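
/*
 * Illustrative sketch (not from the original source): new_pts is laid out as
 * one I915_PDES-wide bitmap per possible PDPE, so callers index it per page
 * directory:
 *
 *	unsigned long *pt_bitmap = new_pts + pdpe * BITS_TO_LONGS(I915_PDES);
 *
 * which is exactly how gen8_alloc_va_range_3lvl() hands it down to
 * gen8_ppgtt_alloc_pagetabs() below.
 */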
1203
1204 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1205  * the page table structures, we mark them dirty so that
1206  * context switching/execlist queuing code takes extra steps
1207  * to ensure that tlbs are flushed.
1208  */
1209 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1210 {
1211         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1212 }
1213
1214 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1215                                     struct i915_page_directory_pointer *pdp,
1216                                     uint64_t start,
1217                                     uint64_t length)
1218 {
1219         struct i915_hw_ppgtt *ppgtt =
1220                 container_of(vm, struct i915_hw_ppgtt, base);
1221         unsigned long *new_page_dirs, *new_page_tables;
1222         struct drm_device *dev = vm->dev;
1223         struct i915_page_directory *pd;
1224         const uint64_t orig_start = start;
1225         const uint64_t orig_length = length;
1226         uint32_t pdpe;
1227         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1228         int ret;
1229
1230         /* Wrap is never okay since we can only represent 48b, and we don't
1231          * actually use the other side of the canonical address space.
1232          */
1233         if (WARN_ON(start + length < start))
1234                 return -ENODEV;
1235
1236         if (WARN_ON(start + length > vm->total))
1237                 return -ENODEV;
1238
1239         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1240         if (ret)
1241                 return ret;
1242
1243         /* Do the allocations first so we can easily bail out */
1244         ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1245                                                 new_page_dirs);
1246         if (ret) {
1247                 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1248                 return ret;
1249         }
1250
1251         /* For every page directory referenced, allocate page tables */
1252         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1253                 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1254                                                 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1255                 if (ret)
1256                         goto err_out;
1257         }
1258
1259         start = orig_start;
1260         length = orig_length;
1261
1262         /* Allocations have completed successfully, so set the bitmaps, and do
1263          * the mappings. */
1264         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1265                 gen8_pde_t *const page_directory = kmap_px(pd);
1266                 struct i915_page_table *pt;
1267                 uint64_t pd_len = length;
1268                 uint64_t pd_start = start;
1269                 uint32_t pde;
1270
1271                 /* Every pd should be allocated, we just did that above. */
1272                 WARN_ON(!pd);
1273
1274                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1275                         /* Same reasoning as pd */
1276                         WARN_ON(!pt);
1277                         WARN_ON(!pd_len);
1278                         WARN_ON(!gen8_pte_count(pd_start, pd_len));
1279
1280                         /* Set our used ptes within the page table */
1281                         bitmap_set(pt->used_ptes,
1282                                    gen8_pte_index(pd_start),
1283                                    gen8_pte_count(pd_start, pd_len));
1284
1285                         /* Our pde is now pointing to the pagetable, pt */
1286                         __set_bit(pde, pd->used_pdes);
1287
1288                         /* Map the PDE to the page table */
1289                         page_directory[pde] = gen8_pde_encode(px_dma(pt),
1290                                                               I915_CACHE_LLC);
1291                         trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1292                                                         gen8_pte_index(start),
1293                                                         gen8_pte_count(start, length),
1294                                                         GEN8_PTES);
1295
1296                         /* NB: We haven't yet mapped ptes to pages. At this
1297                          * point we're still relying on insert_entries() */
1298                 }
1299
1300                 kunmap_px(ppgtt, page_directory);
1301                 __set_bit(pdpe, pdp->used_pdpes);
1302                 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1303         }
1304
1305         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1306         mark_tlbs_dirty(ppgtt);
1307         return 0;
1308
1309 err_out:
1310         while (pdpe--) {
1311                 unsigned long temp;
1312
1313                 for_each_set_bit(temp, new_page_tables + pdpe *
1314                                 BITS_TO_LONGS(I915_PDES), I915_PDES)
1315                         free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1316         }
1317
1318         for_each_set_bit(pdpe, new_page_dirs, pdpes)
1319                 free_pd(dev, pdp->page_directory[pdpe]);
1320
1321         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1322         mark_tlbs_dirty(ppgtt);
1323         return ret;
1324 }
1325
1326 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1327                                     struct i915_pml4 *pml4,
1328                                     uint64_t start,
1329                                     uint64_t length)
1330 {
1331         DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1332         struct i915_hw_ppgtt *ppgtt =
1333                         container_of(vm, struct i915_hw_ppgtt, base);
1334         struct i915_page_directory_pointer *pdp;
1335         uint64_t pml4e;
1336         int ret = 0;
1337
1338         /* Do the pml4 allocations first, so we don't need to track the newly
1339          * allocated tables below the pdp */
1340         bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1341
1342         /* The pagedirectory and pagetable allocations are done in the shared 3
1343          * and 4 level code. Just allocate the pdps.
1344          */
1345         ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1346                                                 new_pdps);
1347         if (ret)
1348                 return ret;
1349
1350         WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1351              "The allocation has spanned more than 512GB. "
1352              "It is highly likely this is incorrect.");
1353
1354         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1355                 WARN_ON(!pdp);
1356
1357                 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1358                 if (ret)
1359                         goto err_out;
1360
1361                 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1362         }
1363
1364         bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1365                   GEN8_PML4ES_PER_PML4);
1366
1367         return 0;
1368
1369 err_out:
1370         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1371                 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1372
1373         return ret;
1374 }
1375
1376 static int gen8_alloc_va_range(struct i915_address_space *vm,
1377                                uint64_t start, uint64_t length)
1378 {
1379         struct i915_hw_ppgtt *ppgtt =
1380                 container_of(vm, struct i915_hw_ppgtt, base);
1381
1382         if (USES_FULL_48BIT_PPGTT(vm->dev))
1383                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1384         else
1385                 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1386 }
1387
1388 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1389                           uint64_t start, uint64_t length,
1390                           gen8_pte_t scratch_pte,
1391                           struct seq_file *m)
1392 {
1393         struct i915_page_directory *pd;
1394         uint32_t pdpe;
1395
1396         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1397                 struct i915_page_table *pt;
1398                 uint64_t pd_len = length;
1399                 uint64_t pd_start = start;
1400                 uint32_t pde;
1401
1402                 if (!test_bit(pdpe, pdp->used_pdpes))
1403                         continue;
1404
1405                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1406                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1407                         uint32_t  pte;
1408                         gen8_pte_t *pt_vaddr;
1409
1410                         if (!test_bit(pde, pd->used_pdes))
1411                                 continue;
1412
1413                         pt_vaddr = kmap_px(pt);
1414                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1415                                 uint64_t va =
1416                                         (pdpe << GEN8_PDPE_SHIFT) |
1417                                         (pde << GEN8_PDE_SHIFT) |
1418                                         (pte << GEN8_PTE_SHIFT);
1419                                 int i;
1420                                 bool found = false;
1421
1422                                 for (i = 0; i < 4; i++)
1423                                         if (pt_vaddr[pte + i] != scratch_pte)
1424                                                 found = true;
1425                                 if (!found)
1426                                         continue;
1427
1428                                 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1429                                 for (i = 0; i < 4; i++) {
1430                                         if (pt_vaddr[pte + i] != scratch_pte)
1431                                                 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1432                                         else
1433                                                 seq_puts(m, "  SCRATCH ");
1434                                 }
1435                                 seq_puts(m, "\n");
1436                         }
1437                         /* don't use kunmap_px, it could trigger
1438                          * an unnecessary flush.
1439                          */
1440                         kunmap_atomic(pt_vaddr);
1441                 }
1442         }
1443 }
1444
1445 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1446 {
1447         struct i915_address_space *vm = &ppgtt->base;
1448         uint64_t start = ppgtt->base.start;
1449         uint64_t length = ppgtt->base.total;
1450         gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1451                                                  I915_CACHE_LLC, true);
1452
1453         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1454                 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1455         } else {
1456                 uint64_t pml4e;
1457                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1458                 struct i915_page_directory_pointer *pdp;
1459
1460                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1461                         if (!test_bit(pml4e, pml4->used_pml4es))
1462                                 continue;
1463
1464                         seq_printf(m, "    PML4E #%llu\n", pml4e);
1465                         gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1466                 }
1467         }
1468 }
1469
1470 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1471 {
1472         unsigned long *new_page_dirs, *new_page_tables;
1473         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1474         int ret;
1475
1476         /* We allocate a temp bitmap for page tables for no gain,
1477          * but as this is for init only, let's keep things simple
1478          */
1479         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1480         if (ret)
1481                 return ret;
1482
1483         /* Allocate for all pdps regardless of how the ppgtt
1484          * was defined.
1485          */
1486         ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1487                                                 0, 1ULL << 32,
1488                                                 new_page_dirs);
1489         if (!ret)
1490                 *ppgtt->pdp.used_pdpes = *new_page_dirs;
1491
1492         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1493
1494         return ret;
1495 }
1496
1497 /*
1498  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1499  * registers, with a net effect resembling a 2-level page table in normal x86
1500  * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
1501  * legacy 32b address space.
1502  *
1503  */
1504 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1505 {
1506         int ret;
1507
1508         ret = gen8_init_scratch(&ppgtt->base);
1509         if (ret)
1510                 return ret;
1511
1512         ppgtt->base.start = 0;
1513         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1514         ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1515         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1516         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1517         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1518         ppgtt->base.bind_vma = ppgtt_bind_vma;
1519         ppgtt->debug_dump = gen8_dump_ppgtt;
1520
1521         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1522                 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1523                 if (ret)
1524                         goto free_scratch;
1525
1526                 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1527
1528                 ppgtt->base.total = 1ULL << 48;
1529                 ppgtt->switch_mm = gen8_48b_mm_switch;
1530         } else {
1531                 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1532                 if (ret)
1533                         goto free_scratch;
1534
1535                 ppgtt->base.total = 1ULL << 32;
1536                 ppgtt->switch_mm = gen8_legacy_mm_switch;
1537                 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1538                                                               0, 0,
1539                                                               GEN8_PML4E_SHIFT);
1540
1541                 if (intel_vgpu_active(ppgtt->base.dev)) {
1542                         ret = gen8_preallocate_top_level_pdps(ppgtt);
1543                         if (ret)
1544                                 goto free_scratch;
1545                 }
1546         }
1547
1548         if (intel_vgpu_active(ppgtt->base.dev))
1549                 gen8_ppgtt_notify_vgt(ppgtt, true);
1550
1551         return 0;
1552
1553 free_scratch:
1554         gen8_free_scratch(&ppgtt->base);
1555         return ret;
1556 }
1557
1558 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1559 {
1560         struct i915_address_space *vm = &ppgtt->base;
1561         struct i915_page_table *unused;
1562         gen6_pte_t scratch_pte;
1563         uint32_t pd_entry;
1564         uint32_t pte, pde, temp;
1565         uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1566
1567         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1568                                      I915_CACHE_LLC, true, 0);
1569
1570         gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1571                 u32 expected;
1572                 gen6_pte_t *pt_vaddr;
1573                 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1574                 pd_entry = readl(ppgtt->pd_addr + pde);
1575                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1576
1577                 if (pd_entry != expected)
1578                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1579                                    pde,
1580                                    pd_entry,
1581                                    expected);
1582                 seq_printf(m, "\tPDE: %x\n", pd_entry);
1583
1584                 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1585
1586                 for (pte = 0; pte < GEN6_PTES; pte+=4) {
1587                         unsigned long va =
1588                                 (pde * PAGE_SIZE * GEN6_PTES) +
1589                                 (pte * PAGE_SIZE);
1590                         int i;
1591                         bool found = false;
1592                         for (i = 0; i < 4; i++)
1593                                 if (pt_vaddr[pte + i] != scratch_pte)
1594                                         found = true;
1595                         if (!found)
1596                                 continue;
1597
1598                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1599                         for (i = 0; i < 4; i++) {
1600                                 if (pt_vaddr[pte + i] != scratch_pte)
1601                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1602                                 else
1603                                         seq_puts(m, "  SCRATCH ");
1604                         }
1605                         seq_puts(m, "\n");
1606                 }
1607                 kunmap_px(ppgtt, pt_vaddr);
1608         }
1609 }
1610
1611 /* Write PDE #@pde of the page directory @pd so that it points to the page table @pt */
1612 static void gen6_write_pde(struct i915_page_directory *pd,
1613                             const int pde, struct i915_page_table *pt)
1614 {
1615         /* Caller needs to make sure the write completes if necessary */
1616         struct i915_hw_ppgtt *ppgtt =
1617                 container_of(pd, struct i915_hw_ppgtt, pd);
1618         u32 pd_entry;
1619
1620         pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1621         pd_entry |= GEN6_PDE_VALID;
1622
1623         writel(pd_entry, ppgtt->pd_addr + pde);
1624 }
1625
1626 /* Write a PDE for each page table found in the ppgtt structure, at
1627  * consecutive (incrementing) positions in the page directory. */
1628 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1629                                   struct i915_page_directory *pd,
1630                                   uint32_t start, uint32_t length)
1631 {
1632         struct i915_page_table *pt;
1633         uint32_t pde, temp;
1634
1635         gen6_for_each_pde(pt, pd, start, length, temp, pde)
1636                 gen6_write_pde(pd, pde, pt);
1637
1638         /* Make sure write is complete before other code can use this page
1639          * table. Also required for WC mapped PTEs */
1640         readl(dev_priv->gtt.gsm);
1641 }
1642
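/*
 * get_pd_offset() below packs the page directory's GGTT offset, in 64-byte
 * units, into the upper half of a 32-bit value (the BUG_ON enforces the
 * 64-byte alignment). Worked example with a made-up offset: a PD at GGTT
 * offset 0x1ff800 encodes as (0x1ff800 / 64) << 16 = 0x7fe00000.
 */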
1643 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1644 {
1645         BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1646
1647         return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1648 }
1649
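/*
 * hsw_mm_switch() and gen7_mm_switch() below load the per-ring PP_DIR
 * registers from the command stream. As a summary of the intel_ring_emit()
 * calls (not additional programming), the six dwords emitted are:
 *
 *   MI_LOAD_REGISTER_IMM(2)
 *   RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G
 *   RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)
 *   MI_NOOP
 */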
1650 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1651                          struct drm_i915_gem_request *req)
1652 {
1653         struct intel_engine_cs *ring = req->ring;
1654         int ret;
1655
1656         /* NB: TLBs must be flushed and invalidated before a switch */
1657         ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1658         if (ret)
1659                 return ret;
1660
1661         ret = intel_ring_begin(req, 6);
1662         if (ret)
1663                 return ret;
1664
1665         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1666         intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
1667         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1668         intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
1669         intel_ring_emit(ring, get_pd_offset(ppgtt));
1670         intel_ring_emit(ring, MI_NOOP);
1671         intel_ring_advance(ring);
1672
1673         return 0;
1674 }
1675
1676 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1677                           struct drm_i915_gem_request *req)
1678 {
1679         struct intel_engine_cs *ring = req->ring;
1680         struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1681
1682         I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1683         I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1684         return 0;
1685 }
1686
1687 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1688                           struct drm_i915_gem_request *req)
1689 {
1690         struct intel_engine_cs *ring = req->ring;
1691         int ret;
1692
1693         /* NB: TLBs must be flushed and invalidated before a switch */
1694         ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1695         if (ret)
1696                 return ret;
1697
1698         ret = intel_ring_begin(req, 6);
1699         if (ret)
1700                 return ret;
1701
1702         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1703         intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
1704         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1705         intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
1706         intel_ring_emit(ring, get_pd_offset(ppgtt));
1707         intel_ring_emit(ring, MI_NOOP);
1708         intel_ring_advance(ring);
1709
1710         /* XXX: RCS is the only one to auto invalidate the TLBs? */
1711         if (ring->id != RCS) {
1712                 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1713                 if (ret)
1714                         return ret;
1715         }
1716
1717         return 0;
1718 }
1719
1720 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1721                           struct drm_i915_gem_request *req)
1722 {
1723         struct intel_engine_cs *ring = req->ring;
1724         struct drm_device *dev = ppgtt->base.dev;
1725         struct drm_i915_private *dev_priv = dev->dev_private;
1726
1728         I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1729         I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1730
1731         POSTING_READ(RING_PP_DIR_DCLV(ring));
1732
1733         return 0;
1734 }
1735
1736 static void gen8_ppgtt_enable(struct drm_device *dev)
1737 {
1738         struct drm_i915_private *dev_priv = dev->dev_private;
1739         struct intel_engine_cs *ring;
1740         int j;
1741
1742         for_each_ring(ring, dev_priv, j) {
1743                 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1744                 I915_WRITE(RING_MODE_GEN7(ring),
1745                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1746         }
1747 }
1748
1749 static void gen7_ppgtt_enable(struct drm_device *dev)
1750 {
1751         struct drm_i915_private *dev_priv = dev->dev_private;
1752         struct intel_engine_cs *ring;
1753         uint32_t ecochk, ecobits;
1754         int i;
1755
1756         ecobits = I915_READ(GAC_ECO_BITS);
1757         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1758
1759         ecochk = I915_READ(GAM_ECOCHK);
1760         if (IS_HASWELL(dev)) {
1761                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1762         } else {
1763                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1764                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1765         }
1766         I915_WRITE(GAM_ECOCHK, ecochk);
1767
1768         for_each_ring(ring, dev_priv, i) {
1769                 /* GFX_MODE is per-ring on gen7+ */
1770                 I915_WRITE(RING_MODE_GEN7(ring),
1771                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1772         }
1773 }
1774
1775 static void gen6_ppgtt_enable(struct drm_device *dev)
1776 {
1777         struct drm_i915_private *dev_priv = dev->dev_private;
1778         uint32_t ecochk, gab_ctl, ecobits;
1779
1780         ecobits = I915_READ(GAC_ECO_BITS);
1781         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1782                    ECOBITS_PPGTT_CACHE64B);
1783
1784         gab_ctl = I915_READ(GAB_CTL);
1785         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1786
1787         ecochk = I915_READ(GAM_ECOCHK);
1788         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1789
1790         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1791 }
1792
1793 /* PPGTT support for Sandybridge/Gen6 and later */
1794 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1795                                    uint64_t start,
1796                                    uint64_t length,
1797                                    bool use_scratch)
1798 {
1799         struct i915_hw_ppgtt *ppgtt =
1800                 container_of(vm, struct i915_hw_ppgtt, base);
1801         gen6_pte_t *pt_vaddr, scratch_pte;
1802         unsigned first_entry = start >> PAGE_SHIFT;
1803         unsigned num_entries = length >> PAGE_SHIFT;
1804         unsigned act_pt = first_entry / GEN6_PTES;
1805         unsigned first_pte = first_entry % GEN6_PTES;
1806         unsigned last_pte, i;
1807
1808         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1809                                      I915_CACHE_LLC, true, 0);
1810
1811         while (num_entries) {
1812                 last_pte = first_pte + num_entries;
1813                 if (last_pte > GEN6_PTES)
1814                         last_pte = GEN6_PTES;
1815
1816                 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1817
1818                 for (i = first_pte; i < last_pte; i++)
1819                         pt_vaddr[i] = scratch_pte;
1820
1821                 kunmap_px(ppgtt, pt_vaddr);
1822
1823                 num_entries -= last_pte - first_pte;
1824                 first_pte = 0;
1825                 act_pt++;
1826         }
1827 }
1828
1829 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1830                                       struct sg_table *pages,
1831                                       uint64_t start,
1832                                       enum i915_cache_level cache_level, u32 flags)
1833 {
1834         struct i915_hw_ppgtt *ppgtt =
1835                 container_of(vm, struct i915_hw_ppgtt, base);
1836         gen6_pte_t *pt_vaddr;
1837         unsigned first_entry = start >> PAGE_SHIFT;
1838         unsigned act_pt = first_entry / GEN6_PTES;
1839         unsigned act_pte = first_entry % GEN6_PTES;
1840         struct sg_page_iter sg_iter;
1841
1842         pt_vaddr = NULL;
1843         for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1844                 if (pt_vaddr == NULL)
1845                         pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1846
1847                 pt_vaddr[act_pte] =
1848                         vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1849                                        cache_level, true, flags);
1850
1851                 if (++act_pte == GEN6_PTES) {
1852                         kunmap_px(ppgtt, pt_vaddr);
1853                         pt_vaddr = NULL;
1854                         act_pt++;
1855                         act_pte = 0;
1856                 }
1857         }
1858         if (pt_vaddr)
1859                 kunmap_px(ppgtt, pt_vaddr);
1860 }
1861
1862 static int gen6_alloc_va_range(struct i915_address_space *vm,
1863                                uint64_t start_in, uint64_t length_in)
1864 {
1865         DECLARE_BITMAP(new_page_tables, I915_PDES);
1866         struct drm_device *dev = vm->dev;
1867         struct drm_i915_private *dev_priv = dev->dev_private;
1868         struct i915_hw_ppgtt *ppgtt =
1869                                 container_of(vm, struct i915_hw_ppgtt, base);
1870         struct i915_page_table *pt;
1871         uint32_t start, length, start_save, length_save;
1872         uint32_t pde, temp;
1873         int ret;
1874
1875         if (WARN_ON(start_in + length_in > ppgtt->base.total))
1876                 return -ENODEV;
1877
1878         start = start_save = start_in;
1879         length = length_save = length_in;
1880
1881         bitmap_zero(new_page_tables, I915_PDES);
1882
1883         /* The allocation is done in two stages so that we can bail out with
1884          * a minimal amount of pain. The first stage finds the new page tables
1885          * that need allocation. The second stage marks the PTEs in use within
1886          * those page tables.
1887          */
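        /* Worked example (illustrative, assuming GEN6_PTES == 1024 and 4KiB
         * pages, i.e. 4MiB of address space per PDE): allocating a 1MiB range
         * starting at 0 finds or allocates only page table 0 in the first
         * stage, and the second stage then marks 256 PTEs in that table as
         * used.
         */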
1888         gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1889                 if (pt != vm->scratch_pt) {
1890                         WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1891                         continue;
1892                 }
1893
1894                 /* We've already allocated a page table */
1895                 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1896
1897                 pt = alloc_pt(dev);
1898                 if (IS_ERR(pt)) {
1899                         ret = PTR_ERR(pt);
1900                         goto unwind_out;
1901                 }
1902
1903                 gen6_initialize_pt(vm, pt);
1904
1905                 ppgtt->pd.page_table[pde] = pt;
1906                 __set_bit(pde, new_page_tables);
1907                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1908         }
1909
1910         start = start_save;
1911         length = length_save;
1912
1913         gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1914                 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1915
1916                 bitmap_zero(tmp_bitmap, GEN6_PTES);
1917                 bitmap_set(tmp_bitmap, gen6_pte_index(start),
1918                            gen6_pte_count(start, length));
1919
1920                 if (__test_and_clear_bit(pde, new_page_tables))
1921                         gen6_write_pde(&ppgtt->pd, pde, pt);
1922
1923                 trace_i915_page_table_entry_map(vm, pde, pt,
1924                                          gen6_pte_index(start),
1925                                          gen6_pte_count(start, length),
1926                                          GEN6_PTES);
1927                 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1928                                 GEN6_PTES);
1929         }
1930
1931         WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1932
1933         /* Make sure write is complete before other code can use this page
1934          * table. Also required for WC mapped PTEs */
1935         readl(dev_priv->gtt.gsm);
1936
1937         mark_tlbs_dirty(ppgtt);
1938         return 0;
1939
1940 unwind_out:
1941         for_each_set_bit(pde, new_page_tables, I915_PDES) {
1942                 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1943
1944                 ppgtt->pd.page_table[pde] = vm->scratch_pt;
1945                 free_pt(vm->dev, pt);
1946         }
1947
1948         mark_tlbs_dirty(ppgtt);
1949         return ret;
1950 }
1951
1952 static int gen6_init_scratch(struct i915_address_space *vm)
1953 {
1954         struct drm_device *dev = vm->dev;
1955
1956         vm->scratch_page = alloc_scratch_page(dev);
1957         if (IS_ERR(vm->scratch_page))
1958                 return PTR_ERR(vm->scratch_page);
1959
1960         vm->scratch_pt = alloc_pt(dev);
1961         if (IS_ERR(vm->scratch_pt)) {
1962                 free_scratch_page(dev, vm->scratch_page);
1963                 return PTR_ERR(vm->scratch_pt);
1964         }
1965
1966         gen6_initialize_pt(vm, vm->scratch_pt);
1967
1968         return 0;
1969 }
1970
1971 static void gen6_free_scratch(struct i915_address_space *vm)
1972 {
1973         struct drm_device *dev = vm->dev;
1974
1975         free_pt(dev, vm->scratch_pt);
1976         free_scratch_page(dev, vm->scratch_page);
1977 }
1978
1979 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1980 {
1981         struct i915_hw_ppgtt *ppgtt =
1982                 container_of(vm, struct i915_hw_ppgtt, base);
1983         struct i915_page_table *pt;
1984         uint32_t pde;
1985
1986         drm_mm_remove_node(&ppgtt->node);
1987
1988         gen6_for_all_pdes(pt, ppgtt, pde) {
1989                 if (pt != vm->scratch_pt)
1990                         free_pt(ppgtt->base.dev, pt);
1991         }
1992
1993         gen6_free_scratch(vm);
1994 }
1995
1996 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1997 {
1998         struct i915_address_space *vm = &ppgtt->base;
1999         struct drm_device *dev = ppgtt->base.dev;
2000         struct drm_i915_private *dev_priv = dev->dev_private;
2001         bool retried = false;
2002         int ret;
2003
2004         /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2005          * allocator works in address space sizes, so it's multiplied by page
2006          * size. We allocate at the top of the GTT to avoid fragmentation.
2007          */
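        /* Rough arithmetic for the reservation below (illustrative, assuming
         * GEN6_PD_SIZE == I915_PDES * PAGE_SIZE): each of the 512 PDEs takes
         * one GGTT PTE slot, and each slot corresponds to one 4KiB page of
         * GGTT address space, so 512 * 4096 = 2MiB of GGTT range is reserved
         * top-down for the page directory.
         */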
2008         BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
2009
2010         ret = gen6_init_scratch(vm);
2011         if (ret)
2012                 return ret;
2013
2014 alloc:
2015         ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
2016                                                   &ppgtt->node, GEN6_PD_SIZE,
2017                                                   GEN6_PD_ALIGN, 0,
2018                                                   0, dev_priv->gtt.base.total,
2019                                                   DRM_MM_TOPDOWN);
2020         if (ret == -ENOSPC && !retried) {
2021                 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
2022                                                GEN6_PD_SIZE, GEN6_PD_ALIGN,
2023                                                I915_CACHE_NONE,
2024                                                0, dev_priv->gtt.base.total,
2025                                                0);
2026                 if (ret)
2027                         goto err_out;
2028
2029                 retried = true;
2030                 goto alloc;
2031         }
2032
2033         if (ret)
2034                 goto err_out;
2035
2037         if (ppgtt->node.start < dev_priv->gtt.mappable_end)
2038                 DRM_DEBUG("Forced to use aperture for PDEs\n");
2039
2040         return 0;
2041
2042 err_out:
2043         gen6_free_scratch(vm);
2044         return ret;
2045 }
2046
2047 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2048 {
2049         return gen6_ppgtt_allocate_page_directories(ppgtt);
2050 }
2051
2052 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2053                                   uint64_t start, uint64_t length)
2054 {
2055         struct i915_page_table *unused;
2056         uint32_t pde, temp;
2057
2058         gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2059                 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2060 }
2061
2062 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2063 {
2064         struct drm_device *dev = ppgtt->base.dev;
2065         struct drm_i915_private *dev_priv = dev->dev_private;
2066         int ret;
2067
2068         ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
2069         if (IS_GEN6(dev)) {
2070                 ppgtt->switch_mm = gen6_mm_switch;
2071         } else if (IS_HASWELL(dev)) {
2072                 ppgtt->switch_mm = hsw_mm_switch;
2073         } else if (IS_GEN7(dev)) {
2074                 ppgtt->switch_mm = gen7_mm_switch;
2075         } else
2076                 BUG();
2077
2078         if (intel_vgpu_active(dev))
2079                 ppgtt->switch_mm = vgpu_mm_switch;
2080
2081         ret = gen6_ppgtt_alloc(ppgtt);
2082         if (ret)
2083                 return ret;
2084
2085         ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2086         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2087         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2088         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2089         ppgtt->base.bind_vma = ppgtt_bind_vma;
2090         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2091         ppgtt->base.start = 0;
2092         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2093         ppgtt->debug_dump = gen6_dump_ppgtt;
2094
2095         ppgtt->pd.base.ggtt_offset =
2096                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2097
2098         ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
2099                 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2100
2101         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2102
2103         gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2104
2105         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2106                          ppgtt->node.size >> 20,
2107                          ppgtt->node.start / PAGE_SIZE);
2108
2109         DRM_DEBUG("Adding PPGTT at offset %x\n",
2110                   ppgtt->pd.base.ggtt_offset << 10);
2111
2112         return 0;
2113 }
2114
2115 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2116 {
2117         ppgtt->base.dev = dev;
2118
2119         if (INTEL_INFO(dev)->gen < 8)
2120                 return gen6_ppgtt_init(ppgtt);
2121         else
2122                 return gen8_ppgtt_init(ppgtt);
2123 }
2124
2125 static void i915_address_space_init(struct i915_address_space *vm,
2126                                     struct drm_i915_private *dev_priv)
2127 {
2128         drm_mm_init(&vm->mm, vm->start, vm->total);
2129         vm->dev = dev_priv->dev;
2130         INIT_LIST_HEAD(&vm->active_list);
2131         INIT_LIST_HEAD(&vm->inactive_list);
2132         list_add_tail(&vm->global_link, &dev_priv->vm_list);
2133 }
2134
2135 static void gtt_write_workarounds(struct drm_device *dev)
2136 {
2137         struct drm_i915_private *dev_priv = dev->dev_private;
2138
2139         /* This function is for gtt related workarounds; it is
2140          * called on driver load and after a GPU reset, so you can place
2141          * workarounds here even if they get overwritten by GPU reset.
2142          */
2143         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2144         if (IS_BROADWELL(dev))
2145                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2146         else if (IS_CHERRYVIEW(dev))
2147                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2148         else if (IS_SKYLAKE(dev))
2149                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2150         else if (IS_BROXTON(dev))
2151                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2152 }
2153
2154 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2155 {
2156         struct drm_i915_private *dev_priv = dev->dev_private;
2157         int ret = 0;
2158
2159         ret = __hw_ppgtt_init(dev, ppgtt);
2160         if (ret == 0) {
2161                 kref_init(&ppgtt->ref);
2162                 i915_address_space_init(&ppgtt->base, dev_priv);
2163         }
2164
2165         return ret;
2166 }
2167
2168 int i915_ppgtt_init_hw(struct drm_device *dev)
2169 {
2170         gtt_write_workarounds(dev);
2171
2172         /* In the case of execlists, PPGTT is enabled by the context descriptor
2173          * and the PDPs are contained within the context itself.  We don't
2174          * need to do anything here. */
2175         if (i915.enable_execlists)
2176                 return 0;
2177
2178         if (!USES_PPGTT(dev))
2179                 return 0;
2180
2181         if (IS_GEN6(dev))
2182                 gen6_ppgtt_enable(dev);
2183         else if (IS_GEN7(dev))
2184                 gen7_ppgtt_enable(dev);
2185         else if (INTEL_INFO(dev)->gen >= 8)
2186                 gen8_ppgtt_enable(dev);
2187         else
2188                 MISSING_CASE(INTEL_INFO(dev)->gen);
2189
2190         return 0;
2191 }
2192
2193 int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2194 {
2195         struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
2196         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2197
2198         if (i915.enable_execlists)
2199                 return 0;
2200
2201         if (!ppgtt)
2202                 return 0;
2203
2204         return ppgtt->switch_mm(ppgtt, req);
2205 }
2206
2207 struct i915_hw_ppgtt *
2208 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2209 {
2210         struct i915_hw_ppgtt *ppgtt;
2211         int ret;
2212
2213         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2214         if (!ppgtt)
2215                 return ERR_PTR(-ENOMEM);
2216
2217         ret = i915_ppgtt_init(dev, ppgtt);
2218         if (ret) {
2219                 kfree(ppgtt);
2220                 return ERR_PTR(ret);
2221         }
2222
2223         ppgtt->file_priv = fpriv;
2224
2225         trace_i915_ppgtt_create(&ppgtt->base);
2226
2227         return ppgtt;
2228 }
2229
2230 void  i915_ppgtt_release(struct kref *kref)
2231 {
2232         struct i915_hw_ppgtt *ppgtt =
2233                 container_of(kref, struct i915_hw_ppgtt, ref);
2234
2235         trace_i915_ppgtt_release(&ppgtt->base);
2236
2237         /* vmas should already be unbound */
2238         WARN_ON(!list_empty(&ppgtt->base.active_list));
2239         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2240
2241         list_del(&ppgtt->base.global_link);
2242         drm_mm_takedown(&ppgtt->base.mm);
2243
2244         ppgtt->base.cleanup(&ppgtt->base);
2245         kfree(ppgtt);
2246 }
2247
2248 extern int intel_iommu_gfx_mapped;
2249 /* Certain Gen5 chipsets require idling the GPU before
2250  * unmapping anything from the GTT when VT-d is enabled.
2251  */
2252 static bool needs_idle_maps(struct drm_device *dev)
2253 {
2254 #ifdef CONFIG_INTEL_IOMMU
2255         /* Query intel_iommu to see if we need the workaround. Presumably that
2256          * was loaded first.
2257          */
2258         if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2259                 return true;
2260 #endif
2261         return false;
2262 }
2263
2264 static bool do_idling(struct drm_i915_private *dev_priv)
2265 {
2266         bool ret = dev_priv->mm.interruptible;
2267
2268         if (unlikely(dev_priv->gtt.do_idle_maps)) {
2269                 dev_priv->mm.interruptible = false;
2270                 if (i915_gpu_idle(dev_priv->dev)) {
2271                         DRM_ERROR("Couldn't idle GPU\n");
2272                         /* Wait a bit, in hopes it avoids the hang */
2273                         udelay(10);
2274                 }
2275         }
2276
2277         return ret;
2278 }
2279
2280 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2281 {
2282         if (unlikely(dev_priv->gtt.do_idle_maps))
2283                 dev_priv->mm.interruptible = interruptible;
2284 }
2285
2286 void i915_check_and_clear_faults(struct drm_device *dev)
2287 {
2288         struct drm_i915_private *dev_priv = dev->dev_private;
2289         struct intel_engine_cs *ring;
2290         int i;
2291
2292         if (INTEL_INFO(dev)->gen < 6)
2293                 return;
2294
2295         for_each_ring(ring, dev_priv, i) {
2296                 u32 fault_reg;
2297                 fault_reg = I915_READ(RING_FAULT_REG(ring));
2298                 if (fault_reg & RING_FAULT_VALID) {
2299                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2300                                          "\tAddr: 0x%08lx\n"
2301                                          "\tAddress space: %s\n"
2302                                          "\tSource ID: %d\n"
2303                                          "\tType: %d\n",
2304                                          fault_reg & PAGE_MASK,
2305                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2306                                          RING_FAULT_SRCID(fault_reg),
2307                                          RING_FAULT_FAULT_TYPE(fault_reg));
2308                         I915_WRITE(RING_FAULT_REG(ring),
2309                                    fault_reg & ~RING_FAULT_VALID);
2310                 }
2311         }
2312         POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
2313 }
2314
2315 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2316 {
2317         if (INTEL_INFO(dev_priv->dev)->gen < 6) {
2318                 intel_gtt_chipset_flush();
2319         } else {
2320                 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2321                 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2322         }
2323 }
2324
2325 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2326 {
2327         struct drm_i915_private *dev_priv = dev->dev_private;
2328
2329         /* Don't bother messing with faults pre GEN6 as we have little
2330          * documentation supporting that it's a good idea.
2331          */
2332         if (INTEL_INFO(dev)->gen < 6)
2333                 return;
2334
2335         i915_check_and_clear_faults(dev);
2336
2337         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
2338                                        dev_priv->gtt.base.start,
2339                                        dev_priv->gtt.base.total,
2340                                        true);
2341
2342         i915_ggtt_flush(dev_priv);
2343 }
2344
2345 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2346 {
2347         if (!dma_map_sg(&obj->base.dev->pdev->dev,
2348                         obj->pages->sgl, obj->pages->nents,
2349                         PCI_DMA_BIDIRECTIONAL))
2350                 return -ENOSPC;
2351
2352         return 0;
2353 }
2354
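/*
 * gen8_set_pte() below writes a 64-bit GGTT PTE through the ioremapped GSM.
 * Where the architecture does not provide writeq (e.g. many 32-bit builds),
 * the PTE is written as two 32-bit halves, low dword first.
 */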
2355 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2356 {
2357 #ifdef writeq
2358         writeq(pte, addr);
2359 #else
2360         iowrite32((u32)pte, addr);
2361         iowrite32(pte >> 32, addr + 4);
2362 #endif
2363 }
2364
2365 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2366                                      struct sg_table *st,
2367                                      uint64_t start,
2368                                      enum i915_cache_level level, u32 unused)
2369 {
2370         struct drm_i915_private *dev_priv = vm->dev->dev_private;
2371         unsigned first_entry = start >> PAGE_SHIFT;
2372         gen8_pte_t __iomem *gtt_entries =
2373                 (gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2374         int i = 0;
2375         struct sg_page_iter sg_iter;
2376         dma_addr_t addr = 0; /* shut up gcc */
2377         int rpm_atomic_seq;
2378
2379         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2380
2381         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2382                 addr = sg_dma_address(sg_iter.sg) +
2383                         (sg_iter.sg_pgoffset << PAGE_SHIFT);
2384                 gen8_set_pte(&gtt_entries[i],
2385                              gen8_pte_encode(addr, level, true));
2386                 i++;
2387         }
2388
2389         /*
2390          * XXX: This serves as a posting read to make sure that the PTE has
2391          * actually been updated. There is some concern that even though
2392          * registers and PTEs are within the same BAR, they may still be subject
2393          * to NUMA-like access patterns. Therefore, even with the way we assume
2394          * hardware should work, we must keep this posting read for paranoia.
2395          */
2396         if (i != 0)
2397                 WARN_ON(readq(&gtt_entries[i-1])
2398                         != gen8_pte_encode(addr, level, true));
2399
2400         /* This next bit makes the above posting read even more important. We
2401          * want to flush the TLBs only after we're certain all the PTE updates
2402          * have finished.
2403          */
2404         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2405         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2406
2407         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2408 }
2409
2410 struct insert_entries {
2411         struct i915_address_space *vm;
2412         struct sg_table *st;
2413         uint64_t start;
2414         enum i915_cache_level level;
2415         u32 flags;
2416 };
2417
2418 static int gen8_ggtt_insert_entries__cb(void *_arg)
2419 {
2420         struct insert_entries *arg = _arg;
2421         gen8_ggtt_insert_entries(arg->vm, arg->st,
2422                                  arg->start, arg->level, arg->flags);
2423         return 0;
2424 }
2425
2426 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2427                                           struct sg_table *st,
2428                                           uint64_t start,
2429                                           enum i915_cache_level level,
2430                                           u32 flags)
2431 {
2432         struct insert_entries arg = { vm, st, start, level, flags };
2433         stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2434 }
2435
2436 /*
2437  * Binds an object into the global gtt with the specified cache level. The object
2438  * will be accessible to the GPU via commands whose operands reference offsets
2439  * within the global GTT as well as accessible by the CPU through the GMADR
2440  * mapped BAR (dev_priv->mm.gtt->gtt).
2441  */
2442 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2443                                      struct sg_table *st,
2444                                      uint64_t start,
2445                                      enum i915_cache_level level, u32 flags)
2446 {
2447         struct drm_i915_private *dev_priv = vm->dev->dev_private;
2448         unsigned first_entry = start >> PAGE_SHIFT;
2449         gen6_pte_t __iomem *gtt_entries =
2450                 (gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2451         int i = 0;
2452         struct sg_page_iter sg_iter;
2453         dma_addr_t addr = 0;
2454         int rpm_atomic_seq;
2455
2456         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2457
2458         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2459                 addr = sg_page_iter_dma_address(&sg_iter);
2460                 iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
2461                 i++;
2462         }
2463
2464         /* XXX: This serves as a posting read to make sure that the PTE has
2465          * actually been updated. There is some concern that even though
2466          * registers and PTEs are within the same BAR, they may still be subject
2467          * to NUMA-like access patterns. Therefore, even with the way we assume
2468          * hardware should work, we must keep this posting read for paranoia.
2469          */
2470         if (i != 0) {
2471                 unsigned long gtt = readl(&gtt_entries[i-1]);
2472                 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2473         }
2474
2475         /* This next bit makes the above posting read even more important. We
2476          * want to flush the TLBs only after we're certain all the PTE updates
2477          * have finished.
2478          */
2479         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2480         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2481
2482         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2483 }
2484
2485 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2486                                   uint64_t start,
2487                                   uint64_t length,
2488                                   bool use_scratch)
2489 {
2490         struct drm_i915_private *dev_priv = vm->dev->dev_private;
2491         unsigned first_entry = start >> PAGE_SHIFT;
2492         unsigned num_entries = length >> PAGE_SHIFT;
2493         gen8_pte_t scratch_pte, __iomem *gtt_base =
2494                 (gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2495         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2496         int i;
2497         int rpm_atomic_seq;
2498
2499         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2500
2501         if (WARN(num_entries > max_entries,
2502                  "First entry = %d; Num entries = %d (max=%d)\n",
2503                  first_entry, num_entries, max_entries))
2504                 num_entries = max_entries;
2505
2506         scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2507                                       I915_CACHE_LLC,
2508                                       use_scratch);
2509         for (i = 0; i < num_entries; i++)
2510                 gen8_set_pte(&gtt_base[i], scratch_pte);
2511         readl(gtt_base);
2512
2513         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2514 }
2515
2516 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2517                                   uint64_t start,
2518                                   uint64_t length,
2519                                   bool use_scratch)
2520 {
2521         struct drm_i915_private *dev_priv = vm->dev->dev_private;
2522         unsigned first_entry = start >> PAGE_SHIFT;
2523         unsigned num_entries = length >> PAGE_SHIFT;
2524         gen6_pte_t scratch_pte, __iomem *gtt_base =
2525                 (gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2526         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2527         int i;
2528         int rpm_atomic_seq;
2529
2530         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2531
2532         if (WARN(num_entries > max_entries,
2533                  "First entry = %d; Num entries = %d (max=%d)\n",
2534                  first_entry, num_entries, max_entries))
2535                 num_entries = max_entries;
2536
2537         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2538                                      I915_CACHE_LLC, use_scratch, 0);
2539
2540         for (i = 0; i < num_entries; i++)
2541                 iowrite32(scratch_pte, &gtt_base[i]);
2542         readl(gtt_base);
2543
2544         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2545 }
2546
2547 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2548                                      struct sg_table *pages,
2549                                      uint64_t start,
2550                                      enum i915_cache_level cache_level, u32 unused)
2551 {
2552         struct drm_i915_private *dev_priv = vm->dev->dev_private;
2553         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2554                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2555         int rpm_atomic_seq;
2556
2557         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2558
2559         intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2560
2561         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2563 }
2564
2565 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2566                                   uint64_t start,
2567                                   uint64_t length,
2568                                   bool unused)
2569 {
2570         struct drm_i915_private *dev_priv = vm->dev->dev_private;
2571         unsigned first_entry = start >> PAGE_SHIFT;
2572         unsigned num_entries = length >> PAGE_SHIFT;
2573         int rpm_atomic_seq;
2574
2575         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2576
2577         intel_gtt_clear_range(first_entry, num_entries);
2578
2579         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2580 }
2581
2582 static int ggtt_bind_vma(struct i915_vma *vma,
2583                          enum i915_cache_level cache_level,
2584                          u32 flags)
2585 {
2586         struct drm_i915_gem_object *obj = vma->obj;
2587         u32 pte_flags = 0;
2588         int ret;
2589
2590         ret = i915_get_ggtt_vma_pages(vma);
2591         if (ret)
2592                 return ret;
2593
2594         /* Currently applicable only to VLV */
2595         if (obj->gt_ro)
2596                 pte_flags |= PTE_READ_ONLY;
2597
2598         vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2599                                 vma->node.start,
2600                                 cache_level, pte_flags);
2601
2602         /*
2603          * Without aliasing PPGTT there's no difference between
2604          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2605          * upgrade to both bound if we bind either to avoid double-binding.
2606          */
2607         vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2608
2609         return 0;
2610 }
2611
2612 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2613                                  enum i915_cache_level cache_level,
2614                                  u32 flags)
2615 {
2616         struct drm_device *dev = vma->vm->dev;
2617         struct drm_i915_private *dev_priv = dev->dev_private;
2618         struct drm_i915_gem_object *obj = vma->obj;
2619         struct sg_table *pages = obj->pages;
2620         u32 pte_flags = 0;
2621         int ret;
2622
2623         ret = i915_get_ggtt_vma_pages(vma);
2624         if (ret)
2625                 return ret;
2626         pages = vma->ggtt_view.pages;
2627
2628         /* Currently applicable only to VLV */
2629         if (obj->gt_ro)
2630                 pte_flags |= PTE_READ_ONLY;
2631
2633         if (flags & GLOBAL_BIND) {
2634                 vma->vm->insert_entries(vma->vm, pages,
2635                                         vma->node.start,
2636                                         cache_level, pte_flags);
2637         }
2638
2639         if (flags & LOCAL_BIND) {
2640                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2641                 appgtt->base.insert_entries(&appgtt->base, pages,
2642                                             vma->node.start,
2643                                             cache_level, pte_flags);
2644         }
2645
2646         return 0;
2647 }
2648
2649 static void ggtt_unbind_vma(struct i915_vma *vma)
2650 {
2651         struct drm_device *dev = vma->vm->dev;
2652         struct drm_i915_private *dev_priv = dev->dev_private;
2653         struct drm_i915_gem_object *obj = vma->obj;
2654         const uint64_t size = min_t(uint64_t,
2655                                     obj->base.size,
2656                                     vma->node.size);
2657
2658         if (vma->bound & GLOBAL_BIND) {
2659                 vma->vm->clear_range(vma->vm,
2660                                      vma->node.start,
2661                                      size,
2662                                      true);
2663         }
2664
2665         if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2666                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2667
2668                 appgtt->base.clear_range(&appgtt->base,
2669                                          vma->node.start,
2670                                          size,
2671                                          true);
2672         }
2673 }
2674
2675 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2676 {
2677         struct drm_device *dev = obj->base.dev;
2678         struct drm_i915_private *dev_priv = dev->dev_private;
2679         bool interruptible;
2680
2681         interruptible = do_idling(dev_priv);
2682
2683         dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2684                      PCI_DMA_BIDIRECTIONAL);
2685
2686         undo_idling(dev_priv, interruptible);
2687 }
2688
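/*
 * i915_gtt_color_adjust() below implements GGTT cache colouring: when a
 * neighbouring node has a different colour (in i915's usage, a different
 * caching mode), the usable range is shrunk by one 4KiB page on that side,
 * leaving a guard page between the two objects. It is only installed on
 * !HAS_LLC platforms, see i915_gem_setup_global_gtt() below.
 */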
2689 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2690                                   unsigned long color,
2691                                   u64 *start,
2692                                   u64 *end)
2693 {
2694         if (node->color != color)
2695                 *start += 4096;
2696
2697         if (!list_empty(&node->node_list)) {
2698                 node = list_entry(node->node_list.next,
2699                                   struct drm_mm_node,
2700                                   node_list);
2701                 if (node->allocated && node->color != color)
2702                         *end -= 4096;
2703         }
2704 }
2705
2706 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2707                                      u64 start,
2708                                      u64 mappable_end,
2709                                      u64 end)
2710 {
2711         /* Let GEM manage all of the aperture.
2712          *
2713          * However, leave one page at the end still bound to the scratch page.
2714          * There are a number of places where the hardware apparently prefetches
2715          * past the end of the object, and we've seen multiple hangs with the
2716          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2717          * aperture.  One page should be enough to keep any prefetching inside
2718          * of the aperture.
2719          */
2720         struct drm_i915_private *dev_priv = dev->dev_private;
2721         struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2722         struct drm_mm_node *entry;
2723         struct drm_i915_gem_object *obj;
2724         unsigned long hole_start, hole_end;
2725         int ret;
2726
2727         BUG_ON(mappable_end > end);
2728
2729         ggtt_vm->start = start;
2730
2731         /* Subtract the guard page before address space initialization to
2732          * shrink the range used by drm_mm */
2733         ggtt_vm->total = end - start - PAGE_SIZE;
2734         i915_address_space_init(ggtt_vm, dev_priv);
2735         ggtt_vm->total += PAGE_SIZE;
2736
2737         if (intel_vgpu_active(dev)) {
2738                 ret = intel_vgt_balloon(dev);
2739                 if (ret)
2740                         return ret;
2741         }
2742
2743         if (!HAS_LLC(dev))
2744                 ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
2745
2746         /* Mark any preallocated objects as occupied */
2747         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2748                 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
2749
2750                 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2751                               i915_gem_obj_ggtt_offset(obj), obj->base.size);
2752
2753                 WARN_ON(i915_gem_obj_ggtt_bound(obj));
2754                 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
2755                 if (ret) {
2756                         DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2757                         return ret;
2758                 }
2759                 vma->bound |= GLOBAL_BIND;
2760                 __i915_vma_set_map_and_fenceable(vma);
2761                 list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
2762         }
2763
2764         /* Clear any non-preallocated blocks */
2765         drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
2766                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2767                               hole_start, hole_end);
2768                 ggtt_vm->clear_range(ggtt_vm, hole_start,
2769                                      hole_end - hole_start, true);
2770         }
2771
2772         /* And finally clear the reserved guard page */
2773         ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
2774
2775         if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2776                 struct i915_hw_ppgtt *ppgtt;
2777
2778                 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2779                 if (!ppgtt)
2780                         return -ENOMEM;
2781
2782                 ret = __hw_ppgtt_init(dev, ppgtt);
2783                 if (ret) {
2784                         ppgtt->base.cleanup(&ppgtt->base);
2785                         kfree(ppgtt);
2786                         return ret;
2787                 }
2788
2789                 if (ppgtt->base.allocate_va_range)
2790                         ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2791                                                             ppgtt->base.total);
2792                 if (ret) {
2793                         ppgtt->base.cleanup(&ppgtt->base);
2794                         kfree(ppgtt);
2795                         return ret;
2796                 }
2797
2798                 ppgtt->base.clear_range(&ppgtt->base,
2799                                         ppgtt->base.start,
2800                                         ppgtt->base.total,
2801                                         true);
2802
2803                 dev_priv->mm.aliasing_ppgtt = ppgtt;
2804                 WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
2805                 dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
2806         }
2807
2808         return 0;
2809 }
2810
2811 void i915_gem_init_global_gtt(struct drm_device *dev)
2812 {
2813         struct drm_i915_private *dev_priv = dev->dev_private;
2814         u64 gtt_size, mappable_size;
2815
2816         gtt_size = dev_priv->gtt.base.total;
2817         mappable_size = dev_priv->gtt.mappable_end;
2818
2819         i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
2820 }
2821
2822 void i915_global_gtt_cleanup(struct drm_device *dev)
2823 {
2824         struct drm_i915_private *dev_priv = dev->dev_private;
2825         struct i915_address_space *vm = &dev_priv->gtt.base;
2826
2827         if (dev_priv->mm.aliasing_ppgtt) {
2828                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2829
2830                 ppgtt->base.cleanup(&ppgtt->base);
2831         }
2832
2833         i915_gem_cleanup_stolen(dev);
2834
2835         if (drm_mm_initialized(&vm->mm)) {
2836                 if (intel_vgpu_active(dev))
2837                         intel_vgt_deballoon();
2838
2839                 drm_mm_takedown(&vm->mm);
2840                 list_del(&vm->global_link);
2841         }
2842
2843         vm->cleanup(vm);
2844 }
2845
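/*
 * The helpers below decode GMCH graphics control register fields into sizes.
 * Worked example for gen6 (illustrative): a GGMS field value of 2 yields
 * 2 << 20 = 2MiB of GTT; assuming 4-byte gen6 PTEs, that is 512K entries,
 * i.e. a 2GiB global GTT.
 */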
2846 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2847 {
2848         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2849         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2850         return snb_gmch_ctl << 20;
2851 }
2852
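/*
 * Worked example for the BDW decode below (illustrative): GGMS field values
 * 1/2/3 map to 2/4/8 MiB of GTT; assuming 8-byte gen8 PTEs, 8MiB holds 1M
 * entries, i.e. a 4GiB global GTT. The CONFIG_X86_32 clamp to 4MiB therefore
 * corresponds to the 2GiB GGTT mentioned in the comment below.
 */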
2853 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2854 {
2855         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2856         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2857         if (bdw_gmch_ctl)
2858                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2859
2860 #ifdef CONFIG_X86_32
2861         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2862         if (bdw_gmch_ctl > 4)
2863                 bdw_gmch_ctl = 4;
2864 #endif
2865
2866         return bdw_gmch_ctl << 20;
2867 }
2868
2869 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2870 {
2871         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2872         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2873
2874         if (gmch_ctrl)
2875                 return 1 << (20 + gmch_ctrl);
2876
2877         return 0;
2878 }
2879
2880 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2881 {
2882         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2883         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2884         return snb_gmch_ctl << 25; /* 32 MB units */
2885 }
2886
2887 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2888 {
2889         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2890         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2891         return bdw_gmch_ctl << 25; /* 32 MB units */
2892 }
2893
2894 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2895 {
2896         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2897         gmch_ctrl &= SNB_GMCH_GMS_MASK;
2898
2899         /*
2900          * 0x0  to 0x10: 32MB increments starting at 0MB
2901          * 0x11 to 0x16: 4MB increments starting at 8MB
2902          * 0x17 to 0x1d: 4MB increments starting at 36MB
2903          */
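        /* Examples of the decode below (illustrative): 0x10 -> 512MB,
         * 0x11 -> 8MB, 0x16 -> 28MB, 0x17 -> 36MB, 0x1d -> 60MB.
         */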
2904         if (gmch_ctrl < 0x11)
2905                 return gmch_ctrl << 25;
2906         else if (gmch_ctrl < 0x17)
2907                 return (gmch_ctrl - 0x11 + 2) << 22;
2908         else
2909                 return (gmch_ctrl - 0x17 + 9) << 22;
2910 }
2911
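/*
 * Examples of the gen9 decode below (illustrative): 0x01 -> 32MB and
 * 0x02 -> 64MB via the 32MB units, while 0xf0 -> 4MB and 0xf1 -> 8MB via
 * the 4MB units.
 */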
2912 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2913 {
2914         gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2915         gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2916
2917         if (gen9_gmch_ctl < 0xf0)
2918                 return gen9_gmch_ctl << 25; /* 32 MB units */
2919         else
2920                 /* 4MB increments, starting at 4MB for a field value of 0xf0 */
2921                 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2922 }
2923
2924 static int ggtt_probe_common(struct drm_device *dev,
2925                              size_t gtt_size)
2926 {
2927         struct drm_i915_private *dev_priv = dev->dev_private;
2928         struct i915_page_scratch *scratch_page;
2929         phys_addr_t gtt_phys_addr;
2930
2931         /* For Modern GENs the PTEs and register space are split in the BAR */
2932         gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2933                 (pci_resource_len(dev->pdev, 0) / 2);
2934
2935         /*
2936          * On BXT, writes larger than 64 bits to the GTT pagetable range will be
2937          * dropped. For WC mappings in general we have 64 byte burst writes
2938          * when the WC buffer is flushed, so we can't use it, but have to
2939          * resort to an uncached mapping. The WC issue is easily caught by the
2940          * readback check when writing GTT PTE entries.
2941          */
2942         if (IS_BROXTON(dev))
2943                 dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
2944         else
2945                 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
2946         if (!dev_priv->gtt.gsm) {
2947                 DRM_ERROR("Failed to map the gtt page table\n");
2948                 return -ENOMEM;
2949         }
2950
2951         scratch_page = alloc_scratch_page(dev);
2952         if (IS_ERR(scratch_page)) {
2953                 DRM_ERROR("Scratch setup failed\n");
2954                 /* iounmap will also get called at remove, but meh */
2955                 iounmap(dev_priv->gtt.gsm);
2956                 return PTR_ERR(scratch_page);
2957         }
2958
2959         dev_priv->gtt.base.scratch_page = scratch_page;
2960
2961         return 0;
2962 }
2963
2964 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2965  * bits. When using advanced contexts, each context stores its own PAT, but
2966  * writing this data shouldn't be harmful even in those cases. */
2967 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
2968 {
2969         uint64_t pat;
2970
2971         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2972               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2973               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2974               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2975               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2976               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2977               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2978               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2979
2980         if (!USES_PPGTT(dev_priv->dev))
2981                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
2982                  * so RTL will always use the value corresponding to
2983                  * pat_sel = 000".
2984                  * So disable caching for the GGTT to avoid screen corruption.
2985                  * MOCS can still be used though.
2986                  * - System agent GGTT writes (i.e. CPU GTT mmaps) already
2987                  * worked before this change, i.e. the same uncached + snooping
2988                  * access as on gen6/7 appears to be in effect.
2989                  * - So this only fixes blitter/render access, and again it
2990                  * looks like it's not just uncached access but uncached +
2991                  * snooping, so we can still hold onto all our assumptions
2992                  * wrt CPU clflushing on LLC machines.
2993                  */
2994                 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
2995
2996         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2997          * write would work. */
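        /*
         * GEN8_PPAT() packs each 8-bit PAT entry at bit position 8 * index,
         * so the low dword written below carries entries 0-3 and the high
         * dword (pat >> 32) carries entries 4-7.
         */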
2998         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
2999         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3000 }
3001
3002 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3003 {
3004         uint64_t pat;
3005
3006         /*
3007          * Map WB on BDW to snooped on CHV.
3008          *
3009          * Only the snoop bit has meaning for CHV, the rest is
3010          * ignored.
3011          *
3012          * The hardware will never snoop for certain types of accesses:
3013          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3014          * - PPGTT page tables
3015          * - some other special cycles
3016          *
3017          * As with BDW, we also need to consider the following for GT accesses:
3018          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3019          * so RTL will always use the value corresponding to
3020          * pat_sel = 000".
3021          * Which means we must set the snoop bit in PAT entry 0
3022          * in order to keep the global status page working.
3023          */
3024         pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3025               GEN8_PPAT(1, 0) |
3026               GEN8_PPAT(2, 0) |
3027               GEN8_PPAT(3, 0) |
3028               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3029               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3030               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3031               GEN8_PPAT(7, CHV_PPAT_SNOOP);
3032
3033         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3034         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3035 }
3036
3037 static int gen8_gmch_probe(struct drm_device *dev,
3038                            u64 *gtt_total,
3039                            size_t *stolen,
3040                            phys_addr_t *mappable_base,
3041                            u64 *mappable_end)
3042 {
3043         struct drm_i915_private *dev_priv = dev->dev_private;
3044         u64 gtt_size;
3045         u16 snb_gmch_ctl;
3046         int ret;
3047
3048         /* TODO: We're not aware of mappable constraints on gen8 yet */
3049         *mappable_base = pci_resource_start(dev->pdev, 2);
3050         *mappable_end = pci_resource_len(dev->pdev, 2);
3051
3052         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3053                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3054
3055         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3056
3057         if (INTEL_INFO(dev)->gen >= 9) {
3058                 *stolen = gen9_get_stolen_size(snb_gmch_ctl);
3059                 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3060         } else if (IS_CHERRYVIEW(dev)) {
3061                 *stolen = chv_get_stolen_size(snb_gmch_ctl);
3062                 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
3063         } else {
3064                 *stolen = gen8_get_stolen_size(snb_gmch_ctl);
3065                 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3066         }
3067
3068         *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3069
3070         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3071                 chv_setup_private_ppat(dev_priv);
3072         else
3073                 bdw_setup_private_ppat(dev_priv);
3074
3075         ret = ggtt_probe_common(dev, gtt_size);
3076
3077         dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
3078         dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
3079         dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3080         dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3081
3082         if (IS_CHERRYVIEW(dev_priv))
3083                 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries__BKL;
3084
3085         return ret;
3086 }
3087
3088 static int gen6_gmch_probe(struct drm_device *dev,
3089                            u64 *gtt_total,
3090                            size_t *stolen,
3091                            phys_addr_t *mappable_base,
3092                            u64 *mappable_end)
3093 {
3094         struct drm_i915_private *dev_priv = dev->dev_private;
3095         unsigned int gtt_size;
3096         u16 snb_gmch_ctl;
3097         int ret;
3098
3099         *mappable_base = pci_resource_start(dev->pdev, 2);
3100         *mappable_end = pci_resource_len(dev->pdev, 2);
3101
3102         /* 64/512MB is the current min/max we actually know of, but this is just
3103          * a coarse sanity check.
3104          */
3105         if (*mappable_end < (64<<20) || *mappable_end > (512<<20)) {
3106                 DRM_ERROR("Unknown GMADR size (%llx)\n",
3107                           *mappable_end);
3108                 return -ENXIO;
3109         }
3110
3111         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3112                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3113         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3114
3115         *stolen = gen6_get_stolen_size(snb_gmch_ctl);
3116
3117         gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
3118         *gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3119
3120         ret = ggtt_probe_common(dev, gtt_size);
3121
3122         dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
3123         dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
3124         dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3125         dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3126
3127         return ret;
3128 }
3129
3130 static void gen6_gmch_remove(struct i915_address_space *vm)
3131 {
3133         struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
3134
3135         iounmap(gtt->gsm);
3136         free_scratch_page(vm->dev, vm->scratch_page);
3137 }
3138
3139 static int i915_gmch_probe(struct drm_device *dev,
3140                            u64 *gtt_total,
3141                            size_t *stolen,
3142                            phys_addr_t *mappable_base,
3143                            u64 *mappable_end)
3144 {
3145         struct drm_i915_private *dev_priv = dev->dev_private;
3146         int ret;
3147
3148         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3149         if (!ret) {
3150                 DRM_ERROR("failed to set up gmch\n");
3151                 return -EIO;
3152         }
3153
3154         intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
3155
3156         dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
3157         dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
3158         dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
3159         dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3160         dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3161
3162         if (unlikely(dev_priv->gtt.do_idle_maps))
3163                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3164
3165         return 0;
3166 }
3167
3168 static void i915_gmch_remove(struct i915_address_space *vm)
3169 {
3170         intel_gmch_remove();
3171 }
3172
3173 int i915_gem_gtt_init(struct drm_device *dev)
3174 {
3175         struct drm_i915_private *dev_priv = dev->dev_private;
3176         struct i915_gtt *gtt = &dev_priv->gtt;
3177         int ret;
3178
3179         if (INTEL_INFO(dev)->gen <= 5) {
3180                 gtt->gtt_probe = i915_gmch_probe;
3181                 gtt->base.cleanup = i915_gmch_remove;
3182         } else if (INTEL_INFO(dev)->gen < 8) {
3183                 gtt->gtt_probe = gen6_gmch_probe;
3184                 gtt->base.cleanup = gen6_gmch_remove;
3185                 if (IS_HASWELL(dev) && dev_priv->ellc_size)
3186                         gtt->base.pte_encode = iris_pte_encode;
3187                 else if (IS_HASWELL(dev))
3188                         gtt->base.pte_encode = hsw_pte_encode;
3189                 else if (IS_VALLEYVIEW(dev))
3190                         gtt->base.pte_encode = byt_pte_encode;
3191                 else if (INTEL_INFO(dev)->gen >= 7)
3192                         gtt->base.pte_encode = ivb_pte_encode;
3193                 else
3194                         gtt->base.pte_encode = snb_pte_encode;
3195         } else {
3196                 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
3197                 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
3198         }
3199
3200         gtt->base.dev = dev;
3201
3202         ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
3203                              &gtt->mappable_base, &gtt->mappable_end);
3204         if (ret)
3205                 return ret;
3206
3207         /*
3208          * Initialise stolen early so that we may reserve preallocated
3209          * objects for the BIOS to KMS transition.
3210          */
3211         ret = i915_gem_init_stolen(dev);
3212         if (ret)
3213                 goto out_gtt_cleanup;
3214
3215         /* GMADR is the PCI mmio aperture into the global GTT. */
3216         DRM_INFO("Memory usable by graphics device = %lluM\n",
3217                  gtt->base.total >> 20);
3218         DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20);
3219         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
3220 #ifdef CONFIG_INTEL_IOMMU
3221         if (intel_iommu_gfx_mapped)
3222                 DRM_INFO("VT-d active for gfx access\n");
3223 #endif
3224         /*
3225          * i915.enable_ppgtt is read-only, so do an early pass to validate the
3226          * user's requested state against the hardware/driver capabilities.  We
3227          * do this now so that we can print out any log messages once rather
3228          * than every time we check intel_enable_ppgtt().
3229          */
3230         i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3231         DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3232
3233         return 0;
3234
3235 out_gtt_cleanup:
3236         gtt->base.cleanup(&dev_priv->gtt.base);
3237
3238         return ret;
3239 }
3240
3241 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3242 {
3243         struct drm_i915_private *dev_priv = dev->dev_private;
3244         struct drm_i915_gem_object *obj;
3245         struct i915_address_space *vm;
3246         struct i915_vma *vma;
3247         bool flush;
3248
3249         i915_check_and_clear_faults(dev);
3250
3251         /* First fill our portion of the GTT with scratch pages */
3252         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
3253                                        dev_priv->gtt.base.start,
3254                                        dev_priv->gtt.base.total,
3255                                        true);
3256
3257         /* Cache flush objects bound into GGTT and rebind them. */
3258         vm = &dev_priv->gtt.base;
3259         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3260                 flush = false;
3261                 list_for_each_entry(vma, &obj->vma_list, vma_link) {
3262                         if (vma->vm != vm)
3263                                 continue;
3264
3265                         WARN_ON(i915_vma_bind(vma, obj->cache_level,
3266                                               PIN_UPDATE));
3267
3268                         flush = true;
3269                 }
3270
3271                 if (flush)
3272                         i915_gem_clflush_object(obj, obj->pin_display);
3273         }
3274
3275         if (INTEL_INFO(dev)->gen >= 8) {
3276                 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3277                         chv_setup_private_ppat(dev_priv);
3278                 else
3279                         bdw_setup_private_ppat(dev_priv);
3280
3281                 return;
3282         }
3283
3284         if (USES_PPGTT(dev)) {
3285                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3286                         /* TODO: Perhaps it shouldn't be gen6 specific */
3287
3288                         struct i915_hw_ppgtt *ppgtt =
3289                                         container_of(vm, struct i915_hw_ppgtt,
3290                                                      base);
3291
3292                         if (i915_is_ggtt(vm))
3293                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
3294
3295                         gen6_write_page_range(dev_priv, &ppgtt->pd,
3296                                               0, ppgtt->base.total);
3297                 }
3298         }
3299
3300         i915_ggtt_flush(dev_priv);
3301 }
3302
3303 static struct i915_vma *
3304 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3305                       struct i915_address_space *vm,
3306                       const struct i915_ggtt_view *ggtt_view)
3307 {
3308         struct i915_vma *vma;
3309
3310         if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3311                 return ERR_PTR(-EINVAL);
3312
3313         vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3314         if (vma == NULL)
3315                 return ERR_PTR(-ENOMEM);
3316
3317         INIT_LIST_HEAD(&vma->vma_link);
3318         INIT_LIST_HEAD(&vma->mm_list);
3319         INIT_LIST_HEAD(&vma->exec_list);
3320         vma->vm = vm;
3321         vma->obj = obj;
3322
3323         if (i915_is_ggtt(vm))
3324                 vma->ggtt_view = *ggtt_view;
3325
3326         list_add_tail(&vma->vma_link, &obj->vma_list);
3327         if (!i915_is_ggtt(vm))
3328                 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3329
3330         return vma;
3331 }
3332
3333 struct i915_vma *
3334 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3335                                   struct i915_address_space *vm)
3336 {
3337         struct i915_vma *vma;
3338
3339         vma = i915_gem_obj_to_vma(obj, vm);
3340         if (!vma)
3341                 vma = __i915_gem_vma_create(obj, vm,
3342                                             i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3343
3344         return vma;
3345 }
3346
3347 struct i915_vma *
3348 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3349                                        const struct i915_ggtt_view *view)
3350 {
3351         struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
3352         struct i915_vma *vma;
3353
3354         if (WARN_ON(!view))
3355                 return ERR_PTR(-EINVAL);
3356
3357         vma = i915_gem_obj_to_ggtt_view(obj, view);
3358
3359         if (IS_ERR(vma))
3360                 return vma;
3361
3362         if (!vma)
3363                 vma = __i915_gem_vma_create(obj, ggtt, view);
3364
3365         return vma;
3367 }
3368
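/*
 * Walk the source page addresses column by column, bottom row first, and
 * emit one sg entry per page. Illustrative example with a 2x2 page source
 * (page indices, stride 2):
 *
 *    0 1          2 0
 *    2 3   --->   3 1
 *
 * i.e. the pages come out in the order of the source rotated by 90 degrees.
 */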
3369 static struct scatterlist *
3370 rotate_pages(const dma_addr_t *in, unsigned int offset,
3371              unsigned int width, unsigned int height,
3372              unsigned int stride,
3373              struct sg_table *st, struct scatterlist *sg)
3374 {
3375         unsigned int column, row;
3376         unsigned int src_idx;
3377
3378         if (!sg) {
3379                 st->nents = 0;
3380                 sg = st->sgl;
3381         }
3382
3383         for (column = 0; column < width; column++) {
3384                 src_idx = stride * (height - 1) + column;
3385                 for (row = 0; row < height; row++) {
3386                         st->nents++;
3387                         /* We don't need the pages, but need to initialize
3388                          * the entries so the sg list can be happily traversed.
3389                          * All we need are the DMA addresses.
3390                          */
3391                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3392                         sg_dma_address(sg) = in[offset + src_idx];
3393                         sg_dma_len(sg) = PAGE_SIZE;
3394                         sg = sg_next(sg);
3395                         src_idx -= stride;
3396                 }
3397         }
3398
3399         return sg;
3400 }
3401
3402 static struct sg_table *
3403 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3404                           struct drm_i915_gem_object *obj)
3405 {
3406         unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
3407         unsigned int size_pages_uv;
3408         struct sg_page_iter sg_iter;
3409         unsigned long i;
3410         dma_addr_t *page_addr_list;
3411         struct sg_table *st;
3412         unsigned int uv_start_page;
3413         struct scatterlist *sg;
3414         int ret = -ENOMEM;
3415
3416         /* Allocate a temporary list of source pages for random access. */
3417         page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3418                                        sizeof(dma_addr_t));
3419         if (!page_addr_list)
3420                 return ERR_PTR(ret);
3421
3422         /* Account for UV plane with NV12. */
3423         if (rot_info->pixel_format == DRM_FORMAT_NV12)
3424                 size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
3425         else
3426                 size_pages_uv = 0;
3427
3428         /* Allocate target SG list. */
3429         st = kmalloc(sizeof(*st), GFP_KERNEL);
3430         if (!st)
3431                 goto err_st_alloc;
3432
3433         ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3434         if (ret)
3435                 goto err_sg_alloc;
3436
3437         /* Populate source page list from the object. */
3438         i = 0;
3439         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3440                 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3441                 i++;
3442         }
3443
3444         /* Rotate the pages. */
3445         sg = rotate_pages(page_addr_list, 0,
3446                      rot_info->width_pages, rot_info->height_pages,
3447                      rot_info->width_pages,
3448                      st, NULL);
3449
3450         /* Append the UV plane if NV12. */
3451         if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3452                 uv_start_page = size_pages;
3453
3454                 /* Check for tile-row misalignment. */
3455                 if (offset_in_page(rot_info->uv_offset))
3456                         uv_start_page--;
3457
3458                 rot_info->uv_start_page = uv_start_page;
3459
3460                 rotate_pages(page_addr_list, uv_start_page,
3461                              rot_info->width_pages_uv,
3462                              rot_info->height_pages_uv,
3463                              rot_info->width_pages_uv,
3464                              st, sg);
3465         }
3466
3467         DRM_DEBUG_KMS(
3468                       "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
3469                       obj->base.size, rot_info->pitch, rot_info->height,
3470                       rot_info->pixel_format, rot_info->width_pages,
3471                       rot_info->height_pages, size_pages + size_pages_uv,
3472                       size_pages);
3473
3474         drm_free_large(page_addr_list);
3475
3476         return st;
3477
3478 err_sg_alloc:
3479         kfree(st);
3480 err_st_alloc:
3481         drm_free_large(page_addr_list);
3482
3483         DRM_DEBUG_KMS(
3484                       "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
3485                       obj->base.size, ret, rot_info->pitch, rot_info->height,
3486                       rot_info->pixel_format, rot_info->width_pages,
3487                       rot_info->height_pages, size_pages + size_pages_uv,
3488                       size_pages);
3489         return ERR_PTR(ret);
3490 }
3491
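/*
 * Build an sg_table covering view->params.partial.size pages of the object,
 * starting view->params.partial.offset pages into its backing store.
 */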
3492 static struct sg_table *
3493 intel_partial_pages(const struct i915_ggtt_view *view,
3494                     struct drm_i915_gem_object *obj)
3495 {
3496         struct sg_table *st;
3497         struct scatterlist *sg;
3498         struct sg_page_iter obj_sg_iter;
3499         int ret = -ENOMEM;
3500
3501         st = kmalloc(sizeof(*st), GFP_KERNEL);
3502         if (!st)
3503                 goto err_st_alloc;
3504
3505         ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3506         if (ret)
3507                 goto err_sg_alloc;
3508
3509         sg = st->sgl;
3510         st->nents = 0;
3511         for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3512                 view->params.partial.offset)
3513         {
3514                 if (st->nents >= view->params.partial.size)
3515                         break;
3516
3517                 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3518                 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3519                 sg_dma_len(sg) = PAGE_SIZE;
3520
3521                 sg = sg_next(sg);
3522                 st->nents++;
3523         }
3524
3525         return st;
3526
3527 err_sg_alloc:
3528         kfree(st);
3529 err_st_alloc:
3530         return ERR_PTR(ret);
3531 }
3532
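/*
 * Populate vma->ggtt_view.pages for the vma's view type: the normal view
 * simply reuses the object's own page list, while rotated and partial views
 * build a dedicated sg_table of DMA addresses.
 */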
3533 static int
3534 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3535 {
3536         int ret = 0;
3537
3538         if (vma->ggtt_view.pages)
3539                 return 0;
3540
3541         if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3542                 vma->ggtt_view.pages = vma->obj->pages;
3543         else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3544                 vma->ggtt_view.pages =
3545                         intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3546         else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3547                 vma->ggtt_view.pages =
3548                         intel_partial_pages(&vma->ggtt_view, vma->obj);
3549         else
3550                 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3551                           vma->ggtt_view.type);
3552
3553         if (!vma->ggtt_view.pages) {
3554                 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3555                           vma->ggtt_view.type);
3556                 ret = -EINVAL;
3557         } else if (IS_ERR(vma->ggtt_view.pages)) {
3558                 ret = PTR_ERR(vma->ggtt_view.pages);
3559                 vma->ggtt_view.pages = NULL;
3560                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3561                           vma->ggtt_view.type, ret);
3562         }
3563
3564         return ret;
3565 }
3566
3567 /**
3568  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3569  * @vma: VMA to map
3570  * @cache_level: mapping cache level
3571  * @flags: flags like global or local mapping
3572  *
3573  * DMA addresses are taken from the scatter-gather table of this object (or of
3574  * this VMA in case of non-default GGTT views) and used to set up the PTEs.
3575  * Note that the DMA addresses are the only part of the SG table we care about.
3576  */
3577 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3578                   u32 flags)
3579 {
3580         int ret;
3581         u32 bind_flags;
3582
3583         if (WARN_ON(flags == 0))
3584                 return -EINVAL;
3585
3586         bind_flags = 0;
3587         if (flags & PIN_GLOBAL)
3588                 bind_flags |= GLOBAL_BIND;
3589         if (flags & PIN_USER)
3590                 bind_flags |= LOCAL_BIND;
3591
3592         if (flags & PIN_UPDATE)
3593                 bind_flags |= vma->bound;
3594         else
3595                 bind_flags &= ~vma->bound;
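        /*
         * Illustrative example: for a vma that is already bound globally
         * (vma->bound == GLOBAL_BIND), PIN_USER alone leaves only LOCAL_BIND
         * set here, PIN_USER | PIN_UPDATE rewrites both the global and local
         * PTEs, and PIN_GLOBAL alone reduces to a no-op via the check below.
         */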
3596
3597         if (bind_flags == 0)
3598                 return 0;
3599
3600         if (vma->bound == 0 && vma->vm->allocate_va_range) {
3601                 trace_i915_va_alloc(vma->vm,
3602                                     vma->node.start,
3603                                     vma->node.size,
3604                                     VM_TO_TRACE_NAME(vma->vm));
3605
3606                 /* XXX: i915_vma_pin() will fix this +- hack */
3607                 vma->pin_count++;
3608                 ret = vma->vm->allocate_va_range(vma->vm,
3609                                                  vma->node.start,
3610                                                  vma->node.size);
3611                 vma->pin_count--;
3612                 if (ret)
3613                         return ret;
3614         }
3615
3616         ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3617         if (ret)
3618                 return ret;
3619
3620         vma->bound |= bind_flags;
3621
3622         return 0;
3623 }
3624
3625 /**
3626  * i915_ggtt_view_size - Get the size of a GGTT view.
3627  * @obj: Object the view is of.
3628  * @view: The view in question.
3629  *
3630  * Return: The size of the GGTT view in bytes.
3631  */
3632 size_t
3633 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3634                     const struct i915_ggtt_view *view)
3635 {
3636         if (view->type == I915_GGTT_VIEW_NORMAL) {
3637                 return obj->base.size;
3638         } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3639                 return view->params.rotated.size;
3640         } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3641                 return view->params.partial.size << PAGE_SHIFT;
3642         } else {
3643                 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3644                 return obj->base.size;
3645         }
3646 }