drm/i915: eliminate 'temp' in gen8_for_each_{pde, pdpe, pml4e} macros
drivers/gpu/drm/i915/i915_gem_gtt.c
index 96054a560f4f8da59f40a1fc532013a1b7c5b374..c25e8b0178750c47fec5e540e4a43f42778ebdad 100644 (file)
@@ -24,6 +24,7 @@
  */
 
 #include <linux/seq_file.h>
+#include <linux/stop_machine.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -104,9 +105,11 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
 {
        bool has_aliasing_ppgtt;
        bool has_full_ppgtt;
+       bool has_full_48bit_ppgtt;
 
        has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
        has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
+       has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;
 
        if (intel_vgpu_active(dev))
                has_full_ppgtt = false; /* emulation is too hard */
@@ -125,6 +128,9 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
        if (enable_ppgtt == 2 && has_full_ppgtt)
                return 2;
 
+       if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
+               return 3;
+
 #ifdef CONFIG_INTEL_IOMMU
        /* Disable ppgtt on SNB if VT-d is on. */
        if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
@@ -141,7 +147,7 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
        }
 
        if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
-               return 2;
+               return has_full_48bit_ppgtt ? 3 : 2;
        else
                return has_aliasing_ppgtt ? 1 : 0;
 }
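
For reference, the feature-test macros used above are defined in i915_drv.h at this point in history against the i915.enable_ppgtt module parameter rather than the device pointer (a sketch of the era's definitions, not part of this diff):

	#define USES_PPGTT(dev)			(i915.enable_ppgtt)
	#define USES_FULL_PPGTT(dev)		(i915.enable_ppgtt >= 2)
	#define USES_FULL_48BIT_PPGTT(dev)	(i915.enable_ppgtt == 3)

Because the dev argument goes unused, this is also why I915_PDPES_PER_PDP(dev) compiles in gen8_preallocate_top_level_pdps() further down even though that function has no local dev variable.
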
@@ -204,6 +210,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
        return pde;
 }
 
+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
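
The two aliases above are safe because a PDPE and a PML4E share the PDE bit layout: present and read/write bits, a PPAT index, and a 4K-aligned physical address. For reference, the aliased encoder, whose tail is visible in the context above, reads roughly as follows (a sketch from the surrounding file, not part of this diff):

	static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
					  const enum i915_cache_level level)
	{
		gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
		pde |= addr;
		if (level != I915_CACHE_NONE)
			pde |= PPAT_CACHED_PDE_INDEX;
		else
			pde |= PPAT_UNCACHED_INDEX;
		return pde;
	}
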
 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
                                 enum i915_cache_level level,
                                 bool valid, u32 unused)
@@ -522,6 +531,127 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
        fill_px(vm->dev, pd, scratch_pde);
 }
 
+static int __pdp_init(struct drm_device *dev,
+                     struct i915_page_directory_pointer *pdp)
+{
+       size_t pdpes = I915_PDPES_PER_PDP(dev);
+
+       pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
+                                 sizeof(unsigned long),
+                                 GFP_KERNEL);
+       if (!pdp->used_pdpes)
+               return -ENOMEM;
+
+       pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
+                                     GFP_KERNEL);
+       if (!pdp->page_directory) {
+               kfree(pdp->used_pdpes);
+               /* the PDP might be the statically allocated top level. Keep it
+                * as clean as possible */
+               pdp->used_pdpes = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void __pdp_fini(struct i915_page_directory_pointer *pdp)
+{
+       kfree(pdp->used_pdpes);
+       kfree(pdp->page_directory);
+       pdp->page_directory = NULL;
+}
+
+static struct
+i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
+{
+       struct i915_page_directory_pointer *pdp;
+       int ret = -ENOMEM;
+
+       WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+       pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
+       if (!pdp)
+               return ERR_PTR(-ENOMEM);
+
+       ret = __pdp_init(dev, pdp);
+       if (ret)
+               goto fail_bitmap;
+
+       ret = setup_px(dev, pdp);
+       if (ret)
+               goto fail_page_m;
+
+       return pdp;
+
+fail_page_m:
+       __pdp_fini(pdp);
+fail_bitmap:
+       kfree(pdp);
+
+       return ERR_PTR(ret);
+}
+
+static void free_pdp(struct drm_device *dev,
+                    struct i915_page_directory_pointer *pdp)
+{
+       __pdp_fini(pdp);
+       if (USES_FULL_48BIT_PPGTT(dev)) {
+               cleanup_px(dev, pdp);
+               kfree(pdp);
+       }
+}
+
+static void gen8_initialize_pdp(struct i915_address_space *vm,
+                               struct i915_page_directory_pointer *pdp)
+{
+       gen8_ppgtt_pdpe_t scratch_pdpe;
+
+       scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
+
+       fill_px(vm->dev, pdp, scratch_pdpe);
+}
+
+static void gen8_initialize_pml4(struct i915_address_space *vm,
+                                struct i915_pml4 *pml4)
+{
+       gen8_ppgtt_pml4e_t scratch_pml4e;
+
+       scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
+                                         I915_CACHE_LLC);
+
+       fill_px(vm->dev, pml4, scratch_pml4e);
+}
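
Taken together with gen8_initialize_pt() and gen8_initialize_pd(), these initializers chain the scratch structures so every unused slot at every level resolves to a single dummy page:

	/*
	 * unused PML4E -> scratch_pdp
	 *     each PDPE -> scratch_pd
	 *       each PDE -> scratch_pt
	 *         each PTE -> scratch_page
	 *
	 * so a stray access through unallocated VA space always lands on
	 * the same scratch page instead of faulting.
	 */
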
+
+static void
+gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
+                         struct i915_page_directory_pointer *pdp,
+                         struct i915_page_directory *pd,
+                         int index)
+{
+       gen8_ppgtt_pdpe_t *page_directorypo;
+
+       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+               return;
+
+       page_directorypo = kmap_px(pdp);
+       page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+       kunmap_px(ppgtt, page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
+                                 struct i915_pml4 *pml4,
+                                 struct i915_page_directory_pointer *pdp,
+                                 int index)
+{
+       gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+
+       WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
+       pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+       kunmap_px(ppgtt, pagemap);
+}
+
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct drm_i915_gem_request *req,
                          unsigned entry,
@@ -537,18 +667,18 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
                return ret;
 
        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-       intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
+       intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry));
        intel_ring_emit(ring, upper_32_bits(addr));
        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-       intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
+       intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry));
        intel_ring_emit(ring, lower_32_bits(addr));
        intel_ring_advance(ring);
 
        return 0;
 }
 
-static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                         struct drm_i915_gem_request *req)
+static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
+                                struct drm_i915_gem_request *req)
 {
        int i, ret;
 
@@ -563,31 +693,38 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
        return 0;
 }
 
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-                                  uint64_t start,
-                                  uint64_t length,
-                                  bool use_scratch)
+static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
+                             struct drm_i915_gem_request *req)
+{
+       return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
+}
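
In 48-bit mode the hardware interprets PDP0 as the PML4 base address, so the single gen8_write_pdp() call above, one LRI pair for entry 0, replaces the four-entry loop of the legacy switch (register semantics as implied by this diff; treat the hardware detail as an assumption):

	/* legacy 32-bit switch: four PDP registers, one per 1GB slice */
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--)
		gen8_write_pdp(req, i, i915_page_dir_dma_addr(ppgtt, i));

	/* 48-bit switch: PDP0 carries the PML4 address instead */
	gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
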
+
+static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
+                                      struct i915_page_directory_pointer *pdp,
+                                      uint64_t start,
+                                      uint64_t length,
+                                      gen8_pte_t scratch_pte)
 {
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
-       gen8_pte_t *pt_vaddr, scratch_pte;
-       unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-       unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-       unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+       gen8_pte_t *pt_vaddr;
+       unsigned pdpe = gen8_pdpe_index(start);
+       unsigned pde = gen8_pde_index(start);
+       unsigned pte = gen8_pte_index(start);
        unsigned num_entries = length >> PAGE_SHIFT;
        unsigned last_pte, i;
 
-       scratch_pte = gen8_pte_encode(px_dma(ppgtt->base.scratch_page),
-                                     I915_CACHE_LLC, use_scratch);
+       if (WARN_ON(!pdp))
+               return;
 
        while (num_entries) {
                struct i915_page_directory *pd;
                struct i915_page_table *pt;
 
-               if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
+               if (WARN_ON(!pdp->page_directory[pdpe]))
                        break;
 
-               pd = ppgtt->pdp.page_directory[pdpe];
+               pd = pdp->page_directory[pdpe];
 
                if (WARN_ON(!pd->page_table[pde]))
                        break;
@@ -612,45 +749,69 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 
                pte = 0;
                if (++pde == I915_PDES) {
-                       pdpe++;
+                       if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
+                               break;
                        pde = 0;
                }
        }
 }
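
The gen8_{pte,pde,pdpe,pml4e}_index() helpers used above replace the removed open-coded shift-and-mask expressions; per the definitions in i915_gem_gtt.h they decompose a 48-bit canonical address into four 9-bit indices:

	/*
	 * bits 12..20 -> PTE   (GEN8_PTE_SHIFT = 12, GEN8_PTES = 512)
	 * bits 21..29 -> PDE   (GEN8_PDE_SHIFT = 21, I915_PDES = 512)
	 * bits 30..38 -> PDPE  (GEN8_PDPE_SHIFT = 30)
	 * bits 39..47 -> PML4E (GEN8_PML4E_SHIFT = 39)
	 */
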
 
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
-                                     struct sg_table *pages,
-                                     uint64_t start,
-                                     enum i915_cache_level cache_level, u32 unused)
+static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
+                                  uint64_t start,
+                                  uint64_t length,
+                                  bool use_scratch)
+{
+       struct i915_hw_ppgtt *ppgtt =
+               container_of(vm, struct i915_hw_ppgtt, base);
+       gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+                                                I915_CACHE_LLC, use_scratch);
+
+       if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+               gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
+                                          scratch_pte);
+       } else {
+               uint64_t pml4e;
+               struct i915_page_directory_pointer *pdp;
+
+               gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
+                       gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
+                                                  scratch_pte);
+               }
+       }
+}
+
+static void
+gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
+                             struct i915_page_directory_pointer *pdp,
+                             struct sg_page_iter *sg_iter,
+                             uint64_t start,
+                             enum i915_cache_level cache_level)
 {
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        gen8_pte_t *pt_vaddr;
-       unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-       unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-       unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
-       struct sg_page_iter sg_iter;
+       unsigned pdpe = gen8_pdpe_index(start);
+       unsigned pde = gen8_pde_index(start);
+       unsigned pte = gen8_pte_index(start);
 
        pt_vaddr = NULL;
 
-       for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
-               if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
-                       break;
-
+       while (__sg_page_iter_next(sg_iter)) {
                if (pt_vaddr == NULL) {
-                       struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
+                       struct i915_page_directory *pd = pdp->page_directory[pdpe];
                        struct i915_page_table *pt = pd->page_table[pde];
                        pt_vaddr = kmap_px(pt);
                }
 
                pt_vaddr[pte] =
-                       gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
+                       gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
                                        cache_level, true);
                if (++pte == GEN8_PTES) {
                        kunmap_px(ppgtt, pt_vaddr);
                        pt_vaddr = NULL;
                        if (++pde == I915_PDES) {
-                               pdpe++;
+                               if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
+                                       break;
                                pde = 0;
                        }
                        pte = 0;
@@ -661,6 +822,33 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
                kunmap_px(ppgtt, pt_vaddr);
 }
 
+static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
+                                     struct sg_table *pages,
+                                     uint64_t start,
+                                     enum i915_cache_level cache_level,
+                                     u32 unused)
+{
+       struct i915_hw_ppgtt *ppgtt =
+               container_of(vm, struct i915_hw_ppgtt, base);
+       struct sg_page_iter sg_iter;
+
+       __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
+
+       if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+               gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
+                                             cache_level);
+       } else {
+               struct i915_page_directory_pointer *pdp;
+               uint64_t pml4e;
+               uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+
+               gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
+                       gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
+                                                     start, cache_level);
+               }
+       }
+}
+
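
Switching from for_each_sg_page() to an explicitly started iterator lets the caller share one struct sg_page_iter across successive gen8_ppgtt_insert_pte_entries() calls, so insertion resumes exactly where the previous PDP's 1GB range left off. A minimal usage sketch of the pattern (hypothetical helper, assuming a populated sg_table):

	static void walk_dma_pages(struct sg_table *st)
	{
		struct sg_page_iter iter;
		dma_addr_t addr;

		__sg_page_iter_start(&iter, st->sgl, sg_nents(st->sgl), 0);
		while (__sg_page_iter_next(&iter)) {
			addr = sg_page_iter_dma_address(&iter);
			DRM_DEBUG("dma address %pad\n", &addr);
			/* the iterator keeps its position between calls,
			 * unlike a fresh for_each_sg_page() walk */
		}
	}
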
 static void gen8_free_page_tables(struct drm_device *dev,
                                  struct i915_page_directory *pd)
 {
@@ -699,8 +887,52 @@ static int gen8_init_scratch(struct i915_address_space *vm)
                return PTR_ERR(vm->scratch_pd);
        }
 
+       if (USES_FULL_48BIT_PPGTT(dev)) {
+               vm->scratch_pdp = alloc_pdp(dev);
+               if (IS_ERR(vm->scratch_pdp)) {
+                       free_pd(dev, vm->scratch_pd);
+                       free_pt(dev, vm->scratch_pt);
+                       free_scratch_page(dev, vm->scratch_page);
+                       return PTR_ERR(vm->scratch_pdp);
+               }
+       }
+
        gen8_initialize_pt(vm, vm->scratch_pt);
        gen8_initialize_pd(vm, vm->scratch_pd);
+       if (USES_FULL_48BIT_PPGTT(dev))
+               gen8_initialize_pdp(vm, vm->scratch_pdp);
+
+       return 0;
+}
+
+static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
+{
+       enum vgt_g2v_type msg;
+       struct drm_device *dev = ppgtt->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int i;
+
+       if (USES_FULL_48BIT_PPGTT(dev)) {
+               u64 daddr = px_dma(&ppgtt->pml4);
+
+               I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+               I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+
+               msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+                               VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
+       } else {
+               for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+                       u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+                       I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
+                       I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
+               }
+
+               msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+                               VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
+       }
+
+       I915_WRITE(vgtif_reg(g2v_notify), msg);
 
        return 0;
 }
@@ -709,35 +941,65 @@ static void gen8_free_scratch(struct i915_address_space *vm)
 {
        struct drm_device *dev = vm->dev;
 
+       if (USES_FULL_48BIT_PPGTT(dev))
+               free_pdp(dev, vm->scratch_pdp);
        free_pd(dev, vm->scratch_pd);
        free_pt(dev, vm->scratch_pt);
        free_scratch_page(dev, vm->scratch_page);
 }
 
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
+                                   struct i915_page_directory_pointer *pdp)
 {
-       struct i915_hw_ppgtt *ppgtt =
-               container_of(vm, struct i915_hw_ppgtt, base);
        int i;
 
-       for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) {
-               if (WARN_ON(!ppgtt->pdp.page_directory[i]))
+       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
+               if (WARN_ON(!pdp->page_directory[i]))
                        continue;
 
-               gen8_free_page_tables(ppgtt->base.dev,
-                                     ppgtt->pdp.page_directory[i]);
-               free_pd(ppgtt->base.dev, ppgtt->pdp.page_directory[i]);
+               gen8_free_page_tables(dev, pdp->page_directory[i]);
+               free_pd(dev, pdp->page_directory[i]);
        }
 
+       free_pdp(dev, pdp);
+}
+
+static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
+{
+       int i;
+
+       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+               if (WARN_ON(!ppgtt->pml4.pdps[i]))
+                       continue;
+
+               gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
+       }
+
+       cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+       struct i915_hw_ppgtt *ppgtt =
+               container_of(vm, struct i915_hw_ppgtt, base);
+
+       if (intel_vgpu_active(vm->dev))
+               gen8_ppgtt_notify_vgt(ppgtt, false);
+
+       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+               gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
+       else
+               gen8_ppgtt_cleanup_4lvl(ppgtt);
+
        gen8_free_scratch(vm);
 }
 
 /**
  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
- * @ppgtt:     Master ppgtt structure.
- * @pd:                Page directory for this address range.
+ * @vm:        Master vm structure.
+ * @pd:        Page directory for this address range.
  * @start:     Starting virtual address to begin allocations.
- * @length     Size of the allocations.
+ * @length:    Size of the allocations.
  * @new_pts:   Bitmap set by function with new allocations. Likely used by the
  *             caller to free on error.
  *
@@ -750,22 +1012,21 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  *
  * Return: 0 if success; negative error code otherwise.
  */
-static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
+static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
                                     struct i915_page_directory *pd,
                                     uint64_t start,
                                     uint64_t length,
                                     unsigned long *new_pts)
 {
-       struct drm_device *dev = ppgtt->base.dev;
+       struct drm_device *dev = vm->dev;
        struct i915_page_table *pt;
-       uint64_t temp;
        uint32_t pde;
 
-       gen8_for_each_pde(pt, pd, start, length, temp, pde) {
+       gen8_for_each_pde(pt, pd, start, length, pde) {
                /* Don't reallocate page tables */
-               if (pt) {
+               if (test_bit(pde, pd->used_pdes)) {
                        /* Scratch is never allocated this way */
-                       WARN_ON(pt == ppgtt->base.scratch_pt);
+                       WARN_ON(pt == vm->scratch_pt);
                        continue;
                }
 
@@ -773,9 +1034,10 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
                if (IS_ERR(pt))
                        goto unwind_out;
 
-               gen8_initialize_pt(&ppgtt->base, pt);
+               gen8_initialize_pt(vm, pt);
                pd->page_table[pde] = pt;
                __set_bit(pde, new_pts);
+               trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
        }
 
        return 0;
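
The dropped 'temp' argument is the subject of this patch: the per-step advance is now computed inside the iterator macros themselves (in i915_gem_gtt.h, not shown in this blobdiff) via a statement expression, so callers no longer declare a scratch variable. A sketch of the reworked pde iterator, assuming the header's post-patch form:

	#define gen8_for_each_pde(pt, pd, start, length, iter)		\
		for (iter = gen8_pde_index(start);			\
		     length > 0 && iter < I915_PDES &&			\
			(pt = (pd)->page_table[iter], true);		\
		     ({ u64 temp = ALIGN(start + 1, 1 << GEN8_PDE_SHIFT); \
			temp = min(temp - start, length);		\
			start += temp, length -= temp; }), ++iter)

gen8_for_each_pdpe and gen8_for_each_pml4e follow the same shape with GEN8_PDPE_SHIFT and GEN8_PML4E_SHIFT respectively.
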
@@ -789,11 +1051,11 @@ unwind_out:
 
 /**
  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
- * @ppgtt:     Master ppgtt structure.
+ * @vm:        Master vm structure.
  * @pdp:       Page directory pointer for this address range.
  * @start:     Starting virtual address to begin allocations.
- * @length     Size of the allocations.
- * @new_pds    Bitmap set by function with new allocations. Likely used by the
+ * @length:    Size of the allocations.
+ * @new_pds:   Bitmap set by function with new allocations. Likely used by the
  *             caller to free on error.
  *
  * Allocate the required number of page directories starting at the pde index of
@@ -810,48 +1072,100 @@ unwind_out:
  *
  * Return: 0 if success; negative error code otherwise.
  */
-static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
-                                    struct i915_page_directory_pointer *pdp,
-                                    uint64_t start,
-                                    uint64_t length,
-                                    unsigned long *new_pds)
+static int
+gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
+                                 struct i915_page_directory_pointer *pdp,
+                                 uint64_t start,
+                                 uint64_t length,
+                                 unsigned long *new_pds)
 {
-       struct drm_device *dev = ppgtt->base.dev;
+       struct drm_device *dev = vm->dev;
        struct i915_page_directory *pd;
-       uint64_t temp;
        uint32_t pdpe;
+       uint32_t pdpes = I915_PDPES_PER_PDP(dev);
 
-       WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+       WARN_ON(!bitmap_empty(new_pds, pdpes));
 
-       gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
-               if (pd)
+       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+               if (test_bit(pdpe, pdp->used_pdpes))
                        continue;
 
                pd = alloc_pd(dev);
                if (IS_ERR(pd))
                        goto unwind_out;
 
-               gen8_initialize_pd(&ppgtt->base, pd);
+               gen8_initialize_pd(vm, pd);
                pdp->page_directory[pdpe] = pd;
                __set_bit(pdpe, new_pds);
+               trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
        }
 
        return 0;
 
 unwind_out:
-       for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+       for_each_set_bit(pdpe, new_pds, pdpes)
                free_pd(dev, pdp->page_directory[pdpe]);
 
        return -ENOMEM;
 }
 
-static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
+/**
+ * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
+ * @vm:        Master vm structure.
+ * @pml4:      Page map level 4 for this address range.
+ * @start:     Starting virtual address to begin allocations.
+ * @length:    Size of the allocations.
+ * @new_pdps:  Bitmap set by function with new allocations. Likely used by the
+ *             caller to free on error.
+ *
+ * Allocate the required number of page directory pointers. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
+ * The main difference is here we are limited by the pml4 boundary (instead of
+ * the page directory pointer).
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int
+gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
+                                 struct i915_pml4 *pml4,
+                                 uint64_t start,
+                                 uint64_t length,
+                                 unsigned long *new_pdps)
 {
-       int i;
+       struct drm_device *dev = vm->dev;
+       struct i915_page_directory_pointer *pdp;
+       uint32_t pml4e;
+
+       WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
+
+       gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+               if (!test_bit(pml4e, pml4->used_pml4es)) {
+                       pdp = alloc_pdp(dev);
+                       if (IS_ERR(pdp))
+                               goto unwind_out;
+
+                       gen8_initialize_pdp(vm, pdp);
+                       pml4->pdps[pml4e] = pdp;
+                       __set_bit(pml4e, new_pdps);
+                       trace_i915_page_directory_pointer_entry_alloc(vm,
+                                                                     pml4e,
+                                                                     start,
+                                                                     GEN8_PML4E_SHIFT);
+               }
+       }
+
+       return 0;
+
+unwind_out:
+       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+               free_pdp(dev, pml4->pdps[pml4e]);
 
-       for (i = 0; i < GEN8_LEGACY_PDPES; i++)
-               kfree(new_pts[i]);
+       return -ENOMEM;
+}
+
+static void
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
+{
        kfree(new_pts);
        kfree(new_pds);
 }
@@ -861,28 +1175,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
  */
 static
 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-                                        unsigned long ***new_pts)
+                                        unsigned long **new_pts,
+                                        uint32_t pdpes)
 {
-       int i;
        unsigned long *pds;
-       unsigned long **pts;
+       unsigned long *pts;
 
-       pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
+       pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
        if (!pds)
                return -ENOMEM;
 
-       pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL);
-       if (!pts) {
-               kfree(pds);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
-               pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
-                                sizeof(unsigned long), GFP_KERNEL);
-               if (!pts[i])
-                       goto err_out;
-       }
+       pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
+                     GFP_TEMPORARY);
+       if (!pts)
+               goto err_out;
 
        *new_pds = pds;
        *new_pts = pts;
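
The former two-level bitmap array is flattened into a single allocation: pdpes consecutive bitmaps of BITS_TO_LONGS(I915_PDES) longs each. The page-table bitmap for a given pdpe therefore starts at

	new_pts + pdpe * BITS_TO_LONGS(I915_PDES)

which is exactly how gen8_alloc_va_range_3lvl() indexes it below, and the per-pdpe kfree() loop in free_gen8_temp_bitmaps() above disappears.
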
@@ -904,18 +1210,20 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
        ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
 }
 
-static int gen8_alloc_va_range(struct i915_address_space *vm,
-                              uint64_t start,
-                              uint64_t length)
+static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
+                                   struct i915_page_directory_pointer *pdp,
+                                   uint64_t start,
+                                   uint64_t length)
 {
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
-       unsigned long *new_page_dirs, **new_page_tables;
+       unsigned long *new_page_dirs, *new_page_tables;
+       struct drm_device *dev = vm->dev;
        struct i915_page_directory *pd;
        const uint64_t orig_start = start;
        const uint64_t orig_length = length;
-       uint64_t temp;
        uint32_t pdpe;
+       uint32_t pdpes = I915_PDPES_PER_PDP(dev);
        int ret;
 
        /* Wrap is never okay since we can only represent 48b, and we don't
@@ -924,25 +1232,25 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
        if (WARN_ON(start + length < start))
                return -ENODEV;
 
-       if (WARN_ON(start + length > ppgtt->base.total))
+       if (WARN_ON(start + length > vm->total))
                return -ENODEV;
 
-       ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
+       ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
        if (ret)
                return ret;
 
        /* Do the allocations first so we can easily bail out */
-       ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
-                                       new_page_dirs);
+       ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
+                                               new_page_dirs);
        if (ret) {
                free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
                return ret;
        }
 
        /* For every page directory referenced, allocate page tables */
-       gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
-               ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
-                                               new_page_tables[pdpe]);
+       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+               ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
+                                               new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
                if (ret)
                        goto err_out;
        }
@@ -952,17 +1260,17 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 
        /* Allocations have completed successfully, so set the bitmaps, and do
         * the mappings. */
-       gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
                gen8_pde_t *const page_directory = kmap_px(pd);
                struct i915_page_table *pt;
-               uint64_t pd_len = gen8_clamp_pd(start, length);
+               uint64_t pd_len = length;
                uint64_t pd_start = start;
                uint32_t pde;
 
                /* Every pd should be allocated, we just did that above. */
                WARN_ON(!pd);
 
-               gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+               gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
                        /* Same reasoning as pd */
                        WARN_ON(!pt);
                        WARN_ON(!pd_len);
@@ -979,14 +1287,18 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
                        /* Map the PDE to the page table */
                        page_directory[pde] = gen8_pde_encode(px_dma(pt),
                                                              I915_CACHE_LLC);
+                       trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
+                                                       gen8_pte_index(start),
+                                                       gen8_pte_count(start, length),
+                                                       GEN8_PTES);
 
                        /* NB: We haven't yet mapped ptes to pages. At this
                         * point we're still relying on insert_entries() */
                }
 
                kunmap_px(ppgtt, page_directory);
-
-               __set_bit(pdpe, ppgtt->pdp.used_pdpes);
+               __set_bit(pdpe, pdp->used_pdpes);
+               gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
        }
 
        free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
@@ -995,18 +1307,192 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 
 err_out:
        while (pdpe--) {
-               for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
-                       free_pt(vm->dev, ppgtt->pdp.page_directory[pdpe]->page_table[temp]);
+               unsigned long temp;
+
+               for_each_set_bit(temp, new_page_tables + pdpe *
+                               BITS_TO_LONGS(I915_PDES), I915_PDES)
+                       free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
        }
 
-       for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
-               free_pd(vm->dev, ppgtt->pdp.page_directory[pdpe]);
+       for_each_set_bit(pdpe, new_page_dirs, pdpes)
+               free_pd(dev, pdp->page_directory[pdpe]);
 
        free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
        mark_tlbs_dirty(ppgtt);
        return ret;
 }
 
+static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
+                                   struct i915_pml4 *pml4,
+                                   uint64_t start,
+                                   uint64_t length)
+{
+       DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+       struct i915_hw_ppgtt *ppgtt =
+                       container_of(vm, struct i915_hw_ppgtt, base);
+       struct i915_page_directory_pointer *pdp;
+       uint64_t pml4e;
+       int ret = 0;
+
+       /* Do the pml4 allocations first, so we don't need to track the newly
+        * allocated tables below the pdp */
+       bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+
+       /* The pagedirectory and pagetable allocations are done in the shared 3
+        * and 4 level code. Just allocate the pdps.
+        */
+       ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
+                                               new_pdps);
+       if (ret)
+               return ret;
+
+       WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
+            "The allocation has spanned more than 512GB. "
+            "It is highly likely this is incorrect.");
+
+       gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+               WARN_ON(!pdp);
+
+               ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+               if (ret)
+                       goto err_out;
+
+               gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
+       }
+
+       bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+                 GEN8_PML4ES_PER_PML4);
+
+       return 0;
+
+err_out:
+       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+               gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
+
+       return ret;
+}
+
+static int gen8_alloc_va_range(struct i915_address_space *vm,
+                              uint64_t start, uint64_t length)
+{
+       struct i915_hw_ppgtt *ppgtt =
+               container_of(vm, struct i915_hw_ppgtt, base);
+
+       if (USES_FULL_48BIT_PPGTT(vm->dev))
+               return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+       else
+               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+}
+
+static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
+                         uint64_t start, uint64_t length,
+                         gen8_pte_t scratch_pte,
+                         struct seq_file *m)
+{
+       struct i915_page_directory *pd;
+       uint32_t pdpe;
+
+       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+               struct i915_page_table *pt;
+               uint64_t pd_len = length;
+               uint64_t pd_start = start;
+               uint32_t pde;
+
+               if (!test_bit(pdpe, pdp->used_pdpes))
+                       continue;
+
+               seq_printf(m, "\tPDPE #%d\n", pdpe);
+               gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
+                       uint32_t  pte;
+                       gen8_pte_t *pt_vaddr;
+
+                       if (!test_bit(pde, pd->used_pdes))
+                               continue;
+
+                       pt_vaddr = kmap_px(pt);
+                       for (pte = 0; pte < GEN8_PTES; pte += 4) {
+                               uint64_t va =
+                                       (pdpe << GEN8_PDPE_SHIFT) |
+                                       (pde << GEN8_PDE_SHIFT) |
+                                       (pte << GEN8_PTE_SHIFT);
+                               int i;
+                               bool found = false;
+
+                               for (i = 0; i < 4; i++)
+                                       if (pt_vaddr[pte + i] != scratch_pte)
+                                               found = true;
+                               if (!found)
+                                       continue;
+
+                               seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
+                               for (i = 0; i < 4; i++) {
+                                       if (pt_vaddr[pte + i] != scratch_pte)
+                                               seq_printf(m, " %llx", pt_vaddr[pte + i]);
+                                       else
+                                               seq_puts(m, "  SCRATCH ");
+                               }
+                               seq_puts(m, "\n");
+                       }
+                       /* don't use kunmap_px; it could trigger
+                        * an unnecessary flush.
+                        */
+                       kunmap_atomic(pt_vaddr);
+               }
+       }
+}
+
+static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
+{
+       struct i915_address_space *vm = &ppgtt->base;
+       uint64_t start = ppgtt->base.start;
+       uint64_t length = ppgtt->base.total;
+       gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+                                                I915_CACHE_LLC, true);
+
+       if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+               gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
+       } else {
+               uint64_t pml4e;
+               struct i915_pml4 *pml4 = &ppgtt->pml4;
+               struct i915_page_directory_pointer *pdp;
+
+               gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+                       if (!test_bit(pml4e, pml4->used_pml4es))
+                               continue;
+
+                       seq_printf(m, "    PML4E #%llu\n", pml4e);
+                       gen8_dump_pdp(pdp, start, length, scratch_pte, m);
+               }
+       }
+}
+
+static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
+{
+       unsigned long *new_page_dirs, *new_page_tables;
+       uint32_t pdpes = I915_PDPES_PER_PDP(dev);
+       int ret;
+
+       /* We allocate temp bitmaps for the page tables for no gain,
+        * but as this is for init only, let's keep things simple.
+        */
+       ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+       if (ret)
+               return ret;
+
+       /* Allocate for all pdps regardless of how the ppgtt
+        * was defined.
+        */
+       ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
+                                               0, 1ULL << 32,
+                                               new_page_dirs);
+       if (!ret)
+               *ppgtt->pdp.used_pdpes = *new_page_dirs;
+
+       free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+
+       return ret;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1023,24 +1509,49 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
                return ret;
 
        ppgtt->base.start = 0;
-       ppgtt->base.total = 1ULL << 32;
-       if (IS_ENABLED(CONFIG_X86_32))
-               /* While we have a proliferation of size_t variables
-                * we cannot represent the full ppgtt size on 32bit,
-                * so limit it to the same size as the GGTT (currently
-                * 2GiB).
-                */
-               ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
        ppgtt->base.cleanup = gen8_ppgtt_cleanup;
        ppgtt->base.allocate_va_range = gen8_alloc_va_range;
        ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
        ppgtt->base.clear_range = gen8_ppgtt_clear_range;
        ppgtt->base.unbind_vma = ppgtt_unbind_vma;
        ppgtt->base.bind_vma = ppgtt_bind_vma;
+       ppgtt->debug_dump = gen8_dump_ppgtt;
+
+       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+               ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
+               if (ret)
+                       goto free_scratch;
 
-       ppgtt->switch_mm = gen8_mm_switch;
+               gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
+
+               ppgtt->base.total = 1ULL << 48;
+               ppgtt->switch_mm = gen8_48b_mm_switch;
+       } else {
+               ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
+               if (ret)
+                       goto free_scratch;
+
+               ppgtt->base.total = 1ULL << 32;
+               ppgtt->switch_mm = gen8_legacy_mm_switch;
+               trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
+                                                             0, 0,
+                                                             GEN8_PML4E_SHIFT);
+
+               if (intel_vgpu_active(ppgtt->base.dev)) {
+                       ret = gen8_preallocate_top_level_pdps(ppgtt);
+                       if (ret)
+                               goto free_scratch;
+               }
+       }
+
+       if (intel_vgpu_active(ppgtt->base.dev))
+               gen8_ppgtt_notify_vgt(ppgtt, true);
 
        return 0;
+
+free_scratch:
+       gen8_free_scratch(&ppgtt->base);
+       return ret;
 }
 
 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
@@ -1151,9 +1662,9 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
                return ret;
 
        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-       intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+       intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
        intel_ring_emit(ring, PP_DIR_DCLV_2G);
-       intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+       intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
        intel_ring_emit(ring, get_pd_offset(ppgtt));
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);
@@ -1188,9 +1699,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
                return ret;
 
        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-       intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+       intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
        intel_ring_emit(ring, PP_DIR_DCLV_2G);
-       intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+       intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
        intel_ring_emit(ring, get_pd_offset(ppgtt));
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);
@@ -1228,8 +1739,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
        int j;
 
        for_each_ring(ring, dev_priv, j) {
+               u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
                I915_WRITE(RING_MODE_GEN7(ring),
-                          _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+                          _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
        }
 }
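
The masked-bit idiom used here writes the affected bits into the upper 16 bits of the register as a write-enable mask, so only the named bits change:

	/* _MASKED_BIT_ENABLE(a)  expands to ((a) << 16 | (a)) */
	/* _MASKED_BIT_DISABLE(a) expands to ((a) << 16)       */
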
 
@@ -1609,6 +2121,16 @@ static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
                return gen8_ppgtt_init(ppgtt);
 }
 
+static void i915_address_space_init(struct i915_address_space *vm,
+                                   struct drm_i915_private *dev_priv)
+{
+       drm_mm_init(&vm->mm, vm->start, vm->total);
+       vm->dev = dev_priv->dev;
+       INIT_LIST_HEAD(&vm->active_list);
+       INIT_LIST_HEAD(&vm->inactive_list);
+       list_add_tail(&vm->global_link, &dev_priv->vm_list);
+}
+
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1617,9 +2139,7 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
        ret = __hw_ppgtt_init(dev, ppgtt);
        if (ret == 0) {
                kref_init(&ppgtt->ref);
-               drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
-                           ppgtt->base.total);
-               i915_init_vm(dev_priv, &ppgtt->base);
+               i915_address_space_init(&ppgtt->base, dev_priv);
        }
 
        return ret;
@@ -1981,6 +2501,56 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
 static int ggtt_bind_vma(struct i915_vma *vma,
                         enum i915_cache_level cache_level,
                         u32 flags)
+{
+       struct drm_i915_gem_object *obj = vma->obj;
+       u32 pte_flags = 0;
+       int ret;
+
+       ret = i915_get_ggtt_vma_pages(vma);
+       if (ret)
+               return ret;
+
+       /* Currently applicable only to VLV */
+       if (obj->gt_ro)
+               pte_flags |= PTE_READ_ONLY;
+
+       vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
+                               vma->node.start,
+                               cache_level, pte_flags);
+
+       /*
+        * Without aliasing PPGTT there's no difference between
+        * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
+        * upgrade to both bound if we bind either to avoid double-binding.
+        */
+       vma->bound |= GLOBAL_BIND | LOCAL_BIND;
+
+       return 0;
+}
+
+struct ggtt_bind_vma__cb {
+       struct i915_vma *vma;
+       enum i915_cache_level cache_level;
+       u32 flags;
+};
+
+static int ggtt_bind_vma__cb(void *_arg)
+{
+       struct ggtt_bind_vma__cb *arg = _arg;
+       return ggtt_bind_vma(arg->vma, arg->cache_level, arg->flags);
+}
+
+static int ggtt_bind_vma__BKL(struct i915_vma *vma,
+                             enum i915_cache_level cache_level,
+                             u32 flags)
+{
+       struct ggtt_bind_vma__cb arg = { vma, cache_level, flags };
+       return stop_machine(ggtt_bind_vma__cb, &arg, NULL);
+}
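
Routing a GGTT bind through stop_machine() makes every binding a system-wide quiescent point. As far as can be inferred from the gen8_gmch_probe() hook below, this is because Cherryview does not reliably order GGTT PTE updates against concurrent CPU access through the aperture, and stop_machine() acts as a sufficiently heavy barrier; treat that rationale as an assumption, the diff itself does not state it.

	/* int stop_machine(cpu_stop_fn_t fn, void *data,
	 *                  const struct cpumask *cpus);
	 * runs fn(data) with all other CPUs spinning with interrupts
	 * disabled; a NULL cpus mask means "any one CPU". */
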
+
+static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+                                enum i915_cache_level cache_level,
+                                u32 flags)
 {
        struct drm_device *dev = vma->vm->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1999,24 +2569,13 @@ static int ggtt_bind_vma(struct i915_vma *vma,
                pte_flags |= PTE_READ_ONLY;
 
 
-       if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
+       if (flags & GLOBAL_BIND) {
                vma->vm->insert_entries(vma->vm, pages,
                                        vma->node.start,
                                        cache_level, pte_flags);
-
-               /* Note the inconsistency here is due to absence of the
-                * aliasing ppgtt on gen4 and earlier. Though we always
-                * request PIN_USER for execbuffer (translated to LOCAL_BIND),
-                * without the appgtt, we cannot honour that request and so
-                * must substitute it with a global binding. Since we do this
-                * behind the upper layers back, we need to explicitly set
-                * the bound flag ourselves.
-                */
-               vma->bound |= GLOBAL_BIND;
-
        }
 
-       if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
+       if (flags & LOCAL_BIND) {
                struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
                appgtt->base.insert_entries(&appgtt->base, pages,
                                            vma->node.start,
@@ -2084,9 +2643,9 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
 }
 
 static int i915_gem_setup_global_gtt(struct drm_device *dev,
-                                    unsigned long start,
-                                    unsigned long mappable_end,
-                                    unsigned long end)
+                                    u64 start,
+                                    u64 mappable_end,
+                                    u64 end)
 {
        /* Let GEM Manage all of the aperture.
         *
@@ -2106,11 +2665,13 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
 
        BUG_ON(mappable_end > end);
 
-       /* Subtract the guard page ... */
-       drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
+       ggtt_vm->start = start;
 
-       dev_priv->gtt.base.start = start;
-       dev_priv->gtt.base.total = end - start;
+       /* Subtract the guard page before address space initialization to
+        * shrink the range used by drm_mm */
+       ggtt_vm->total = end - start - PAGE_SIZE;
+       i915_address_space_init(ggtt_vm, dev_priv);
+       ggtt_vm->total += PAGE_SIZE;
 
        if (intel_vgpu_active(dev)) {
                ret = intel_vgt_balloon(dev);
@@ -2119,13 +2680,13 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
        }
 
        if (!HAS_LLC(dev))
-               dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
+               ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
 
        /* Mark any preallocated objects as occupied */
        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
                struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
 
-               DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
+               DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
                              i915_gem_obj_ggtt_offset(obj), obj->base.size);
 
                WARN_ON(i915_gem_obj_ggtt_bound(obj));
@@ -2135,6 +2696,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
                        return ret;
                }
                vma->bound |= GLOBAL_BIND;
+               list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
        }
 
        /* Clear any non-preallocated blocks */
@@ -2177,6 +2739,8 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
                                        true);
 
                dev_priv->mm.aliasing_ppgtt = ppgtt;
+               WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
+               dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
        }
 
        return 0;
@@ -2367,8 +2931,8 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
 
        /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
         * write would work. */
-       I915_WRITE(GEN8_PRIVATE_PAT, pat);
-       I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
+       I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
+       I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
 }
 
 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
@@ -2402,8 +2966,8 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
              GEN8_PPAT(6, CHV_PPAT_SNOOP) |
              GEN8_PPAT(7, CHV_PPAT_SNOOP);
 
-       I915_WRITE(GEN8_PRIVATE_PAT, pat);
-       I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
+       I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
+       I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
 }
 
 static int gen8_gmch_probe(struct drm_device *dev,
@@ -2451,6 +3015,9 @@ static int gen8_gmch_probe(struct drm_device *dev,
        dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
        dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
 
+       if (IS_CHERRYVIEW(dev))
+               dev_priv->gtt.base.bind_vma = ggtt_bind_vma__BKL;
+
        return ret;
 }
 
@@ -2722,15 +3289,18 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
 
 }
 
-static void
-rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
-            struct sg_table *st)
+static struct scatterlist *
+rotate_pages(dma_addr_t *in, unsigned int offset,
+            unsigned int width, unsigned int height,
+            struct sg_table *st, struct scatterlist *sg)
 {
        unsigned int column, row;
        unsigned int src_idx;
-       struct scatterlist *sg = st->sgl;
 
-       st->nents = 0;
+       if (!sg) {
+               st->nents = 0;
+               sg = st->sgl;
+       }
 
        for (column = 0; column < width; column++) {
                src_idx = width * (height - 1) + column;
@@ -2741,24 +3311,29 @@ rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
                         * The only thing we need are DMA addresses.
                         */
                        sg_set_page(sg, NULL, PAGE_SIZE, 0);
-                       sg_dma_address(sg) = in[src_idx];
+                       sg_dma_address(sg) = in[offset + src_idx];
                        sg_dma_len(sg) = PAGE_SIZE;
                        sg = sg_next(sg);
                        src_idx -= width;
                }
        }
+
+       return sg;
 }
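
A worked example of the walk above, for a width=3, height=2 source whose pages sit row-major as 0..5:

	/*
	 * column 0: src_idx = 3*(2-1)+0 = 3, then 3-3 = 0  -> emits 3, 0
	 * column 1: src_idx = 4, then 1                    -> emits 4, 1
	 * column 2: src_idx = 5, then 2                    -> emits 5, 2
	 *
	 * Output order 3,0,4,1,5,2: each column is emitted bottom-up,
	 * giving the 90 degree rotated layout the display engine expects.
	 */
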
 
 static struct sg_table *
 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
                          struct drm_i915_gem_object *obj)
 {
-       struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
+       struct intel_rotation_info *rot_info = &ggtt_view->params.rotation_info;
        unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
+       unsigned int size_pages_uv;
        struct sg_page_iter sg_iter;
        unsigned long i;
        dma_addr_t *page_addr_list;
        struct sg_table *st;
+       unsigned int uv_start_page;
+       struct scatterlist *sg;
        int ret = -ENOMEM;
 
        /* Allocate a temporary list of source pages for random access. */
@@ -2767,12 +3342,18 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
        if (!page_addr_list)
                return ERR_PTR(ret);
 
+       /* Account for UV plane with NV12. */
+       if (rot_info->pixel_format == DRM_FORMAT_NV12)
+               size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
+       else
+               size_pages_uv = 0;
+
        /* Allocate target SG list. */
        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto err_st_alloc;
 
-       ret = sg_alloc_table(st, size_pages, GFP_KERNEL);
+       ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
        if (ret)
                goto err_sg_alloc;
 
@@ -2784,15 +3365,32 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
        }
 
        /* Rotate the pages. */
-       rotate_pages(page_addr_list,
+       sg = rotate_pages(page_addr_list, 0,
                     rot_info->width_pages, rot_info->height_pages,
-                    st);
+                    st, NULL);
+
+       /* Append the UV plane if NV12. */
+       if (rot_info->pixel_format == DRM_FORMAT_NV12) {
+               uv_start_page = size_pages;
+
+               /* Check for tile-row un-alignment. */
+               if (offset_in_page(rot_info->uv_offset))
+                       uv_start_page--;
+
+               rot_info->uv_start_page = uv_start_page;
+
+               rotate_pages(page_addr_list, uv_start_page,
+                            rot_info->width_pages_uv,
+                            rot_info->height_pages_uv,
+                            st, sg);
+       }
 
        DRM_DEBUG_KMS(
-                     "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages).\n",
+                     "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
                      obj->base.size, rot_info->pitch, rot_info->height,
                      rot_info->pixel_format, rot_info->width_pages,
-                     rot_info->height_pages, size_pages);
+                     rot_info->height_pages, size_pages + size_pages_uv,
+                     size_pages);
 
        drm_free_large(page_addr_list);
 
@@ -2804,10 +3402,11 @@ err_st_alloc:
        drm_free_large(page_addr_list);
 
        DRM_DEBUG_KMS(
-                     "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages)\n",
+                     "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
                      obj->base.size, ret, rot_info->pitch, rot_info->height,
                      rot_info->pixel_format, rot_info->width_pages,
-                     rot_info->height_pages, size_pages);
+                     rot_info->height_pages, size_pages + size_pages_uv,
+                     size_pages);
        return ERR_PTR(ret);
 }
 
@@ -2958,7 +3557,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
        if (view->type == I915_GGTT_VIEW_NORMAL) {
                return obj->base.size;
        } else if (view->type == I915_GGTT_VIEW_ROTATED) {
-               return view->rotation_info.size;
+               return view->params.rotation_info.size;
        } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
                return view->params.partial.size << PAGE_SHIFT;
        } else {