arm64: KVM: Do not use pgd_index to index stage-2 pgd
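The host's pgd_index() masks the index with the host VA layout's PTRS_PER_PGD, which is too small once the stage-2 PGD is built from concatenated pages (for example, a 40-bit IPA space with 4KB pages needs more first-level entries than a single host PGD page holds), so high guest addresses wrap back into the first page. The callers below therefore switch to a dedicated kvm_pgd_index() helper. Its definition lives in the per-arch kvm_mmu.h headers, which are not part of this file's diff; the following is only a hedged sketch of the assumed shape, indexing by the stage-2 table size PTRS_PER_S2_PGD:

/* Sketch (assumed form, not the header change itself): index the stage-2
 * PGD by its own geometry instead of the host's PTRS_PER_PGD. */
#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))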
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e1170fc83489be9ba51a15c97b148a5..5656d79c5a44f4d2ca816e15b647abf29a114e0b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
        phys_addr_t addr = start, end = start + size;
        phys_addr_t next;
 
-       pgd = pgdp + pgd_index(addr);
+       pgd = pgdp + kvm_pgd_index(addr);
        do {
                next = kvm_pgd_addr_end(addr, end);
                if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
        phys_addr_t next;
        pgd_t *pgd;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        do {
                next = kvm_pgd_addr_end(addr, end);
                stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
                                     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+       free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+       unsigned int size = kvm_get_hwpgd_size();
+
+       return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:       The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-       int ret;
        pgd_t *pgd;
+       void *hwpgd;
 
        if (kvm->arch.pgd != NULL) {
                kvm_err("kvm_arch already initialized?\n");
                return -EINVAL;
        }
 
+       hwpgd = kvm_alloc_hwpgd();
+       if (!hwpgd)
+               return -ENOMEM;
+
+       /* When the kernel uses more levels of page tables than the
+        * guest, we allocate a fake PGD and pre-populate it to point
+        * to the next-level page table, which will be the real
+        * initial page table pointed to by the VTTBR.
+        *
+        * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+        * the PMD and the kernel will use folded pud.
+        * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+        * pages.
+        */
        if (KVM_PREALLOC_LEVEL > 0) {
+               int i;
+
                /*
                 * Allocate fake pgd for the page table manipulation macros to
                 * work.  This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
                 */
                pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
                                       GFP_KERNEL | __GFP_ZERO);
+
+               if (!pgd) {
+                       kvm_free_hwpgd(hwpgd);
+                       return -ENOMEM;
+               }
+
+               /* Plug the HW PGD into the fake one. */
+               for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+                       if (KVM_PREALLOC_LEVEL == 1)
+                               pgd_populate(NULL, pgd + i,
+                                            (pud_t *)hwpgd + i * PTRS_PER_PUD);
+                       else if (KVM_PREALLOC_LEVEL == 2)
+                               pud_populate(NULL, pud_offset(pgd, 0) + i,
+                                            (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+               }
        } else {
                /*
                 * Allocate actual first-level Stage-2 page table used by the
                 * hardware for Stage-2 page table walks.
                 */
-               pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+               pgd = (pgd_t *)hwpgd;
        }
 
-       if (!pgd)
-               return -ENOMEM;
-
-       ret = kvm_prealloc_hwpgd(kvm, pgd);
-       if (ret)
-               goto out_err;
-
        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;
        return 0;
-out_err:
-       if (KVM_PREALLOC_LEVEL > 0)
-               kfree(pgd);
-       else
-               free_pages((unsigned long)pgd, S2_PGD_ORDER);
-       return ret;
 }
 
 /**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
                return;
 
        unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-       kvm_free_hwpgd(kvm);
+       kvm_free_hwpgd(kvm_get_hwpgd(kvm));
        if (KVM_PREALLOC_LEVEL > 0)
                kfree(kvm->arch.pgd);
-       else
-               free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
        kvm->arch.pgd = NULL;
 }
 
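The allocation and tear-down paths above rely on two per-arch helpers defined in the kvm_mmu.h headers rather than in this file: kvm_get_hwpgd_size(), the amount of memory the hardware-visible first-level table needs, and kvm_get_hwpgd(), which recovers that hardware table from the (possibly fake) kvm->arch.pgd. A minimal sketch of the idea, assuming the KVM_PREALLOC_LEVEL scheme described in the comment in kvm_alloc_stage2_pgd(); the real header definitions may differ in detail:

/* Sketch: how much backing memory the HW stage-2 first-level table takes. */
static inline unsigned int kvm_get_hwpgd_size(void)
{
	/* With a pre-allocated level, each fake-PGD entry gets a full page. */
	if (KVM_PREALLOC_LEVEL > 0)
		return PTRS_PER_S2_PGD * PAGE_SIZE;
	return PTRS_PER_S2_PGD * sizeof(pgd_t);
}

/* Sketch: walk one level down the fake PGD to find the HW table again. */
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
	pgd_t *pgd = kvm->arch.pgd;
	pud_t *pud;

	if (KVM_PREALLOC_LEVEL == 0)
		return pgd;			/* the pgd is the HW table */

	pud = pud_offset(pgd, 0);
	if (KVM_PREALLOC_LEVEL == 1)
		return pud;			/* concatenated PUD pages */

	return pmd_offset(pud, 0);		/* KVM_PREALLOC_LEVEL == 2: PMD page */
}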
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
        pgd_t *pgd;
        pud_t *pud;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        if (WARN_ON(pgd_none(*pgd))) {
                if (!cache)
                        return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
        pgd_t *pgd;
        phys_addr_t next;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        do {
                /*
                 * Release kvm_mmu_lock periodically if the memory region is