arm64: KVM: Do not use pgd_index to index stage-2 pgd
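The host's pgd_index() masks the index with the host VA layout's PTRS_PER_PGD, which is too small once the stage-2 PGD is built from concatenated pages (for example, a 40-bit IPA space with 4KB pages needs more first-level entries than a single host PGD page holds), so high guest addresses wrap back into the first page. The callers below therefore switch to a dedicated kvm_pgd_index() helper. Its definition lives in the per-arch kvm_mmu.h headers, which are not part of this file's diff; the following is only a hedged sketch of the assumed shape, indexing by the stage-2 table size PTRS_PER_S2_PGD:

/* Sketch (assumed form, not the header change itself): index the stage-2
 * PGD by its own geometry instead of the host's PTRS_PER_PGD. */
#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))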
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e1170fc83489be9ba51a15c97b148a5..5656d79c5a44f4d2ca816e15b647abf29a114e0b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
        phys_addr_t addr = start, end = start + size;
        phys_addr_t next;
 
-       pgd = pgdp + pgd_index(addr);
+       pgd = pgdp + kvm_pgd_index(addr);
        do {
                next = kvm_pgd_addr_end(addr, end);
                if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
        phys_addr_t next;
        pgd_t *pgd;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        do {
                next = kvm_pgd_addr_end(addr, end);
                stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
                                     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+       free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+       unsigned int size = kvm_get_hwpgd_size();
+
+       return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:       The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-       int ret;
        pgd_t *pgd;
+       void *hwpgd;
 
        if (kvm->arch.pgd != NULL) {
                kvm_err("kvm_arch already initialized?\n");
                return -EINVAL;
        }
 
+       hwpgd = kvm_alloc_hwpgd();
+       if (!hwpgd)
+               return -ENOMEM;
+
+       /* When the kernel uses more levels of page tables than the
+        * guest, we allocate a fake PGD and pre-populate it to point
+        * to the next-level page table, which will be the real
+        * initial page table pointed to by the VTTBR.
+        *
+        * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+        * the PMD and the kernel will use folded pud.
+        * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+        * pages.
+        */
        if (KVM_PREALLOC_LEVEL > 0) {
+               int i;
+
                /*
                 * Allocate fake pgd for the page table manipulation macros to
                 * work.  This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
                 */
                pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
                                       GFP_KERNEL | __GFP_ZERO);
+
+               if (!pgd) {
+                       kvm_free_hwpgd(hwpgd);
+                       return -ENOMEM;
+               }
+
+               /* Plug the HW PGD into the fake one. */
+               for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+                       if (KVM_PREALLOC_LEVEL == 1)
+                               pgd_populate(NULL, pgd + i,
+                                            (pud_t *)hwpgd + i * PTRS_PER_PUD);
+                       else if (KVM_PREALLOC_LEVEL == 2)
+                               pud_populate(NULL, pud_offset(pgd, 0) + i,
+                                            (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+               }
        } else {
                /*
                 * Allocate actual first-level Stage-2 page table used by the
                 * hardware for Stage-2 page table walks.
                 */
-               pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+               pgd = (pgd_t *)hwpgd;
        }
 
-       if (!pgd)
-               return -ENOMEM;
-
-       ret = kvm_prealloc_hwpgd(kvm, pgd);
-       if (ret)
-               goto out_err;
-
        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;
        return 0;
-out_err:
-       if (KVM_PREALLOC_LEVEL > 0)
-               kfree(pgd);
-       else
-               free_pages((unsigned long)pgd, S2_PGD_ORDER);
-       return ret;
 }
 
 /**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
                return;
 
        unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-       kvm_free_hwpgd(kvm);
+       kvm_free_hwpgd(kvm_get_hwpgd(kvm));
        if (KVM_PREALLOC_LEVEL > 0)
                kfree(kvm->arch.pgd);
-       else
-               free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
        kvm->arch.pgd = NULL;
 }
 
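The allocation and tear-down paths above rely on two per-arch helpers defined in the kvm_mmu.h headers rather than in this file: kvm_get_hwpgd_size(), the amount of memory the hardware-visible first-level table needs, and kvm_get_hwpgd(), which recovers that hardware table from the (possibly fake) kvm->arch.pgd. A minimal sketch of the idea, assuming the KVM_PREALLOC_LEVEL scheme described in the comment in kvm_alloc_stage2_pgd(); the real header definitions may differ in detail:

/* Sketch: how much backing memory the HW stage-2 first-level table takes. */
static inline unsigned int kvm_get_hwpgd_size(void)
{
	/* With a pre-allocated level, each fake-PGD entry gets a full page. */
	if (KVM_PREALLOC_LEVEL > 0)
		return PTRS_PER_S2_PGD * PAGE_SIZE;
	return PTRS_PER_S2_PGD * sizeof(pgd_t);
}

/* Sketch: walk one level down the fake PGD to find the HW table again. */
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
	pgd_t *pgd = kvm->arch.pgd;
	pud_t *pud;

	if (KVM_PREALLOC_LEVEL == 0)
		return pgd;			/* the pgd is the HW table */

	pud = pud_offset(pgd, 0);
	if (KVM_PREALLOC_LEVEL == 1)
		return pud;			/* concatenated PUD pages */

	return pmd_offset(pud, 0);		/* KVM_PREALLOC_LEVEL == 2: PMD page */
}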
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
        pgd_t *pgd;
        pud_t *pud;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        if (WARN_ON(pgd_none(*pgd))) {
                if (!cache)
                        return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
        pgd_t *pgd;
        phys_addr_t next;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        do {
                /*
                 * Release kvm_mmu_lock periodically if the memory region is