kvm: Faults which trigger IO release the mmap_sem

author Andres Lagar-Cavilla <andreslc@google.com>

Wed, 17 Sep 2014 17:51:48 +0000 (10:51 -0700)

committer Paolo Bonzini <pbonzini@redhat.com>

Wed, 24 Sep 2014 12:07:54 +0000 (14:07 +0200)
author Andres Lagar-Cavilla <andreslc@google.com>
Wed, 17 Sep 2014 17:51:48 +0000 (10:51 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 24 Sep 2014 12:07:54 +0000 (14:07 +0200)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index d44a2d6405519a599159b22297ed9efc763bce88..45aaeb3360c9d277dac950de7400f7603505193a 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -198,6 +198,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
  int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
  #endif
  
+/*
+ * Carry out a gup that requires IO. Allow the mm to relinquish the mmap
+ * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL
+ * controls whether we retry the gup one more time to completion in that case.
+ * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp
+ * handler.
+ */
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+                        unsigned long addr, bool write_fault,
+                        struct page **pagep);
+
  enum {
         OUTSIDE_GUEST_MODE,
         IN_GUEST_MODE,
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 8981cc882ed2eb0d2dfdebfbc5355e8c1cc2eb16..0f4196a0bc20c8267d8f5eade59f0d14abaaa284 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
  #define FOLL_HWPOISON  0x100   /* check page is hwpoisoned */
  #define FOLL_NUMA      0x200   /* force NUMA hinting page fault */
  #define FOLL_MIGRATION 0x400   /* wait for page to replace migration entry */
+#define FOLL_TRIED     0x800   /* a retry, previous pass started an IO */
  
  typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
                         void *data);
diff --git a/mm/gup.c b/mm/gup.c

index 91d044b1600dd6b216decb62ca9f1bd7ef152c52..af7ea3e0826bfff7c1ba0b0ebdfe2fef41cf8c1e 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
                 fault_flags |= FAULT_FLAG_ALLOW_RETRY;
         if (*flags & FOLL_NOWAIT)
                 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
+       if (*flags & FOLL_TRIED) {
+               VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
+               fault_flags |= FAULT_FLAG_TRIED;
+       }
  
         ret = handle_mm_fault(mm, vma, address, fault_flags);
         if (ret & VM_FAULT_ERROR) {
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c

index d6a3d0993d8812c8527274d01e8c08ce942746a4..5ff7f7f2689afec2887fe9c387aaaca2d616a119 100644 (file)
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
  
         might_sleep();
  
-       down_read(&mm->mmap_sem);
-       get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
-       up_read(&mm->mmap_sem);
+       kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
         kvm_async_page_present_sync(vcpu, apf);
  
         spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 499db0977f3c958e32a86af5fc8e7bc68481a3d6..1c6e8476b244577098a697e30424894b8d11b038 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1122,6 +1122,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
         return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
  }
  
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+                        unsigned long addr, bool write_fault,
+                        struct page **pagep)
+{
+       int npages;
+       int locked = 1;
+       int flags = FOLL_TOUCH | FOLL_HWPOISON |
+                   (pagep ? FOLL_GET : 0) |
+                   (write_fault ? FOLL_WRITE : 0);
+
+       /*
+        * If retrying the fault, we get here *not* having allowed the filemap
+        * to wait on the page lock. We should now allow waiting on the IO with
+        * the mmap semaphore released; 'locked' is cleared if gup dropped it.
+        */
+       down_read(&mm->mmap_sem);
+       npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+                                 &locked);
+       if (!locked) {
+               VM_BUG_ON(npages);      /* gup returns pages pinned: 0 here */
+
+               if (!pagep)
+                       return 0;
+
+               /*
+                * The previous call has now waited on the IO. Now we can
+                * retry and complete. Pass TRIED to ensure we do not
+                * reschedule async IO (see e.g. filemap_fault).
+                */
+               down_read(&mm->mmap_sem);
+               npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+                                         pagep, NULL, NULL);
+       }
+       up_read(&mm->mmap_sem);
+       return npages;
+}
+
  static inline int check_user_page_hwpoison(unsigned long addr)
  {
         int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1184,9 +1221,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
                 npages = get_user_page_nowait(current, current->mm,
                                               addr, write_fault, page);
                 up_read(&current->mm->mmap_sem);
-       } else
-               npages = get_user_pages_fast(addr, 1, write_fault,
-                                            page);
+       } else {
+               /*
+                * By now we have tried gup_fast, and possibly async_pf, and we
+                * are certainly not atomic. Time to retry the gup, allowing
+                * mmap semaphore to be relinquished in the case of IO.
+                */
+               npages = kvm_get_user_page_io(current, current->mm, addr,
+                                             write_fault, page);
+       }
         if (npages != 1)
                 return npages;
author	Andres Lagar-Cavilla <andreslc@google.com>
	Wed, 17 Sep 2014 17:51:48 +0000 (10:51 -0700)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 24 Sep 2014 12:07:54 +0000 (14:07 +0200)
include/linux/kvm_host.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
mm/gup.c		patch \| blob \| history
virt/kvm/async_pf.c		patch \| blob \| history
virt/kvm/kvm_main.c		patch \| blob \| history