mm: alloc_contig_range: allow to specify GFP mask
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c7025c132670a4d8e3279d1ebb7730718fb6aa8a..2e0e8159ce8e06652f43890ffe3d7f2e0a5665f9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -32,6 +32,7 @@
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
 #include <linux/node.h>
+#include <linux/userfaultfd_k.h>
 #include "internal.h"
 
 int hugepages_treat_as_movable;
@@ -1051,7 +1052,8 @@ static int __alloc_gigantic_page(unsigned long start_pfn,
                                unsigned long nr_pages)
 {
        unsigned long end_pfn = start_pfn + nr_pages;
-       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
+                                 GFP_KERNEL);
 }
 
 static bool pfn_range_valid_gigantic(struct zone *z,
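The hunk above adapts gigantic-page allocation to the new alloc_contig_range()
signature, which takes an explicit gfp_t instead of hard-coding the allocation
context; __alloc_gigantic_page() passes GFP_KERNEL to keep the old behaviour.
A minimal sketch of what another caller could now express (the function name
and range bounds here are hypothetical, for illustration only):

	static int example_alloc_movable_range(unsigned long example_start_pfn,
					       unsigned long example_nr_pages)
	{
		/*
		 * The extra gfp_t argument lets a caller tweak the
		 * allocation context, e.g. suppress failure warnings
		 * while still permitting reclaim.
		 */
		return alloc_contig_range(example_start_pfn,
					  example_start_pfn + example_nr_pages,
					  MIGRATE_MOVABLE,
					  GFP_KERNEL | __GFP_NOWARN);
	}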
@@ -3141,7 +3143,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
  * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
  * this far.
  */
-static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int hugetlb_vm_op_fault(struct vm_fault *vmf)
 {
        BUG();
        return 0;
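This hunk follows the tree-wide change that reduces the ->fault prototype to a
single struct vm_fault argument; the VMA is now carried in vmf->vma. A hedged
sketch of a handler under the new signature (example_fault is a hypothetical
name, shown only to illustrate where the VMA now comes from):

	static int example_fault(struct vm_fault *vmf)
	{
		/* the VMA formerly passed separately now lives in vmf */
		struct vm_area_struct *vma = vmf->vma;

		pr_debug("fault at %#lx in vma %p\n", vmf->address, vma);
		return VM_FAULT_SIGBUS;	/* sketch: no backing page */
	}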
@@ -3680,6 +3682,38 @@ retry:
                size = i_size_read(mapping->host) >> huge_page_shift(h);
                if (idx >= size)
                        goto out;
+
+               /*
+                * Check for page in userfault range
+                */
+               if (userfaultfd_missing(vma)) {
+                       u32 hash;
+                       struct vm_fault vmf = {
+                               .vma = vma,
+                               .address = address,
+                               .flags = flags,
+                               /*
+                                * Leave the remaining fields
+                                * uninitialized; a callee that
+                                * assumed anything about them would
+                                * be hard to debug.  Same convention
+                                * as in memory.c.
+                                */
+                       };
+
+                       /*
+                        * hugetlb_fault_mutex must be dropped before
+                        * handling userfault.  Reacquire after handling
+                        * fault to make calling code simpler.
+                        */
+                       hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
+                                                       idx, address);
+                       mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+                       ret = handle_userfault(&vmf, VM_UFFD_MISSING);
+                       mutex_lock(&hugetlb_fault_mutex_table[hash]);
+                       goto out;
+               }
+
                page = alloc_huge_page(vma, address, 0);
                if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
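With this hook in place, a missing fault on a registered hugetlbfs range is
handed to the userfaultfd reader instead of being satisfied by
alloc_huge_page(). A minimal userspace sketch of registering such a range for
MISSING-mode tracking (error handling elided; uffd is assumed to be a
descriptor obtained from the userfaultfd syscall and already initialised via
UFFDIO_API):

	#include <linux/userfaultfd.h>
	#include <sys/ioctl.h>

	/* register a hugetlbfs mapping for missing-page events */
	static int register_missing_range(int uffd, void *area, size_t len)
	{
		struct uffdio_register reg = {
			.range = {
				.start = (unsigned long) area,
				.len   = len,	/* huge-page aligned */
			},
			.mode = UFFDIO_REGISTER_MODE_MISSING,
		};

		return ioctl(uffd, UFFDIO_REGISTER, &reg);
	}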
@@ -3948,10 +3982,113 @@ out_mutex:
        return ret;
 }
 
+/*
+ * Used by userfaultfd UFFDIO_COPY.  Based on mcopy_atomic_pte with
+ * modifications for huge pages.
+ */
+int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
+                           pte_t *dst_pte,
+                           struct vm_area_struct *dst_vma,
+                           unsigned long dst_addr,
+                           unsigned long src_addr,
+                           struct page **pagep)
+{
+       int vm_shared = dst_vma->vm_flags & VM_SHARED;
+       struct hstate *h = hstate_vma(dst_vma);
+       pte_t _dst_pte;
+       spinlock_t *ptl;
+       int ret;
+       struct page *page;
+
+       if (!*pagep) {
+               ret = -ENOMEM;
+               page = alloc_huge_page(dst_vma, dst_addr, 0);
+               if (IS_ERR(page))
+                       goto out;
+
+               ret = copy_huge_page_from_user(page,
+                                               (const void __user *) src_addr,
+                                               pages_per_huge_page(h), false);
+
+               /* fall back to copy_from_user outside mmap_sem */
+               if (unlikely(ret)) {
+                       ret = -EFAULT;
+                       *pagep = page;
+                       /* don't free the page */
+                       goto out;
+               }
+       } else {
+               page = *pagep;
+               *pagep = NULL;
+       }
+
+       /*
+        * The memory barrier inside __SetPageUptodate makes sure that
+        * preceding stores to the page contents become visible before
+        * the set_pte_at() write.
+        */
+       __SetPageUptodate(page);
+       set_page_huge_active(page);
+
+       /*
+        * If shared, add to page cache
+        */
+       if (vm_shared) {
+               struct address_space *mapping = dst_vma->vm_file->f_mapping;
+               pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+
+               ret = huge_add_to_page_cache(page, mapping, idx);
+               if (ret)
+                       goto out_release_nounlock;
+       }
+
+       ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
+       spin_lock(ptl);
+
+       ret = -EEXIST;
+       if (!huge_pte_none(huge_ptep_get(dst_pte)))
+               goto out_release_unlock;
+
+       if (vm_shared) {
+               page_dup_rmap(page, true);
+       } else {
+               ClearPagePrivate(page);
+               hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
+       }
+
+       _dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
+       if (dst_vma->vm_flags & VM_WRITE)
+               _dst_pte = huge_pte_mkdirty(_dst_pte);
+       _dst_pte = pte_mkyoung(_dst_pte);
+
+       set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+       (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
+                                       dst_vma->vm_flags & VM_WRITE);
+       hugetlb_count_add(pages_per_huge_page(h), dst_mm);
+
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(dst_vma, dst_addr, dst_pte);
+
+       spin_unlock(ptl);
+       if (vm_shared)
+               unlock_page(page);
+       ret = 0;
+out:
+       return ret;
+out_release_unlock:
+       spin_unlock(ptl);
+out_release_nounlock:
+       if (vm_shared)
+               unlock_page(page);
+       put_page(page);
+       goto out;
+}
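hugetlb_mcopy_atomic_pte() above is the hugetlb backend for UFFDIO_COPY; the
copy_huge_page_from_user() fallback exists because the source pages may fault
while mmap_sem is held, in which case the caller retries the copy outside the
lock. From userspace, the operation that ultimately reaches this function
looks roughly like the following sketch (error handling elided):

	#include <linux/userfaultfd.h>
	#include <sys/ioctl.h>

	/* resolve a missing hugetlb fault by copying in a full huge page */
	static int resolve_missing_fault(int uffd, unsigned long fault_addr,
					 void *src_buf, size_t huge_page_size)
	{
		struct uffdio_copy copy = {
			/* dst must be aligned to the huge page size */
			.dst  = fault_addr & ~(huge_page_size - 1),
			.src  = (unsigned long) src_buf,
			.len  = huge_page_size,
			.mode = 0,
		};

		return ioctl(uffd, UFFDIO_COPY, &copy);
	}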
+
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page **pages, struct vm_area_struct **vmas,
                         unsigned long *position, unsigned long *nr_pages,
-                        long i, unsigned int flags)
+                        long i, unsigned int flags, int *nonblocking)
 {
        unsigned long pfn_offset;
        unsigned long vaddr = *position;
@@ -4014,16 +4151,43 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                    ((flags & FOLL_WRITE) &&
                      !huge_pte_write(huge_ptep_get(pte)))) {
                        int ret;
+                       unsigned int fault_flags = 0;
 
                        if (pte)
                                spin_unlock(ptl);
-                       ret = hugetlb_fault(mm, vma, vaddr,
-                               (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
-                       if (!(ret & VM_FAULT_ERROR))
-                               continue;
-
-                       remainder = 0;
-                       break;
+                       if (flags & FOLL_WRITE)
+                               fault_flags |= FAULT_FLAG_WRITE;
+                       if (nonblocking)
+                               fault_flags |= FAULT_FLAG_ALLOW_RETRY;
+                       if (flags & FOLL_NOWAIT)
+                               fault_flags |= FAULT_FLAG_ALLOW_RETRY |
+                                       FAULT_FLAG_RETRY_NOWAIT;
+                       if (flags & FOLL_TRIED) {
+                               VM_WARN_ON_ONCE(fault_flags &
+                                               FAULT_FLAG_ALLOW_RETRY);
+                               fault_flags |= FAULT_FLAG_TRIED;
+                       }
+                       ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
+                       if (ret & VM_FAULT_ERROR) {
+                               remainder = 0;
+                               break;
+                       }
+                       if (ret & VM_FAULT_RETRY) {
+                               if (nonblocking)
+                                       *nonblocking = 0;
+                               *nr_pages = 0;
+                               /*
+                                * VM_FAULT_RETRY must not be reported
+                                * as an error; return the number of
+                                * pages handled so far (possibly
+                                * zero) instead.
+                                *
+                                * No need to update "position" as the
+                                * caller will not check it after
+                                * *nr_pages is set to 0.
+                                */
+                               return i;
+                       }
+                       continue;
                }
 
                pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
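The new nonblocking argument lets follow_hugetlb_page() take part in the
FAULT_FLAG_ALLOW_RETRY protocol used by the rest of get_user_pages(): when the
fault handler drops mmap_sem, *nonblocking is cleared and the walk returns
early with *nr_pages set to 0. A sketch of how a retry-aware GUP caller
observes this (the wrapper function is hypothetical; the
get_user_pages_locked() signature matches this kernel series):

	/* pin one writable page of the current task, allowing retries */
	static long example_pin_one_page(unsigned long addr, struct page **page)
	{
		int locked = 1;
		long ret;

		down_read(&current->mm->mmap_sem);
		ret = get_user_pages_locked(addr, 1, FOLL_WRITE, page, &locked);
		/*
		 * If the fault slept, mmap_sem was dropped and "locked"
		 * is now 0; only unlock when we still hold the lock.
		 */
		if (locked)
			up_read(&current->mm->mmap_sem);
		return ret;
	}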
@@ -4052,6 +4216,11 @@ same_page:
                spin_unlock(ptl);
        }
        *nr_pages = remainder;
+       /*
+        * Setting "position" is only required when remainder is
+        * non-zero, but updating it unconditionally is cheaper
+        * than adding an "if (remainder)" branch.
+        */
        *position = vaddr;
 
        return i ? i : -EFAULT;