git.karo-electronics.de Git - mv-sheeva.git/blobdiff - mm/memory.c
Merge tag 'v2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[mv-sheeva.git] / mm / memory.c
index 02e48aa0ed136ff8e4d808d954a20d0b46e6d23d..5823698c2b71a9dd79b43986c9929f2cd7485a04 100644 (file)
@@ -394,9 +394,11 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
        }
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+               pmd_t *pmd, unsigned long address)
 {
        pgtable_t new = pte_alloc_one(mm, address);
+       int wait_split_huge_page;
        if (!new)
                return -ENOMEM;
 
@@ -416,14 +418,18 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
        smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
        spin_lock(&mm->page_table_lock);
-       if (!pmd_present(*pmd)) {       /* Has another populated it ? */
+       wait_split_huge_page = 0;
+       if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                mm->nr_ptes++;
                pmd_populate(mm, pmd, new);
                new = NULL;
-       }
+       } else if (unlikely(pmd_trans_splitting(*pmd)))
+               wait_split_huge_page = 1;
        spin_unlock(&mm->page_table_lock);
        if (new)
                pte_free(mm, new);
+       if (wait_split_huge_page)
+               wait_split_huge_page(vma->anon_vma, pmd);
        return 0;
 }
 
@@ -436,10 +442,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
        smp_wmb(); /* See comment in __pte_alloc */
 
        spin_lock(&init_mm.page_table_lock);
-       if (!pmd_present(*pmd)) {       /* Has another populated it ? */
+       if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                pmd_populate_kernel(&init_mm, pmd, new);
                new = NULL;
-       }
+       } else
+               VM_BUG_ON(pmd_trans_splitting(*pmd));
        spin_unlock(&init_mm.page_table_lock);
        if (new)
                pte_free_kernel(&init_mm, new);
@@ -719,9 +726,9 @@ out_set_pte:
        return 0;
 }
 
-static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-               pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
-               unsigned long addr, unsigned long end)
+int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+                  pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+                  unsigned long addr, unsigned long end)
 {
        pte_t *orig_src_pte, *orig_dst_pte;
        pte_t *src_pte, *dst_pte;
@@ -795,6 +802,17 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
        src_pmd = pmd_offset(src_pud, addr);
        do {
                next = pmd_addr_end(addr, end);
+               if (pmd_trans_huge(*src_pmd)) {
+                       int err;
+                       VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
+                       err = copy_huge_pmd(dst_mm, src_mm,
+                                           dst_pmd, src_pmd, addr, vma);
+                       if (err == -ENOMEM)
+                               return -ENOMEM;
+                       if (!err)
+                               continue;
+                       /* fall through */
+               }
                if (pmd_none_or_clear_bad(src_pmd))
                        continue;
                if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
@@ -997,6 +1015,16 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
+               if (pmd_trans_huge(*pmd)) {
+                       if (next-addr != HPAGE_PMD_SIZE) {
+                               VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
+                               split_huge_page_pmd(vma->vm_mm, pmd);
+                       } else if (zap_huge_pmd(tlb, vma, pmd)) {
+                               (*zap_work)--;
+                               continue;
+                       }
+                       /* fall through */
+               }
                if (pmd_none_or_clear_bad(pmd)) {
                        (*zap_work)--;
                        continue;
@@ -1262,7 +1290,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                goto no_page_table;
-       if (pud_huge(*pud)) {
+       if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
                BUG_ON(flags & FOLL_GET);
                page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
                goto out;
@@ -1273,11 +1301,32 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                goto no_page_table;
-       if (pmd_huge(*pmd)) {
+       if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
                BUG_ON(flags & FOLL_GET);
                page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
                goto out;
        }
+       if (pmd_trans_huge(*pmd)) {
+               if (flags & FOLL_SPLIT) {
+                       split_huge_page_pmd(mm, pmd);
+                       goto split_fallthrough;
+               }
+               spin_lock(&mm->page_table_lock);
+               if (likely(pmd_trans_huge(*pmd))) {
+                       if (unlikely(pmd_trans_splitting(*pmd))) {
+                               spin_unlock(&mm->page_table_lock);
+                               wait_split_huge_page(vma->anon_vma, pmd);
+                       } else {
+                               page = follow_trans_huge_pmd(mm, address,
+                                                            pmd, flags);
+                               spin_unlock(&mm->page_table_lock);
+                               goto out;
+                       }
+               } else
+                       spin_unlock(&mm->page_table_lock);
+               /* fall through */
+       }
+split_fallthrough:
        if (unlikely(pmd_bad(*pmd)))
                goto no_page_table;
 
@@ -1310,6 +1359,28 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                 */
                mark_page_accessed(page);
        }
+       if (flags & FOLL_MLOCK) {
+               /*
+                * The preliminary mapping check is mainly to avoid the
+                * pointless overhead of lock_page on the ZERO_PAGE
+                * which might bounce very badly if there is contention.
+                *
+                * If the page is already locked, we don't need to
+                * handle it now - vmscan will handle it later if and
+                * when it attempts to reclaim the page.
+                */
+               if (page->mapping && trylock_page(page)) {
+                       lru_add_drain();  /* push cached pages to LRU */
+                       /*
+                        * Because we lock page here and migration is
+                        * blocked by the pte's page reference, we need
+                        * only check for file-cache page truncation.
+                        */
+                       if (page->mapping)
+                               mlock_vma_page(page);
+                       unlock_page(page);
+               }
+       }
 unlock:
        pte_unmap_unlock(ptep, ptl);
 out:
@@ -1341,7 +1412,8 @@ no_page_table:
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long start, int nr_pages, unsigned int gup_flags,
-                    struct page **pages, struct vm_area_struct **vmas)
+                    struct page **pages, struct vm_area_struct **vmas,
+                    int *nonblocking)
 {
        int i;
        unsigned long vm_flags;
@@ -1386,6 +1458,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd))
                                return i ? : -EFAULT;
+                       VM_BUG_ON(pmd_trans_huge(*pmd));
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
@@ -1441,10 +1514,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        cond_resched();
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
+                               unsigned int fault_flags = 0;
+
+                               if (foll_flags & FOLL_WRITE)
+                                       fault_flags |= FAULT_FLAG_WRITE;
+                               if (nonblocking)
+                                       fault_flags |= FAULT_FLAG_ALLOW_RETRY;
 
                                ret = handle_mm_fault(mm, vma, start,
-                                       (foll_flags & FOLL_WRITE) ?
-                                       FAULT_FLAG_WRITE : 0);
+                                                       fault_flags);
 
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
@@ -1460,6 +1538,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                else
                                        tsk->min_flt++;
 
+                               if (ret & VM_FAULT_RETRY) {
+                                       *nonblocking = 0;
+                                       return i;
+                               }
+
                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
@@ -1559,7 +1642,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        if (force)
                flags |= FOLL_FORCE;
 
-       return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
+       return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
+                               NULL);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -1584,7 +1668,8 @@ struct page *get_dump_page(unsigned long addr)
        struct page *page;
 
        if (__get_user_pages(current, current->mm, addr, 1,
-                       FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
+                            FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
+                            NULL) < 1)
                return NULL;
        flush_cache_page(vma, addr, page_to_pfn(page));
        return page;
@@ -1598,8 +1683,10 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
        pud_t * pud = pud_alloc(mm, pgd, addr);
        if (pud) {
                pmd_t * pmd = pmd_alloc(mm, pud, addr);
-               if (pmd)
+               if (pmd) {
+                       VM_BUG_ON(pmd_trans_huge(*pmd));
                        return pte_alloc_map_lock(mm, pmd, addr, ptl);
+               }
        }
        return NULL;
 }
@@ -1818,6 +1905,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                return -ENOMEM;
+       VM_BUG_ON(pmd_trans_huge(*pmd));
        do {
                next = pmd_addr_end(addr, end);
                if (remap_pte_range(mm, pmd, addr, next,
@@ -2048,19 +2136,6 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
        return same;
 }
 
-/*
- * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
- * servicing faults for write access.  In the normal case, do always want
- * pte_mkwrite.  But get_user_pages can cause write faults for mappings
- * that do not have writing enabled, when used by access_process_vm.
- */
-static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
-{
-       if (likely(vma->vm_flags & VM_WRITE))
-               pte = pte_mkwrite(pte);
-       return pte;
-}
-
 static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
 {
        /*
@@ -2112,7 +2187,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct page *old_page, *new_page;
        pte_t entry;
-       int reuse = 0, ret = 0;
+       int ret = 0;
        int page_mkwrite = 0;
        struct page *dirty_page = NULL;
 
@@ -2144,19 +2219,20 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                                         &ptl);
                        if (!pte_same(*page_table, orig_pte)) {
                                unlock_page(old_page);
-                               page_cache_release(old_page);
                                goto unlock;
                        }
                        page_cache_release(old_page);
                }
-               reuse = reuse_swap_page(old_page);
-               if (reuse)
+               if (reuse_swap_page(old_page)) {
                        /*
                         * The page is all ours.  Move it to our anon_vma so
                         * the rmap code will not search our parent or siblings.
                         * Protected against the rmap code by the page lock.
                         */
                        page_move_anon_rmap(old_page, vma, address);
+                       unlock_page(old_page);
+                       goto reuse;
+               }
                unlock_page(old_page);
        } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
                                        (VM_WRITE|VM_SHARED))) {
@@ -2212,7 +2288,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                                         &ptl);
                        if (!pte_same(*page_table, orig_pte)) {
                                unlock_page(old_page);
-                               page_cache_release(old_page);
                                goto unlock;
                        }
 
@@ -2220,18 +2295,52 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                }
                dirty_page = old_page;
                get_page(dirty_page);
-               reuse = 1;
-       }
 
-       if (reuse) {
 reuse:
                flush_cache_page(vma, address, pte_pfn(orig_pte));
                entry = pte_mkyoung(orig_pte);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                if (ptep_set_access_flags(vma, address, page_table, entry,1))
                        update_mmu_cache(vma, address, page_table);
+               pte_unmap_unlock(page_table, ptl);
                ret |= VM_FAULT_WRITE;
-               goto unlock;
+
+               if (!dirty_page)
+                       return ret;
+
+               /*
+                * Yes, Virginia, this is actually required to prevent a race
+                * with clear_page_dirty_for_io() from clearing the page dirty
+                * bit after it clears all dirty ptes, but before a racing
+                * do_wp_page installs a dirty pte.
+                *
+                * do_no_page is protected similarly.
+                */
+               if (!page_mkwrite) {
+                       wait_on_page_locked(dirty_page);
+                       set_page_dirty_balance(dirty_page, page_mkwrite);
+               }
+               put_page(dirty_page);
+               if (page_mkwrite) {
+                       struct address_space *mapping = dirty_page->mapping;
+
+                       set_page_dirty(dirty_page);
+                       unlock_page(dirty_page);
+                       page_cache_release(dirty_page);
+                       if (mapping)    {
+                               /*
+                                * Some device drivers do not set page.mapping
+                                * but still dirty their pages
+                                */
+                               balance_dirty_pages_ratelimited(mapping);
+                       }
+               }
+
+               /* file_update_time outside page_lock */
+               if (vma->vm_file)
+                       file_update_time(vma->vm_file);
+
+               return ret;
        }
 
        /*
@@ -2256,16 +2365,6 @@ gotten:
        }
        __SetPageUptodate(new_page);
 
-       /*
-        * Don't let another task, with possibly unlocked vma,
-        * keep the mlocked page.
-        */
-       if ((vma->vm_flags & VM_LOCKED) && old_page) {
-               lock_page(old_page);    /* for LRU manipulation */
-               clear_page_mlock(old_page);
-               unlock_page(old_page);
-       }
-
        if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
                goto oom_free_new;
 
@@ -2333,42 +2432,19 @@ gotten:
 
        if (new_page)
                page_cache_release(new_page);
-       if (old_page)
-               page_cache_release(old_page);
 unlock:
        pte_unmap_unlock(page_table, ptl);
-       if (dirty_page) {
+       if (old_page) {
                /*
-                * Yes, Virginia, this is actually required to prevent a race
-                * with clear_page_dirty_for_io() from clearing the page dirty
-                * bit after it clear all dirty ptes, but before a racing
-                * do_wp_page installs a dirty pte.
-                *
-                * do_no_page is protected similarly.
+                * Don't let another task, with possibly unlocked vma,
+                * keep the mlocked page.
                 */
-               if (!page_mkwrite) {
-                       wait_on_page_locked(dirty_page);
-                       set_page_dirty_balance(dirty_page, page_mkwrite);
-               }
-               put_page(dirty_page);
-               if (page_mkwrite) {
-                       struct address_space *mapping = dirty_page->mapping;
-
-                       set_page_dirty(dirty_page);
-                       unlock_page(dirty_page);
-                       page_cache_release(dirty_page);
-                       if (mapping)    {
-                               /*
-                                * Some device drivers do not set page.mapping
-                                * but still dirty their pages
-                                */
-                               balance_dirty_pages_ratelimited(mapping);
-                       }
+               if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) {
+                       lock_page(old_page);    /* LRU manipulation */
+                       munlock_vma_page(old_page);
+                       unlock_page(old_page);
                }
-
-               /* file_update_time outside page_lock */
-               if (vma->vm_file)
-                       file_update_time(vma->vm_file);
+               page_cache_release(old_page);
        }
        return ret;
 oom_free_new:
@@ -2572,6 +2648,7 @@ void unmap_mapping_range(struct address_space *mapping,
                details.last_index = ULONG_MAX;
        details.i_mmap_lock = &mapping->i_mmap_lock;
 
+       mutex_lock(&mapping->unmap_mutex);
        spin_lock(&mapping->i_mmap_lock);
 
        /* Protect against endless unmapping loops */
@@ -2588,6 +2665,7 @@ void unmap_mapping_range(struct address_space *mapping,
        if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
                unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
        spin_unlock(&mapping->i_mmap_lock);
+       mutex_unlock(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
@@ -2975,12 +3053,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                goto out;
                        }
                        charged = 1;
-                       /*
-                        * Don't let another task, with possibly unlocked vma,
-                        * keep the mlocked page.
-                        */
-                       if (vma->vm_flags & VM_LOCKED)
-                               clear_page_mlock(vmf.page);
                        copy_user_highpage(page, vmf.page, address, vma);
                        __SetPageUptodate(page);
                } else {
@@ -3147,9 +3219,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static inline int handle_pte_fault(struct mm_struct *mm,
-               struct vm_area_struct *vma, unsigned long address,
-               pte_t *pte, pmd_t *pmd, unsigned int flags)
+int handle_pte_fault(struct mm_struct *mm,
+                    struct vm_area_struct *vma, unsigned long address,
+                    pte_t *pte, pmd_t *pmd, unsigned int flags)
 {
        pte_t entry;
        spinlock_t *ptl;
@@ -3228,9 +3300,40 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        pmd = pmd_alloc(mm, pud, address);
        if (!pmd)
                return VM_FAULT_OOM;
-       pte = pte_alloc_map(mm, pmd, address);
-       if (!pte)
+       if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
+               if (!vma->vm_ops)
+                       return do_huge_pmd_anonymous_page(mm, vma, address,
+                                                         pmd, flags);
+       } else {
+               pmd_t orig_pmd = *pmd;
+               barrier();
+               if (pmd_trans_huge(orig_pmd)) {
+                       if (flags & FAULT_FLAG_WRITE &&
+                           !pmd_write(orig_pmd) &&
+                           !pmd_trans_splitting(orig_pmd))
+                               return do_huge_pmd_wp_page(mm, vma, address,
+                                                          pmd, orig_pmd);
+                       return 0;
+               }
+       }
+
+       /*
+        * Use __pte_alloc instead of pte_alloc_map, because we can't
+        * run pte_offset_map on the pmd if a huge pmd could
+        * materialize from under us from a different thread.
+        */
+       if (unlikely(__pte_alloc(mm, vma, pmd, address)))
                return VM_FAULT_OOM;
+       /* if a huge pmd materialized from under us, just retry later */
+       if (unlikely(pmd_trans_huge(*pmd)))
+               return 0;
+       /*
+        * A regular pmd is established and it can't morph into a huge pmd
+        * from under us anymore at this point because we hold the mmap_sem
+        * read mode and khugepaged takes it in write mode. So now it's
+        * safe to run pte_offset_map().
+        */
+       pte = pte_offset_map(pmd, address);
 
        return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
@@ -3296,7 +3399,12 @@ int make_pages_present(unsigned long addr, unsigned long end)
        vma = find_vma(current->mm, addr);
        if (!vma)
                return -ENOMEM;
-       write = (vma->vm_flags & VM_WRITE) != 0;
+       /*
+        * We want to touch writable mappings with a write fault in order
+        * to break COW, except for shared mappings because these don't COW
+        * and we would not want to dirty them for nothing.
+        */
+       write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
        BUG_ON(addr >= end);
        BUG_ON(end > vma->vm_end);
        len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
@@ -3368,6 +3476,7 @@ static int __follow_pte(struct mm_struct *mm, unsigned long address,
                goto out;
 
        pmd = pmd_offset(pud, address);
+       VM_BUG_ON(pmd_trans_huge(*pmd));
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;
 
@@ -3608,3 +3717,74 @@ void might_fault(void)
 }
 EXPORT_SYMBOL(might_fault);
 #endif
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+static void clear_gigantic_page(struct page *page, /* first sub-page to clear */
+                               unsigned long addr, /* address handed to clear_user_highpage() for sub-page 0 */
+                               unsigned int pages_per_huge_page) /* number of sub-pages to clear */
+{ /* Clear pages_per_huge_page sub-pages one by one, stepping with mem_map_next() instead of page + i. */
+       int i;
+       struct page *p = page; /* current sub-page; 'page' is kept as the base for mem_map_next() */
+
+       might_sleep();
+       for (i = 0; i < pages_per_huge_page;
+            i++, p = mem_map_next(p, page, i)) { /* i is bumped first, so mem_map_next() sees the index of the next sub-page */
+               cond_resched(); /* called once per sub-page, not once per huge page */
+               clear_user_highpage(p, addr + i * PAGE_SIZE); /* sub-page i is cleared with addr advanced by i pages */
+       }
+} /* NOTE(review): presumably mem_map_next() exists because page + i is not valid across the whole range here - confirm */
+void clear_huge_page(struct page *page, /* first sub-page of the huge page */
+                    unsigned long addr, unsigned int pages_per_huge_page) /* addr: address for sub-page 0; count of sub-pages */
+{ /* Clear every sub-page of a huge page; sizes above MAX_ORDER_NR_PAGES go to clear_gigantic_page(). */
+       int i;
+
+       if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { /* larger than MAX_ORDER_NR_PAGES: use the mem_map_next() based walker */
+               clear_gigantic_page(page, addr, pages_per_huge_page);
+               return; /* clear_gigantic_page() does its own might_sleep() */
+       }
+
+       might_sleep();
+       for (i = 0; i < pages_per_huge_page; i++) {
+               cond_resched(); /* called once per sub-page */
+               clear_user_highpage(page + i, addr + i * PAGE_SIZE); /* plain pointer arithmetic on struct page in this path */
+       }
+}
+
+static void copy_user_gigantic_page(struct page *dst, struct page *src, /* first destination / source sub-pages */
+                                   unsigned long addr, /* address handed to copy_user_highpage() for sub-page 0 */
+                                   struct vm_area_struct *vma, /* passed through unchanged to copy_user_highpage() */
+                                   unsigned int pages_per_huge_page) /* number of sub-pages to copy */
+{ /* Copy pages_per_huge_page sub-pages from src to dst, advancing both with mem_map_next(). */
+       int i;
+       struct page *dst_base = dst; /* bases are kept because mem_map_next() takes (current, base, index) */
+       struct page *src_base = src;
+
+       for (i = 0; i < pages_per_huge_page; ) { /* no increment clause: i is advanced inside the body */
+               cond_resched(); /* called once per sub-page */
+               copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); /* sub-page i is copied with addr advanced by i pages */
+
+               i++; /* bump first so mem_map_next() below gets the index of the next sub-page */
+               dst = mem_map_next(dst, dst_base, i);
+               src = mem_map_next(src, src_base, i);
+       }
+} /* NOTE(review): unlike clear_gigantic_page(), this helper has no might_sleep() of its own - confirm that is intended */
+
+void copy_user_huge_page(struct page *dst, struct page *src, /* first destination / source sub-pages */
+                        unsigned long addr, struct vm_area_struct *vma, /* addr: address for sub-page 0; vma is passed through */
+                        unsigned int pages_per_huge_page) /* number of sub-pages to copy */
+{ /* Copy every sub-page of a huge page; sizes above MAX_ORDER_NR_PAGES go to copy_user_gigantic_page(). */
+       int i;
+
+       if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { /* larger than MAX_ORDER_NR_PAGES: use the mem_map_next() based walker */
+               copy_user_gigantic_page(dst, src, addr, vma,
+                                       pages_per_huge_page);
+               return; /* this path returns before the might_sleep() below */
+       }
+
+       might_sleep();
+       for (i = 0; i < pages_per_huge_page; i++) {
+               cond_resched(); /* called once per sub-page */
+               copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma); /* plain pointer arithmetic on struct page in this path */
+       }
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */