thp: add numa awareness to hugepage allocations
author     Andrea Arcangeli <aarcange@redhat.com>
           Thu, 13 Jan 2011 23:47:05 +0000 (15:47 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 14 Jan 2011 01:32:45 +0000 (17:32 -0800)
This is mostly a matter of replacing alloc_pages with the newly introduced
alloc_pages_vma.  khugepaged needs special handling: the allocation has to
happen inside collapse_huge_page, where the vma is known, and on failure an
error has to be returned to the outer loop so that it sleeps for
alloc_sleep_millisecs before retrying.  With CONFIG_NUMA=n the more
efficient logic of handling allocation failures inside khugepaged is
retained.
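
In short, a transparent hugepage allocation goes from

        page = alloc_pages(GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT),
                           HPAGE_PMD_ORDER);

to the policy-aware equivalent (a condensed view of the huge_memory.c
hunks below):

        page = alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
                               HPAGE_PMD_ORDER, vma, haddr);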

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/gfp.h
mm/huge_memory.c
mm/mempolicy.c

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index d95082cc6f4a44065eade40283184254ce609cd7..a3b148a918740c509494b44cf2fda690d3261f96 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -331,14 +331,17 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
        return alloc_pages_current(gfp_mask, order);
 }
-extern struct page *alloc_page_vma(gfp_t gfp_mask,
+extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                        struct vm_area_struct *vma, unsigned long addr);
 #else
 #define alloc_pages(gfp_mask, order) \
                alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#define alloc_pages_vma(gfp_mask, order, vma, addr)    \
+       alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
+#define alloc_page_vma(gfp_mask, vma, addr)    \
+       alloc_pages_vma(gfp_mask, 0, vma, addr)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
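
As a quick sanity check of the macro layering on a !CONFIG_NUMA build, the
following stand-alone user-space sketch (stub types and a printing
alloc_pages(), not kernel code; the gfp value and order are illustrative)
shows that alloc_page_vma() still ends up as an order-0 alloc_pages() call
while alloc_pages_vma() simply drops the vma/addr arguments:

        #include <stdio.h>

        typedef unsigned int gfp_t;
        struct vm_area_struct;                  /* opaque stub */

        static void *alloc_pages(gfp_t gfp_mask, unsigned int order)
        {
                printf("alloc_pages(gfp=%#x, order=%u)\n", gfp_mask, order);
                return NULL;                    /* stub, no real allocation */
        }

        /* Same shape as the !CONFIG_NUMA definitions in the hunk above. */
        #define alloc_pages_vma(gfp_mask, order, vma, addr)     \
                alloc_pages(gfp_mask, order)
        #define alloc_page_vma(gfp_mask, vma, addr)             \
                alloc_pages_vma(gfp_mask, 0, vma, addr)

        int main(void)
        {
                struct vm_area_struct *vma = NULL;
                unsigned long addr = 0x1000;

                alloc_page_vma(0x10u, vma, addr);     /* -> alloc_pages(0x10, 0) */
                alloc_pages_vma(0x10u, 9, vma, addr); /* order 9 ~ HPAGE_PMD_ORDER on x86 */

                (void)vma;                      /* the macros drop these args */
                (void)addr;
                return 0;
        }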
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0415a83afd66ce6ea2cd2532b8dbc4e8aae324c9..f6559e7711bd0c4316cde419cc49772e0d12d3c2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -620,11 +620,26 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
        return ret;
 }
 
+static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+{
+       return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
+}
+
+static inline struct page *alloc_hugepage_vma(int defrag,
+                                             struct vm_area_struct *vma,
+                                             unsigned long haddr)
+{
+       return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
+                              HPAGE_PMD_ORDER, vma, haddr);
+}
+
+#ifndef CONFIG_NUMA
 static inline struct page *alloc_hugepage(int defrag)
 {
-       return alloc_pages(GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT),
+       return alloc_pages(alloc_hugepage_gfpmask(defrag),
                           HPAGE_PMD_ORDER);
 }
+#endif
 
 int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                               unsigned long address, pmd_t *pmd,
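
The factored-out alloc_hugepage_gfpmask() keeps the pre-existing behaviour:
with defrag disabled the __GFP_WAIT bit is cleared, so the attempt fails
quickly instead of sleeping in reclaim/compaction.  A tiny stand-alone check
of the expression (the flag values below are illustrative, not the kernel's):

        #include <stdio.h>

        /* Illustrative bit values only; the real masks live in linux/gfp.h. */
        #define __GFP_WAIT     0x10u
        #define GFP_TRANSHUGE  0x2000d0u        /* composite that includes __GFP_WAIT */

        static unsigned int alloc_hugepage_gfpmask(int defrag)
        {
                /* Same expression as the hunk above. */
                return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
        }

        int main(void)
        {
                printf("defrag=1: %#x (WAIT kept)\n",    alloc_hugepage_gfpmask(1));
                printf("defrag=0: %#x (WAIT cleared)\n", alloc_hugepage_gfpmask(0));
                return 0;
        }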
@@ -639,7 +654,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        return VM_FAULT_OOM;
                if (unlikely(khugepaged_enter(vma)))
                        return VM_FAULT_OOM;
-               page = alloc_hugepage(transparent_hugepage_defrag(vma));
+               page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+                                         vma, haddr);
                if (unlikely(!page))
                        goto out;
                if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -862,7 +878,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        if (transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow())
-               new_page = alloc_hugepage(transparent_hugepage_defrag(vma));
+               new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+                                             vma, haddr);
        else
                new_page = NULL;
 
@@ -1661,7 +1678,11 @@ static void collapse_huge_page(struct mm_struct *mm,
        unsigned long hstart, hend;
 
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+#ifndef CONFIG_NUMA
        VM_BUG_ON(!*hpage);
+#else
+       VM_BUG_ON(*hpage);
+#endif
 
        /*
         * Prevent all access to pagetables with the exception of
@@ -1699,9 +1720,17 @@ static void collapse_huge_page(struct mm_struct *mm,
        if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
                goto out;
 
+#ifndef CONFIG_NUMA
        new_page = *hpage;
-       if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+#else
+       new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+       if (unlikely(!new_page)) {
+               *hpage = ERR_PTR(-ENOMEM);
                goto out;
+       }
+#endif
+       if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+               goto out_put_page;
 
        anon_vma_lock(vma->anon_vma);
 
@@ -1730,7 +1759,7 @@ static void collapse_huge_page(struct mm_struct *mm,
                spin_unlock(&mm->page_table_lock);
                anon_vma_unlock(vma->anon_vma);
                mem_cgroup_uncharge_page(new_page);
-               goto out;
+               goto out_put_page;
        }
 
        /*
@@ -1765,10 +1794,19 @@ static void collapse_huge_page(struct mm_struct *mm,
        mm->nr_ptes--;
        spin_unlock(&mm->page_table_lock);
 
+#ifndef CONFIG_NUMA
        *hpage = NULL;
+#endif
        khugepaged_pages_collapsed++;
 out:
        up_write(&mm->mmap_sem);
+       return;
+
+out_put_page:
+#ifdef CONFIG_NUMA
+       put_page(new_page);
+#endif
+       goto out;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -2001,11 +2039,16 @@ static void khugepaged_do_scan(struct page **hpage)
        while (progress < pages) {
                cond_resched();
 
+#ifndef CONFIG_NUMA
                if (!*hpage) {
                        *hpage = alloc_hugepage(khugepaged_defrag());
                        if (unlikely(!*hpage))
                                break;
                }
+#else
+               if (IS_ERR(*hpage))
+                       break;
+#endif
 
                spin_lock(&khugepaged_mm_lock);
                if (!khugepaged_scan.mm_slot)
@@ -2020,37 +2063,55 @@ static void khugepaged_do_scan(struct page **hpage)
        }
 }
 
+static void khugepaged_alloc_sleep(void)
+{
+       DEFINE_WAIT(wait);
+       add_wait_queue(&khugepaged_wait, &wait);
+       schedule_timeout_interruptible(
+               msecs_to_jiffies(
+                       khugepaged_alloc_sleep_millisecs));
+       remove_wait_queue(&khugepaged_wait, &wait);
+}
+
+#ifndef CONFIG_NUMA
 static struct page *khugepaged_alloc_hugepage(void)
 {
        struct page *hpage;
 
        do {
                hpage = alloc_hugepage(khugepaged_defrag());
-               if (!hpage) {
-                       DEFINE_WAIT(wait);
-                       add_wait_queue(&khugepaged_wait, &wait);
-                       schedule_timeout_interruptible(
-                               msecs_to_jiffies(
-                                       khugepaged_alloc_sleep_millisecs));
-                       remove_wait_queue(&khugepaged_wait, &wait);
-               }
+               if (!hpage)
+                       khugepaged_alloc_sleep();
        } while (unlikely(!hpage) &&
                 likely(khugepaged_enabled()));
        return hpage;
 }
+#endif
 
 static void khugepaged_loop(void)
 {
        struct page *hpage;
 
+#ifdef CONFIG_NUMA
+       hpage = NULL;
+#endif
        while (likely(khugepaged_enabled())) {
+#ifndef CONFIG_NUMA
                hpage = khugepaged_alloc_hugepage();
                if (unlikely(!hpage))
                        break;
+#else
+               if (IS_ERR(hpage)) {
+                       khugepaged_alloc_sleep();
+                       hpage = NULL;
+               }
+#endif
 
                khugepaged_do_scan(&hpage);
+#ifndef CONFIG_NUMA
                if (hpage)
                        put_page(hpage);
+#endif
                if (khugepaged_has_work()) {
                        DEFINE_WAIT(wait);
                        if (!khugepaged_scan_sleep_millisecs)
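
Tying the khugepaged hunks together: on CONFIG_NUMA the hugepage is no
longer preallocated; collapse_huge_page() allocates it against the target
vma and reports failure as ERR_PTR(-ENOMEM), which makes
khugepaged_do_scan() stop and khugepaged_loop() sleep for
alloc_sleep_millisecs before the next pass.  A condensed user-space
simulation of that control flow (all helpers are stubs, and the scan step
is folded directly into the collapse call):

        #include <stdio.h>
        #include <errno.h>

        /* Minimal ERR_PTR/IS_ERR stand-ins for this sketch. */
        #define ERR_PTR(err)  ((void *)(long)(err))
        #define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-4095)

        static int fail_first = 1;              /* stub: first allocation fails */

        static void *alloc_hugepage_vma_stub(void)
        {
                if (fail_first) {
                        fail_first = 0;
                        return NULL;
                }
                return (void *)0x200000;        /* stub "huge page" */
        }

        /* Allocation now happens here, where the vma would be known. */
        static void collapse_huge_page(void **hpage)
        {
                void *new_page = alloc_hugepage_vma_stub();
                if (!new_page) {
                        *hpage = ERR_PTR(-ENOMEM); /* report failure to the loop */
                        return;
                }
                printf("collapsed one pmd using %p\n", new_page);
        }

        static void khugepaged_alloc_sleep(void)
        {
                printf("sleeping alloc_sleep_millisecs\n");
        }

        int main(void)
        {
                void *hpage = NULL;             /* NUMA case: nothing preallocated */
                int pass;

                for (pass = 0; pass < 3; pass++) {
                        if (IS_ERR(hpage)) {    /* previous pass hit -ENOMEM */
                                khugepaged_alloc_sleep();
                                hpage = NULL;
                        }
                        collapse_huge_page(&hpage);
                }
                return 0;
        }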
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83b7df309fc4937ee4de27be733018d955980221..368fc9d23610eb57dc2359b1aba57d9458616a31 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1796,7 +1796,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 }
 
 /**
- *     alloc_page_vma  - Allocate a page for a VMA.
+ *     alloc_pages_vma - Allocate a page for a VMA.
  *
  *     @gfp:
  *      %GFP_USER    user allocation.
@@ -1805,6 +1805,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *      %GFP_FS      allocation should not call back into a file system.
  *      %GFP_ATOMIC  don't sleep.
  *
+ *     @order:Order of the GFP allocation.
  *     @vma:  Pointer to VMA or NULL if not available.
  *     @addr: Virtual Address of the allocation. Must be inside the VMA.
  *
@@ -1818,7 +1819,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *     Should be called with the mm_sem of the vma hold.
  */
 struct page *
-alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
+alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
+               unsigned long addr)
 {
        struct mempolicy *pol = get_vma_policy(current, vma, addr);
        struct zonelist *zl;
@@ -1830,7 +1832,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 
                nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
                mpol_cond_put(pol);
-               page = alloc_page_interleave(gfp, 0, nid);
+               page = alloc_page_interleave(gfp, order, nid);
                put_mems_allowed();
                return page;
        }
@@ -1839,7 +1841,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
                /*
                 * slow path: ref counted shared policy
                 */
-               struct page *page =  __alloc_pages_nodemask(gfp, 0,
+               struct page *page =  __alloc_pages_nodemask(gfp, order,
                                                zl, policy_nodemask(gfp, pol));
                __mpol_put(pol);
                put_mems_allowed();
@@ -1848,7 +1850,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
        /*
         * fast path:  default or task policy
         */
-       page = __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+       page = __alloc_pages_nodemask(gfp, order, zl,
+                                     policy_nodemask(gfp, pol));
        put_mems_allowed();
        return page;
 }