hugetlb: add support for gigantic page allocation at runtime
author    Luiz Capitulino <lcapitulino@redhat.com>
          Wed, 14 May 2014 00:01:45 +0000 (10:01 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
          Wed, 14 May 2014 00:01:45 +0000 (10:01 +1000)
HugeTLB is limited to allocating hugepages whose order is less than
MAX_ORDER.  This is because HugeTLB allocates hugepages via the buddy
allocator.  Gigantic pages (that is, pages whose order is MAX_ORDER or
greater) have to be allocated at boot time.

However, boot-time allocation has at least two serious problems.  First,
it doesn't support NUMA; second, gigantic pages allocated at boot time
can't be freed.

This commit solves both issues by adding support for allocating gigantic
pages at runtime.  They work just like regular-sized hugepages: the
interface in sysfs is the same, NUMA is supported, and gigantic pages
can be freed.

For example, on x86_64 gigantic pages are 1GB in size.  To allocate two
1GB gigantic pages on node 1, one can do:

 # echo 2 > \
   /sys/devices/system/node/node1/hugepages/hugepages-1048576kB/nr_hugepages

And to free them all:

 # echo 0 > \
   /sys/devices/system/node/node1/hugepages/hugepages-1048576kB/nr_hugepages
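
Once the pool contains gigantic pages, userspace can map them with
mmap().  The following is an illustrative sketch (not part of this
patch), assuming a kernel that accepts the MAP_HUGE_1GB mmap flag
(Linux 3.8+) and that at least one 1GB page was reserved as shown
above; binding the mapping to node 1 would additionally need mbind(),
which is omitted here:

 /* Illustrative only: map and touch one 1GB gigantic page. */
 #define _GNU_SOURCE
 #include <stdio.h>
 #include <string.h>
 #include <sys/mman.h>

 #ifndef MAP_HUGETLB
 #define MAP_HUGETLB    0x40000                 /* x86 value */
 #endif
 #ifndef MAP_HUGE_SHIFT
 #define MAP_HUGE_SHIFT 26
 #endif
 #ifndef MAP_HUGE_1GB
 #define MAP_HUGE_1GB   (30 << MAP_HUGE_SHIFT)  /* log2(1GB) = 30 */
 #endif

 #define GB (1024UL * 1024 * 1024)

 int main(void)
 {
         void *p = mmap(NULL, GB, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
                        MAP_HUGE_1GB, -1, 0);

         if (p == MAP_FAILED) {
                 perror("mmap");
                 return 1;
         }
         memset(p, 0, GB);       /* fault the gigantic page in */
         munmap(p, GB);
         return 0;
 }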

The one problem with gigantic page allocation at runtime is that it
can't be serviced by the buddy allocator.  To overcome that problem,
this commit scans all zones of a node looking for a large enough
contiguous region (1GB on x86_64).  When one is found, it's allocated
by means of CMA, that is, by calling alloc_contig_range() to do the
actual allocation (see alloc_gigantic_page() in the diff below).

One expected issue with this approach is that such gigantic contiguous
regions tend to vanish as the system runs.  The best way to avoid this
for now is to make gigantic page allocations very early during boot,
say from an init script.  Other possible optimizations include using
compaction, which CMA supports but this commit does not explicitly use.
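
For instance, an early-boot helper could simply write the desired count
to sysfs before memory fragments.  A minimal sketch (illustrative only,
not part of this patch; the node and path follow the example above):

 /* Illustrative only: reserve two 1GB pages on node 1 early in boot. */
 #include <stdio.h>

 int main(void)
 {
         const char *path = "/sys/devices/system/node/node1/hugepages/"
                            "hugepages-1048576kB/nr_hugepages";
         FILE *f = fopen(path, "w");

         if (!f) {
                 perror(path);
                 return 1;
         }
         fprintf(f, "2\n");
         return fclose(f) ? 1 : 0;
 }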

It's also important to note the following:

 1. Gigantic pages allocated at boot time by the hugepages= command-line
    option can be freed at runtime just fine

 2. This commit adds support for gigantic pages only to x86_64.  The
    reason is that I don't have access to nor experience with other
    archs.  The code is arch-independent, though, so it should be
    simple to add support for other archs

 3. I didn't add support for hugepage overcommit, that is, allocating
    a gigantic page on demand when
    /proc/sys/vm/nr_overcommit_hugepages > 0.  The reason is that I
    don't think it's reasonable to do the hard and long work required
    to allocate a gigantic page at fault time.  But it should be
    simple to add this if wanted

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/hugetlb.c

index 13eef24cf1f329c82fac44bd5507148531a2833f..86591b4ccdc50746c82de93eefbb300732d581f6 100644
@@ -681,11 +681,150 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
                ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
                nr_nodes--)
 
+#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
+static void destroy_compound_gigantic_page(struct page *page,
+                                       unsigned long order)
+{
+       int i;
+       int nr_pages = 1 << order;
+       struct page *p = page + 1;
+
+       for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+               __ClearPageTail(p);
+               set_page_refcounted(p);
+               p->first_page = NULL;
+       }
+
+       set_compound_order(page, 0);
+       __ClearPageHead(page);
+}
+
+static void free_gigantic_page(struct page *page, unsigned order)
+{
+       free_contig_range(page_to_pfn(page), 1 << order);
+}
+
+static int __alloc_gigantic_page(unsigned long start_pfn,
+                               unsigned long nr_pages)
+{
+       unsigned long end_pfn = start_pfn + nr_pages;
+       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+}
+
+static bool pfn_range_valid_gigantic(unsigned long start_pfn,
+                               unsigned long nr_pages)
+{
+       unsigned long i, end_pfn = start_pfn + nr_pages;
+       struct page *page;
+
+       for (i = start_pfn; i < end_pfn; i++) {
+               if (!pfn_valid(i))
+                       return false;
+
+               page = pfn_to_page(i);
+
+               if (PageReserved(page))
+                       return false;
+
+               if (page_count(page) > 0)
+                       return false;
+
+               if (PageHuge(page))
+                       return false;
+       }
+
+       return true;
+}
+
+static bool zone_spans_last_pfn(const struct zone *zone,
+                       unsigned long start_pfn, unsigned long nr_pages)
+{
+       unsigned long last_pfn = start_pfn + nr_pages - 1;
+       return zone_spans_pfn(zone, last_pfn);
+}
+
+static struct page *alloc_gigantic_page(int nid, unsigned order)
+{
+       unsigned long nr_pages = 1 << order;
+       unsigned long ret, pfn, flags;
+       struct zone *z;
+
+       z = NODE_DATA(nid)->node_zones;
+       for (; z - NODE_DATA(nid)->node_zones < MAX_NR_ZONES; z++) {
+               spin_lock_irqsave(&z->lock, flags);
+
+               pfn = ALIGN(z->zone_start_pfn, nr_pages);
+               while (zone_spans_last_pfn(z, pfn, nr_pages)) {
+                       if (pfn_range_valid_gigantic(pfn, nr_pages)) {
+                               /*
+                                * We release the zone lock here because
+                                * alloc_contig_range() will also lock the zone
+                                * at some point. If there's an allocation
+                                * spinning on this lock, it may win the race
+                                * and cause alloc_contig_range() to fail...
+                                */
+                               spin_unlock_irqrestore(&z->lock, flags);
+                               ret = __alloc_gigantic_page(pfn, nr_pages);
+                               if (!ret)
+                                       return pfn_to_page(pfn);
+                               spin_lock_irqsave(&z->lock, flags);
+                       }
+                       pfn += nr_pages;
+               }
+
+               spin_unlock_irqrestore(&z->lock, flags);
+       }
+
+       return NULL;
+}
+
+static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
+static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+
+static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
+{
+       struct page *page;
+
+       page = alloc_gigantic_page(nid, huge_page_order(h));
+       if (page) {
+               prep_compound_gigantic_page(page, huge_page_order(h));
+               prep_new_huge_page(h, page, nid);
+       }
+
+       return page;
+}
+
+static int alloc_fresh_gigantic_page(struct hstate *h,
+                               nodemask_t *nodes_allowed)
+{
+       struct page *page = NULL;
+       int nr_nodes, node;
+
+       for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+               page = alloc_fresh_gigantic_page_node(h, node);
+               if (page)
+                       return 1;
+       }
+
+       return 0;
+}
+
+static inline bool gigantic_page_supported(void) { return true; }
+#else
+static inline bool gigantic_page_supported(void) { return false; }
+static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void destroy_compound_gigantic_page(struct page *page,
+                                               unsigned long order) { }
+static inline int alloc_fresh_gigantic_page(struct hstate *h,
+                                       nodemask_t *nodes_allowed) { return 0; }
+#endif
+
 static void update_and_free_page(struct hstate *h, struct page *page)
 {
        int i;
 
-       VM_BUG_ON(hstate_is_gigantic(h));
+       if (hstate_is_gigantic(h) && !gigantic_page_supported())
+               return;
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[page_to_nid(page)]--;
@@ -698,8 +837,13 @@ static void update_and_free_page(struct hstate *h, struct page *page)
        VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
        set_compound_page_dtor(page, NULL);
        set_page_refcounted(page);
-       arch_release_hugepage(page);
-       __free_pages(page, huge_page_order(h));
+       if (hstate_is_gigantic(h)) {
+               destroy_compound_gigantic_page(page, huge_page_order(h));
+               free_gigantic_page(page, huge_page_order(h));
+       } else {
+               arch_release_hugepage(page);
+               __free_pages(page, huge_page_order(h));
+       }
 }
 
 struct hstate *size_to_hstate(unsigned long size)
@@ -738,7 +882,7 @@ static void free_huge_page(struct page *page)
        if (restore_reserve)
                h->resv_huge_pages++;
 
-       if (h->surplus_huge_pages_node[nid] && !hstate_is_gigantic(h)) {
+       if (h->surplus_huge_pages_node[nid]) {
                /* remove the page from active list */
                list_del(&page->lru);
                update_and_free_page(h, page);
@@ -842,9 +986,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
        struct page *page;
 
-       if (hstate_is_gigantic(h))
-               return NULL;
-
        page = alloc_pages_exact_node(nid,
                htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
                                                __GFP_REPEAT|__GFP_NOWARN,
@@ -1479,7 +1620,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 {
        unsigned long min_count, ret;
 
-       if (hstate_is_gigantic(h))
+       if (hstate_is_gigantic(h) && !gigantic_page_supported())
                return h->max_huge_pages;
 
        /*
@@ -1506,7 +1647,11 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                 * and reducing the surplus.
                 */
                spin_unlock(&hugetlb_lock);
-               ret = alloc_fresh_huge_page(h, nodes_allowed);
+               if (hstate_is_gigantic(h)) {
+                       ret = alloc_fresh_gigantic_page(h, nodes_allowed);
+               } else {
+                       ret = alloc_fresh_huge_page(h, nodes_allowed);
+               }
                spin_lock(&hugetlb_lock);
                if (!ret)
                        goto out;
@@ -1606,7 +1751,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
                goto out;
 
        h = kobj_to_hstate(kobj, &nid);
-       if (hstate_is_gigantic(h)) {
+       if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
                err = -EINVAL;
                goto out;
        }
@@ -2113,7 +2258,7 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 
        tmp = h->max_huge_pages;
 
-       if (write && hstate_is_gigantic(h))
+       if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
                return -EINVAL;
 
        table->data = &tmp;