fs/nilfs2/namei.c: remove unnecessary new_valid_dev() check

[karo-tx-linux.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index cc97ee078f755aaa484e1ea689fa26dcc46de329..d0499fff8c7fb1ee2f33a34bd9e424420939a654 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,7 +181,7 @@ bool pm_suspended_storage(void)
  #endif /* CONFIG_PM_SLEEP */
  
  #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
-int pageblock_order __read_mostly;
+unsigned int pageblock_order __read_mostly;
  #endif
  
  static void __free_pages_ok(struct page *page, unsigned int order);
@@ -229,6 +229,17 @@ static char * const zone_names[MAX_NR_ZONES] = {
  #endif
  };
  
+compound_page_dtor * const compound_page_dtors[] = {
+       NULL,
+       free_compound_page,
+#ifdef CONFIG_HUGETLB_PAGE
+       free_huge_page,
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       free_transhuge_page,
+#endif
+};
+
  int min_free_kbytes = 1024;
  int user_min_free_kbytes = -1;
  
@@ -436,39 +447,38 @@ out:
  /*
   * Higher-order pages are called "compound pages".  They are structured thusly:
   *
- * The first PAGE_SIZE page is called the "head page".
+ * The first PAGE_SIZE page is called the "head page" and has PG_head set.
   *
- * The remaining PAGE_SIZE pages are called "tail pages".
+ * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded
+ * in bit 0 of page->compound_head. The remaining bits point to the head page.
   *
- * All pages have PG_compound set.  All tail pages have their ->first_page
- * pointing at the head page.
+ * The first tail page's ->compound_dtor holds the offset in the array of
+ * compound page destructors. See compound_page_dtors.
   *
- * The first tail page's ->lru.next holds the address of the compound page's
- * put_page() function.  Its ->lru.prev holds the order of allocation.
+ * The first tail page's ->compound_order holds the order of allocation.
   * This usage means that zero-order pages may not be compound.
   */
  
-static void free_compound_page(struct page *page)
+void free_compound_page(struct page *page)
  {
         __free_pages_ok(page, compound_order(page));
  }
  
-void prep_compound_page(struct page *page, unsigned long order)
+void prep_compound_page(struct page *page, unsigned int order)
  {
         int i;
         int nr_pages = 1 << order;
  
-       set_compound_page_dtor(page, free_compound_page);
+       set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
         set_compound_order(page, order);
         __SetPageHead(page);
         for (i = 1; i < nr_pages; i++) {
                 struct page *p = page + i;
                 set_page_count(p, 0);
-               p->first_page = page;
-               /* Make sure p->first_page is always valid for PageTail() */
-               smp_wmb();
-               __SetPageTail(p);
+               p->mapping = TAIL_MAPPING;
+               set_compound_head(p, page);
         }
+       atomic_set(compound_mapcount_ptr(page), -1);
  }
  
  #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -656,7 +666,7 @@ static inline void __free_one_page(struct page *page,
         unsigned long combined_idx;
         unsigned long uninitialized_var(buddy_idx);
         struct page *buddy;
-       int max_order = MAX_ORDER;
+       unsigned int max_order = MAX_ORDER;
  
         VM_BUG_ON(!zone_is_initialized(zone));
         VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
@@ -669,7 +679,7 @@ static inline void __free_one_page(struct page *page,
                  * pageblock. Without this, pageblock isolation
                  * could cause incorrect freepage accounting.
                  */
-               max_order = min(MAX_ORDER, pageblock_order + 1);
+               max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
         } else {
                 __mod_zone_freepage_state(zone, 1 << order, migratetype);
         }
@@ -733,7 +743,7 @@ static inline int free_pages_check(struct page *page)
         const char *bad_reason = NULL;
         unsigned long bad_flags = 0;
  
-       if (unlikely(page_mapcount(page)))
+       if (unlikely(atomic_read(&page->_mapcount) != -1))
                 bad_reason = "nonzero mapcount";
         if (unlikely(page->mapping != NULL))
                 bad_reason = "non-NULL mapping";
@@ -845,17 +855,52 @@ static void free_one_page(struct zone *zone,
  
  static int free_tail_pages_check(struct page *head_page, struct page *page)
  {
-       if (!IS_ENABLED(CONFIG_DEBUG_VM))
-               return 0;
+       int ret = 1;
+
+       /*
+        * We rely on page->lru.next never having bit 0 set, unless the page
+        * is PageTail(). Let's make sure that's true even for poisoned ->lru.
+        */
+       BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
+
+       if (!IS_ENABLED(CONFIG_DEBUG_VM)) {
+               ret = 0;
+               goto out;
+       }
+       switch (page - head_page) {
+       case 1:
+               /* the first tail page: ->mapping is compound_mapcount() */
+               if (unlikely(compound_mapcount(page))) {
+                       bad_page(page, "nonzero compound_mapcount", 0);
+                       goto out;
+               }
+               break;
+       case 2:
+               /*
+                * the second tail page: ->mapping is
+                * page_deferred_list().next -- ignore value.
+                */
+               break;
+       default:
+               if (page->mapping != TAIL_MAPPING) {
+                       bad_page(page, "corrupted mapping in tail page", 0);
+                       goto out;
+               }
+               break;
+       }
         if (unlikely(!PageTail(page))) {
                 bad_page(page, "PageTail not set", 0);
-               return 1;
+               goto out;
         }
-       if (unlikely(page->first_page != head_page)) {
-               bad_page(page, "first_page not consistent", 0);
-               return 1;
+       if (unlikely(compound_head(page) != head_page)) {
+               bad_page(page, "compound_head not consistent", 0);
+               goto out;
         }
-       return 0;
+       ret = 0;
+out:
+       page->mapping = NULL;
+       clear_compound_head(page);
+       return ret;
  }
  
  static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -922,6 +967,10 @@ void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
                         struct page *page = pfn_to_page(start_pfn);
  
                         init_reserved_page(start_pfn);
+
+                       /* Avoid false-positive PageTail() */
+                       INIT_LIST_HEAD(&page->lru);
+
                         SetPageReserved(page);
                 }
         }
@@ -1313,7 +1362,7 @@ static inline int check_new_page(struct page *page)
         const char *bad_reason = NULL;
         unsigned long bad_flags = 0;
  
-       if (unlikely(page_mapcount(page)))
+       if (unlikely(atomic_read(&page->_mapcount) != -1))
                 bad_reason = "nonzero mapcount";
         if (unlikely(page->mapping != NULL))
                 bad_reason = "non-NULL mapping";
@@ -1448,7 +1497,7 @@ int move_freepages(struct zone *zone,
                           int migratetype)
  {
         struct page *page;
-       unsigned long order;
+       unsigned int order;
         int pages_moved = 0;
  
  #ifndef CONFIG_HOLES_IN_ZONE
@@ -1561,7 +1610,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
  static void steal_suitable_fallback(struct zone *zone, struct page *page,
                                                           int start_type)
  {
-       int current_order = page_order(page);
+       unsigned int current_order = page_order(page);
         int pages;
  
         /* Take ownership for orders >= pageblock_order */
@@ -2248,13 +2297,13 @@ failed:
  static struct {
         struct fault_attr attr;
  
-       u32 ignore_gfp_highmem;
-       u32 ignore_gfp_reclaim;
+       bool ignore_gfp_highmem;
+       bool ignore_gfp_reclaim;
         u32 min_order;
  } fail_page_alloc = {
         .attr = FAULT_ATTR_INITIALIZER,
-       .ignore_gfp_reclaim = 1,
-       .ignore_gfp_highmem = 1,
+       .ignore_gfp_reclaim = true,
+       .ignore_gfp_highmem = true,
         .min_order = 1,
  };
  
@@ -2322,8 +2371,10 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
  #endif /* CONFIG_FAIL_PAGE_ALLOC */
  
  /*
- * Return true if free pages are above 'mark'. This takes into account the order
- * of the allocation.
+ * Return true if free base pages are above 'mark'. For high-order checks it
+ * will return true if the order-0 watermark is reached and there is at least
+ * one free page of a suitable size. Checking now avoids taking the zone lock
+ * to check in the allocation paths if no pages are free.
   */
  static bool __zone_watermark_ok(struct zone *z, unsigned int order,
                         unsigned long mark, int classzone_idx, int alloc_flags,
@@ -2331,7 +2382,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
  {
         long min = mark;
         int o;
-       long free_cma = 0;
+       const int alloc_harder = (alloc_flags & ALLOC_HARDER);
  
         /* free_pages may go negative - that's OK */
         free_pages -= (1 << order) - 1;
@@ -2344,7 +2395,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
          * the high-atomic reserves. This will over-estimate the size of the
          * atomic reserve but it avoids a search.
          */
-       if (likely(!(alloc_flags & ALLOC_HARDER)))
+       if (likely(!alloc_harder))
                 free_pages -= z->nr_reserved_highatomic;
         else
                 min -= min / 4;
@@ -2352,22 +2403,45 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
  #ifdef CONFIG_CMA
         /* If allocation can't use CMA areas don't use free CMA pages */
         if (!(alloc_flags & ALLOC_CMA))
-               free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
+               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
  #endif
  
-       if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
+       /*
+        * Check watermarks for an order-0 allocation request. If these
+        * are not met, then a high-order request also cannot go ahead
+        * even if a suitable page happened to be free.
+        */
+       if (free_pages <= min + z->lowmem_reserve[classzone_idx])
                 return false;
-       for (o = 0; o < order; o++) {
-               /* At the next order, this order's pages become unavailable */
-               free_pages -= z->free_area[o].nr_free << o;
  
-               /* Require fewer higher order pages to be free */
-               min >>= 1;
+       /* If this is an order-0 request then the watermark is fine */
+       if (!order)
+               return true;
+
+       /* For a high-order request, check at least one suitable page is free */
+       for (o = order; o < MAX_ORDER; o++) {
+               struct free_area *area = &z->free_area[o];
+               int mt;
+
+               if (!area->nr_free)
+                       continue;
+
+               if (alloc_harder)
+                       return true;
+
+               for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
+                       if (!list_empty(&area->free_list[mt]))
+                               return true;
+               }
  
-               if (free_pages <= min)
-                       return false;
+#ifdef CONFIG_CMA
+               if ((alloc_flags & ALLOC_CMA) &&
+                   !list_empty(&area->free_list[MIGRATE_CMA])) {
+                       return true;
+               }
+#endif
         }
-       return true;
+       return false;
  }
  
  bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
@@ -2589,7 +2663,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
                 DEFAULT_RATELIMIT_INTERVAL,
                 DEFAULT_RATELIMIT_BURST);
  
-void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
  {
         unsigned int filter = SHOW_MEM_FILTER_NODES;
  
@@ -2623,7 +2697,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
                 va_end(args);
         }
  
-       pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
+       pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n",
                 current->comm, order, gfp_mask);
  
         dump_stack();
@@ -3362,24 +3436,24 @@ EXPORT_SYMBOL(__free_page_frag);
  struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
  {
         struct page *page;
-       struct mem_cgroup *memcg = NULL;
  
-       if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
-               return NULL;
         page = alloc_pages(gfp_mask, order);
-       memcg_kmem_commit_charge(page, memcg, order);
+       if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
+               __free_pages(page, order);
+               page = NULL;
+       }
         return page;
  }
  
  struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
  {
         struct page *page;
-       struct mem_cgroup *memcg = NULL;
  
-       if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
-               return NULL;
         page = alloc_pages_node(nid, gfp_mask, order);
-       memcg_kmem_commit_charge(page, memcg, order);
+       if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
+               __free_pages(page, order);
+               page = NULL;
+       }
         return page;
  }
  
@@ -3389,7 +3463,7 @@ struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
   */
  void __free_kmem_pages(struct page *page, unsigned int order)
  {
-       memcg_kmem_uncharge_pages(page, order);
+       memcg_kmem_uncharge(page, order);
         __free_pages(page, order);
  }
  
@@ -3401,7 +3475,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order)
         }
  }
  
-static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
+static void *make_alloc_exact(unsigned long addr, unsigned int order,
+               size_t size)
  {
         if (addr) {
                 unsigned long alloc_end = addr + (PAGE_SIZE << order);
@@ -3451,7 +3526,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
   */
  void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
  {
-       unsigned order = get_order(size);
+       unsigned int order = get_order(size);
         struct page *p = alloc_pages_node(nid, gfp_mask, order);
         if (!p)
                 return NULL;
@@ -3752,7 +3827,8 @@ void show_free_areas(unsigned int filter)
         }
  
         for_each_populated_zone(zone) {
-               unsigned long nr[MAX_ORDER], flags, order, total = 0;
+               unsigned int order;
+               unsigned long nr[MAX_ORDER], flags, total = 0;
                 unsigned char types[MAX_ORDER];
  
                 if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -4101,7 +4177,7 @@ static void build_zonelists(pg_data_t *pgdat)
         nodemask_t used_mask;
         int local_node, prev_node;
         struct zonelist *zonelist;
-       int order = current_zonelist_order;
+       unsigned int order = current_zonelist_order;
  
         /* initialize zonelists */
         for (i = 0; i < MAX_ZONELISTS; i++) {
@@ -5216,6 +5292,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
  
  static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
  {
+       unsigned long __maybe_unused offset = 0;
+
         /* Skip empty nodes */
         if (!pgdat->node_spanned_pages)
                 return;
@@ -5232,6 +5310,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
                  * for the buddy allocator to function correctly.
                  */
                 start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
+               offset = pgdat->node_start_pfn - start;
                 end = pgdat_end_pfn(pgdat);
                 end = ALIGN(end, MAX_ORDER_NR_PAGES);
                 size =  (end - start) * sizeof(struct page);
@@ -5239,7 +5318,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
                 if (!map)
                         map = memblock_virt_alloc_node_nopanic(size,
                                                                pgdat->node_id);
-               pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
+               pgdat->node_mem_map = map + offset;
         }
  #ifndef CONFIG_NEED_MULTIPLE_NODES
         /*
@@ -5247,9 +5326,9 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
          */
         if (pgdat == NODE_DATA(0)) {
                 mem_map = NODE_DATA(0)->node_mem_map;
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#if defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) || defined(CONFIG_FLATMEM)
                 if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
-                       mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
+                       mem_map -= offset;
  #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
         }
  #endif
@@ -5461,13 +5540,17 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                  */
                 required_movablecore =
                         roundup(required_movablecore, MAX_ORDER_NR_PAGES);
+               required_movablecore = min(totalpages, required_movablecore);
                 corepages = totalpages - required_movablecore;
  
                 required_kernelcore = max(required_kernelcore, corepages);
         }
  
-       /* If kernelcore was not specified, there is no ZONE_MOVABLE */
-       if (!required_kernelcore)
+       /*
+        * If kernelcore was not specified or kernelcore size is larger
+        * than totalpages, there is no ZONE_MOVABLE.
+        */
+       if (!required_kernelcore || required_kernelcore >= totalpages)
                 goto out;
  
         /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
@@ -6623,7 +6706,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
                        unsigned migratetype)
  {
         unsigned long outer_start, outer_end;
-       int ret = 0, order;
+       unsigned int order;
+       int ret = 0;
  
         struct compact_control cc = {
                 .nr_migratepages = 0,