mm: collect LRU list heads into struct lruvec
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 93baebcc06f3810a4db2f8393213a69371a367f6..25c248eb7d5ff3e7a3b074f01fe9e08ad86c0911 100644
@@ -97,6 +97,14 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
+/*
+ * When calculating the number of globally allowed dirty pages, there
+ * is a certain number of per-zone reserves that should not be
+ * considered dirtyable memory.  This is the sum of those reserves
+ * over all existing zones that contribute dirtyable memory.
+ */
+unsigned long dirty_balance_reserve __read_mostly;
+
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -722,32 +730,23 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        local_irq_restore(flags);
 }
 
-/*
- * permit the bootmem allocator to evade page validation on high-order frees
- */
 void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
-       if (order == 0) {
-               __ClearPageReserved(page);
-               set_page_count(page, 0);
-               set_page_refcounted(page);
-               __free_page(page);
-       } else {
-               int loop;
-
-               prefetchw(page);
-               for (loop = 0; loop < (1 << order); loop++) {
-                       struct page *p = &page[loop];
+       unsigned int nr_pages = 1 << order;
+       unsigned int loop;
 
-                       if (loop + 1 < (1 << order))
-                               prefetchw(p + 1);
-                       __ClearPageReserved(p);
-                       set_page_count(p, 0);
-               }
+       prefetchw(page);
+       for (loop = 0; loop < nr_pages; loop++) {
+               struct page *p = &page[loop];
 
-               set_page_refcounted(page);
-               __free_pages(page, order);
+               if (loop + 1 < nr_pages)
+                       prefetchw(p + 1);
+               __ClearPageReserved(p);
+               set_page_count(p, 0);
        }
+
+       set_page_refcounted(page);
+       __free_pages(page, order);
 }
 
 
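With the order == 0 special case gone, __free_pages_bootmem() handles any
naturally aligned block the same way.  A hedged sketch of a caller releasing
a pfn range in the largest possible chunks; sketch_free_pfn_range() is a
made-up helper, the in-tree bootmem code drives this from its bitmap instead:

static unsigned long sketch_free_pfn_range(unsigned long start, unsigned long end)
{
	unsigned long pfn = start;
	unsigned long released = 0;

	while (pfn < end) {
		/* Largest order allowed by both alignment and range. */
		unsigned int order = MAX_ORDER - 1;

		while (order && ((pfn & ((1UL << order) - 1)) ||
				 pfn + (1UL << order) > end))
			order--;

		__free_pages_bootmem(pfn_to_page(pfn), order);
		pfn += 1UL << order;
		released += 1UL << order;
	}
	return released;
}
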
@@ -1517,7 +1516,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
        long min = mark;
        int o;
 
-       free_pages -= (1 << order) + 1;
+       free_pages -= (1 << order) - 1;
        if (alloc_flags & ALLOC_HIGH)
                min -= min / 2;
        if (alloc_flags & ALLOC_HARDER)
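
A worked example of the corrected adjustment: with order = 2, min = 8 and 12
free pages, the allocation leaves 12 - 4 = 8 pages, exactly the watermark, so
the check should pass.  The code computes 12 - ((1 << 2) - 1) = 9, and the
later "free_pages <= min" test (9 <= 8) is false, so it does pass; the old
"+ 1" variant computed 12 - 5 = 7 and rejected the request two pages too
early.  A standalone sketch of the adjusted check, with simplified parameters
(no lowmem reserve, no per-order loop over the free areas):

#include <stdbool.h>

/* Sketch: does an order-sized allocation keep the zone at or above min? */
static bool sketch_watermark_ok(long free_pages, unsigned int order, long min)
{
	/*
	 * Subtract the pages the request will consume, minus one, so
	 * the strict "<=" comparison behaves like the order-0 case.
	 */
	free_pages -= (1L << order) - 1;

	return free_pages > min;
}
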
@@ -1727,6 +1726,35 @@ zonelist_scan:
                if ((alloc_flags & ALLOC_CPUSET) &&
                        !cpuset_zone_allowed_softwall(zone, gfp_mask))
                                continue;
+               /*
+                * When allocating a page cache page for writing, we
+                * want to get it from a zone that is within its dirty
+                * limit, such that no single zone holds more than its
+                * proportional share of globally allowed dirty pages.
+                * The dirty limits take into account the zone's
+                * lowmem reserves and high watermark so that kswapd
+                * should be able to balance it without having to
+                * write pages from its LRU list.
+                *
+                * This may look like it could increase pressure on
+                * lower zones by failing allocations in higher zones
+                * before they are full.  But the pages that do spill
+                * over are limited as the lower zones are protected
+                * by this very same mechanism.  It should not become
+                * a practical burden to them.
+                *
+                * XXX: For now, allow allocations to potentially
+                * exceed the per-zone dirty limit in the slowpath
+                * (ALLOC_WMARK_LOW unset) before going into reclaim,
+                * which is important when on a NUMA setup the allowed
+                * zones are together not big enough to reach the
+                * global limit.  The proper fix for these situations
+                * will require awareness of zones in the
+                * dirty-throttling and the flusher threads.
+                */
+               if ((alloc_flags & ALLOC_WMARK_LOW) &&
+                   (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
+                       goto this_zone_full;
 
                BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
                if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
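
The zone_dirty_ok() test relied on here is provided by the writeback code in
the same series.  A simplified sketch of what such a per-zone check might
look like; the sketch_ helpers and the plain vm_dirty_ratio proportion are
illustrative assumptions, and sketch_zone_dirtyable_memory() is sketched
after the last hunk below:

/*
 * Sketch: a zone's share of the global dirty limit, proportional to
 * its share of dirtyable memory.
 */
static unsigned long sketch_zone_dirty_limit(struct zone *zone)
{
	unsigned long zone_memory = sketch_zone_dirtyable_memory(zone);

	return vm_dirty_ratio * zone_memory / 100;
}

/* Sketch: is the zone still within that share? */
static bool sketch_zone_dirty_ok(struct zone *zone)
{
	unsigned long limit = sketch_zone_dirty_limit(zone);

	return zone_page_state(zone, NR_FILE_DIRTY) +
	       zone_page_state(zone, NR_WRITEBACK) +
	       zone_page_state(zone, NR_UNSTABLE_NFS) <= limit;
}
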
@@ -4260,7 +4288,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
                zone_pcp_init(zone);
                for_each_lru(l)
-                       INIT_LIST_HEAD(&zone->lru[l].list);
+                       INIT_LIST_HEAD(&zone->lruvec.lists[l]);
                zone->reclaim_stat.recent_rotated[0] = 0;
                zone->reclaim_stat.recent_rotated[1] = 0;
                zone->reclaim_stat.recent_scanned[0] = 0;
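
The hunk above replaces the open-coded per-zone array of LRU list heads with
a single embedded structure, per the commit title.  A sketch of the layout
implied by zone->lruvec.lists[l], assuming the existing NR_LRU_LISTS enum;
the authoritative definition lives in include/linux/mmzone.h in the same
series:

/* Sketch: all LRU list heads of a zone collected in one place. */
struct lruvec {
	struct list_head lists[NR_LRU_LISTS];
};

Grouping the list heads this way lets later patches attach further per-LRU
state to the lruvec rather than to the zone directly.
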
@@ -4822,8 +4850,19 @@ static void calculate_totalreserve_pages(void)
                        if (max > zone->present_pages)
                                max = zone->present_pages;
                        reserve_pages += max;
+                       /*
+                        * Lowmem reserves are not available to
+                        * GFP_HIGHUSER page cache allocations and
+                        * kswapd tries to balance zones to their high
+                        * watermark.  As a result, neither should be
+                        * regarded as dirtyable memory, to prevent a
+                        * situation where reclaim has to clean pages
+                        * in order to balance the zones.
+                        */
+                       zone->dirty_balance_reserve = max;
                }
        }
+       dirty_balance_reserve = reserve_pages;
        totalreserve_pages = reserve_pages;
 }
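
The per-zone reserve stored above is then subtracted when the zone's
dirtyable memory is sized, mirroring the global calculation.  A sketch under
the same assumptions as before (zone_reclaimable_pages() returning the zone's
reclaimable page cache); this completes the sketch_zone_dirty_ok() example
given earlier:

/* Sketch: memory in this zone that may legitimately be dirtied. */
static unsigned long sketch_zone_dirtyable_memory(struct zone *zone)
{
	unsigned long nr_pages;

	nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
		   zone_reclaimable_pages(zone);

	/* High watermark and lowmem reserves cannot hold page cache. */
	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);

	return nr_pages;
}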