diff --git a/mm/compaction.c b/mm/compaction.c
index 20011a850fef8119bfdc8cb26200446283827920..8be430b812def9f32058ffb8c7e259ff1375d98e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -33,6 +33,7 @@ struct compact_control {
        unsigned long nr_migratepages;  /* Number of pages to migrate */
        unsigned long free_pfn;         /* isolate_freepages search base */
        unsigned long migrate_pfn;      /* isolate_migratepages search base */
+       bool sync;                      /* Synchronous migration */
 
        /* Account for isolated anon and file pages */
        unsigned long nr_anon;
@@ -41,6 +42,8 @@ struct compact_control {
        unsigned int order;             /* order a direct compactor needs */
        int migratetype;                /* MOVABLE, RECLAIMABLE etc */
        struct zone *zone;
+
+       int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
@@ -239,6 +242,7 @@ static unsigned long isolate_migratepages(struct zone *zone,
                                        struct compact_control *cc)
 {
        unsigned long low_pfn, end_pfn;
+       unsigned long last_pageblock_nr = 0, pageblock_nr;
        unsigned long nr_scanned = 0, nr_isolated = 0;
        struct list_head *migratelist = &cc->migratepages;
 
@@ -279,10 +283,39 @@ static unsigned long isolate_migratepages(struct zone *zone,
                if (PageBuddy(page))
                        continue;
 
+               /*
+                * For async migration, also only scan in MOVABLE pageblocks.
+                * Async migration is optimistic: it does only the minimum
+                * amount of work to see if that satisfies the allocation.
+                */
+               pageblock_nr = low_pfn >> pageblock_order;
+               if (!cc->sync && last_pageblock_nr != pageblock_nr &&
+                               get_pageblock_migratetype(page) != MIGRATE_MOVABLE) {
+                       low_pfn += pageblock_nr_pages;
+                       low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
+                       last_pageblock_nr = pageblock_nr;
+                       continue;
+               }
+
+               if (!PageLRU(page))
+                       continue;
+
+               /*
+                * PageLRU is set, and lru_lock excludes isolation,
+                * splitting and collapsing (collapsing has already
+                * happened if PageLRU is set).
+                */
+               if (PageTransHuge(page)) {
+                       low_pfn += (1 << compound_order(page)) - 1;
+                       continue;
+               }
+
                /* Try isolate the page */
                if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
                        continue;
 
+               VM_BUG_ON(PageTransCompound(page));
+
                /* Successfully isolated */
                del_page_from_lru_list(zone, page, page_lru(page));
                list_add(&page->lru, migratelist);
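
The skip-ahead in the hunk above is easy to misread: low_pfn is advanced by a whole pageblock, rounded up to a pageblock boundary, then backed off by one pfn so that the enclosing scan loop's increment resumes exactly at the start of a pageblock. The following stand-alone userspace sketch walks that arithmetic; it is illustrative only, and the ALIGN() definition plus an order-9 (512-page) pageblock are assumptions mirroring a common x86 configuration, not kernel code.

/*
 * Illustrative userspace model of the skip-ahead above; not kernel code.
 * pageblock_order of 9 (512 pages) and the ALIGN() macro are assumptions.
 */
#include <stdio.h>

#define pageblock_order		9
#define pageblock_nr_pages	(1UL << pageblock_order)
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long low_pfn;

	/* pretend every page we look at sits in a non-MOVABLE pageblock */
	for (low_pfn = 1024; low_pfn < 3000; low_pfn++) {
		unsigned long before = low_pfn;

		/* the two statements from the hunk above */
		low_pfn += pageblock_nr_pages;
		low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;

		printf("gave up at pfn %lu, loop resumes at pfn %lu\n",
		       before, low_pfn + 1);
	}
	return 0;
}

Running it shows the scan jumping from one pageblock boundary to the next, 512 pfns at a time, which is exactly the cheap, skim-the-easy-blocks behaviour async compaction wants in non-MOVABLE pageblocks.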
@@ -351,10 +384,10 @@ static void update_nr_listpages(struct compact_control *cc)
 }
 
 static int compact_finished(struct zone *zone,
-                                               struct compact_control *cc)
+                           struct compact_control *cc)
 {
        unsigned int order;
-       unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+       unsigned long watermark;
 
        if (fatal_signal_pending(current))
                return COMPACT_PARTIAL;
@@ -364,12 +397,31 @@ static int compact_finished(struct zone *zone,
                return COMPACT_COMPLETE;
 
        /* Compaction run is not finished if the watermark is not met */
+       if (cc->compact_mode != COMPACT_MODE_KSWAPD)
+               watermark = low_wmark_pages(zone);
+       else
+               watermark = high_wmark_pages(zone);
+       watermark += (1 << cc->order);
+
        if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
                return COMPACT_CONTINUE;
 
+       /*
+        * order == -1 is expected when compacting via
+        * /proc/sys/vm/compact_memory
+        */
        if (cc->order == -1)
                return COMPACT_CONTINUE;
 
+       /*
+        * Generating only one page of the right order is not enough
+        * for kswapd; we must continue until we're above the high
+        * watermark, as a pool for high order GFP_ATOMIC allocations
+        * too.
+        */
+       if (cc->compact_mode == COMPACT_MODE_KSWAPD)
+               return COMPACT_CONTINUE;
+
        /* Direct compactor: Is a suitable page free? */
        for (order = cc->order; order < MAX_ORDER; order++) {
                /* Job done if page is free of the right migratetype */
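
A compact way to see what the mode check above changes is to compute the free-page target it implies. The snippet below is a stand-alone model, not kernel code: the watermark numbers and the COMPACT_MODE_* values are made-up placeholders standing in for low_wmark_pages()/high_wmark_pages() and the real definitions in compaction.h.

/* Stand-alone illustration; watermark numbers and mode values are made up. */
#include <stdio.h>

#define LOW_WMARK_PAGES			1024UL	/* stands in for low_wmark_pages(zone) */
#define HIGH_WMARK_PAGES		1536UL	/* stands in for high_wmark_pages(zone) */
#define COMPACT_MODE_DIRECT_RECLAIM	0	/* placeholder value */
#define COMPACT_MODE_KSWAPD		1	/* placeholder value */

/* mirrors the watermark selection in compact_finished() above */
static unsigned long compaction_target(int compact_mode, unsigned int order)
{
	unsigned long watermark;

	if (compact_mode != COMPACT_MODE_KSWAPD)
		watermark = LOW_WMARK_PAGES;
	else
		watermark = HIGH_WMARK_PAGES;

	return watermark + (1UL << order);
}

int main(void)
{
	printf("direct reclaim, order 2: needs %lu free pages\n",
	       compaction_target(COMPACT_MODE_DIRECT_RECLAIM, 2));
	printf("kswapd,         order 2: needs %lu free pages\n",
	       compaction_target(COMPACT_MODE_KSWAPD, 2));
	return 0;
}

Direct compaction stops once the lower target is met, while kswapd mode keeps going toward the high watermark and, as the comment above notes, does not stop just because one page of the right order already exists.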
@@ -384,10 +436,69 @@ static int compact_finished(struct zone *zone,
        return COMPACT_CONTINUE;
 }
 
+/*
+ * compaction_suitable: Is this suitable to run compaction on this zone now?
+ * Returns
+ *   COMPACT_SKIPPED  - If there are too few free pages for compaction
+ *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
+ *   COMPACT_CONTINUE - If compaction should run now
+ */
+unsigned long compaction_suitable(struct zone *zone, int order)
+{
+       int fragindex;
+       unsigned long watermark;
+
+       /*
+        * Watermarks for order-0 must be met for compaction. Note the 2UL.
+        * This is because during migration, copies of pages need to be
+        * allocated and for a short time, the footprint is higher
+        */
+       watermark = low_wmark_pages(zone) + (2UL << order);
+       if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+               return COMPACT_SKIPPED;
+
+       /*
+        * order == -1 is expected when compacting via
+        * /proc/sys/vm/compact_memory
+        */
+       if (order == -1)
+               return COMPACT_CONTINUE;
+
+       /*
+        * fragmentation index determines if allocation failures are due to
+        * low memory or external fragmentation
+        *
+        * index of -1 implies allocations might succeed depending on watermarks
+        * index towards 0 implies failure is due to lack of memory
+        * index towards 1000 implies failure is due to fragmentation
+        *
+        * Only compact if a failure would be due to fragmentation.
+        */
+       fragindex = fragmentation_index(zone, order);
+       if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
+               return COMPACT_SKIPPED;
+
+       if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0))
+               return COMPACT_PARTIAL;
+
+       return COMPACT_CONTINUE;
+}
+
 static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
        int ret;
 
+       ret = compaction_suitable(zone, cc->order);
+       switch (ret) {
+       case COMPACT_PARTIAL:
+       case COMPACT_SKIPPED:
+               /* Compaction is likely to fail */
+               return ret;
+       case COMPACT_CONTINUE:
+               /* Fall through to compaction */
+               ;
+       }
+
        /* Setup to move all movable pages to the end of the zone */
        cc->migrate_pfn = zone->zone_start_pfn;
        cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
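
Because compact_zone() now returns early on anything other than COMPACT_CONTINUE, the three-way decision in compaction_suitable() effectively gates every compaction run. The stand-alone model below lays out that decision: fragmentation_index() is deliberately not reimplemented (its result is passed in), the two watermark checks are reduced to booleans, and the COMPACT_* numeric values are assumptions believed to match include/linux/compaction.h in this tree.

/* Stand-alone model; COMPACT_* values are assumed, fragindex is an input. */
#include <stdbool.h>
#include <stdio.h>

#define COMPACT_SKIPPED		0	/* assumed to match compaction.h */
#define COMPACT_CONTINUE	1
#define COMPACT_PARTIAL		2

static int sysctl_extfrag_threshold = 500;

/*
 * order0_ok: the order-0 check against low_wmark + (2 << order)
 * order_ok:  the same watermark checked at the requested order
 * fragindex: whatever fragmentation_index() returned
 */
static int suitable(int order, bool order0_ok, bool order_ok, int fragindex)
{
	if (!order0_ok)
		return COMPACT_SKIPPED;		/* too few free pages to migrate into */

	if (order == -1)
		return COMPACT_CONTINUE;	/* explicit /proc/sys/vm/compact_memory run */

	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
		return COMPACT_SKIPPED;		/* failure is lack of memory, not fragmentation */

	if (fragindex == -1 && order_ok)
		return COMPACT_PARTIAL;		/* allocation should already succeed */

	return COMPACT_CONTINUE;
}

int main(void)
{
	printf("%d\n", suitable(3, false, false, 800));	/* SKIPPED: below order-0 watermark */
	printf("%d\n", suitable(3, true,  false, 300));	/* SKIPPED: low memory, not fragmentation */
	printf("%d\n", suitable(3, true,  true,  -1));	/* PARTIAL: nothing to do */
	printf("%d\n", suitable(3, true,  false, 800));	/* CONTINUE: fragmentation, so compact */
	return 0;
}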
@@ -403,7 +514,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
                nr_migrate = cc->nr_migratepages;
                migrate_pages(&cc->migratepages, compaction_alloc,
-                                               (unsigned long)cc, 0);
+                               (unsigned long)cc, false,
+                               cc->sync);
                update_nr_listpages(cc);
                nr_remaining = cc->nr_migratepages;
 
@@ -429,8 +541,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
        return ret;
 }
 
-static unsigned long compact_zone_order(struct zone *zone,
-                                               int order, gfp_t gfp_mask)
+unsigned long compact_zone_order(struct zone *zone,
+                                int order, gfp_t gfp_mask,
+                                bool sync,
+                                int compact_mode)
 {
        struct compact_control cc = {
                .nr_freepages = 0,
@@ -438,6 +552,8 @@ static unsigned long compact_zone_order(struct zone *zone,
                .order = order,
                .migratetype = allocflags_to_migratetype(gfp_mask),
                .zone = zone,
+               .sync = sync,
+               .compact_mode = compact_mode,
        };
        INIT_LIST_HEAD(&cc.freepages);
        INIT_LIST_HEAD(&cc.migratepages);
@@ -453,16 +569,17 @@ int sysctl_extfrag_threshold = 500;
  * @order: The order of the current allocation
  * @gfp_mask: The GFP mask of the current allocation
  * @nodemask: The allowed nodes to allocate from
+ * @sync: Whether migration is synchronous or not
  *
  * This is the main entry point for direct page compaction.
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
-                       int order, gfp_t gfp_mask, nodemask_t *nodemask)
+                       int order, gfp_t gfp_mask, nodemask_t *nodemask,
+                       bool sync)
 {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        int may_enter_fs = gfp_mask & __GFP_FS;
        int may_perform_io = gfp_mask & __GFP_IO;
-       unsigned long watermark;
        struct zoneref *z;
        struct zone *zone;
        int rc = COMPACT_SKIPPED;
@@ -472,7 +589,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
         * made because an assumption is made that the page allocator can satisfy
         * the "cheaper" orders without taking special steps
         */
-       if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+       if (!order || !may_enter_fs || !may_perform_io)
                return rc;
 
        count_vm_event(COMPACTSTALL);
@@ -480,43 +597,14 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
        /* Compact each zone in the list */
        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
                                                                nodemask) {
-               int fragindex;
                int status;
 
-               /*
-                * Watermarks for order-0 must be met for compaction. Note
-                * the 2UL. This is because during migration, copies of
-                * pages need to be allocated and for a short time, the
-                * footprint is higher
-                */
-               watermark = low_wmark_pages(zone) + (2UL << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
-                       continue;
-
-               /*
-                * fragmentation index determines if allocation failures are
-                * due to low memory or external fragmentation
-                *
-                * index of -1 implies allocations might succeed depending
-                *      on watermarks
-                * index towards 0 implies failure is due to lack of memory
-                * index towards 1000 implies failure is due to fragmentation
-                *
-                * Only compact if a failure would be due to fragmentation.
-                */
-               fragindex = fragmentation_index(zone, order);
-               if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
-                       continue;
-
-               if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
-                       rc = COMPACT_PARTIAL;
-                       break;
-               }
-
-               status = compact_zone_order(zone, order, gfp_mask);
+               status = compact_zone_order(zone, order, gfp_mask, sync,
+                                           COMPACT_MODE_DIRECT_RECLAIM);
                rc = max(status, rc);
 
-               if (zone_watermark_ok(zone, order, watermark, 0, 0))
+               /* If a normal allocation would succeed, stop compacting */
+               if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
                        break;
        }
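
The rc = max(status, rc) aggregation in the rewritten loop only works if the COMPACT_* return codes are ordered from least to most successful. The small model below makes that assumption explicit (SKIPPED < CONTINUE < PARTIAL < COMPLETE, believed to match this tree) and uses made-up per-zone outcomes to show that the best result across zones is what try_to_compact_pages() reports; the early break on zone_watermark_ok() is left out since it only cuts the loop short once a zone already satisfies the allocation.

/* Stand-alone model; the COMPACT_* ordering and per-zone results are assumed. */
#include <stdio.h>

#define COMPACT_SKIPPED		0
#define COMPACT_CONTINUE	1
#define COMPACT_PARTIAL		2
#define COMPACT_COMPLETE	3

#define max(a, b)		((a) > (b) ? (a) : (b))

int main(void)
{
	/* made-up outcomes for three zones in the zonelist */
	int status[] = { COMPACT_SKIPPED, COMPACT_PARTIAL, COMPACT_CONTINUE };
	int rc = COMPACT_SKIPPED;
	int i;

	for (i = 0; i < 3; i++)
		rc = max(status[i], rc);

	/* the best per-zone result is what the caller sees */
	printf("rc = %d\n", rc);	/* 2, i.e. COMPACT_PARTIAL */
	return 0;
}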
 
@@ -543,6 +631,7 @@ static int compact_node(int nid)
                        .nr_freepages = 0,
                        .nr_migratepages = 0,
                        .order = -1,
+                       .compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
                };
 
                zone = &pgdat->node_zones[zoneid];