]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - mm/page_alloc.c
thp: don't alloc harder for gfp nomemalloc even if nowait
[mv-sheeva.git] / mm / page_alloc.c
index 826ba6922e84a24b53a3c4999345f85a4f3dea02..e7664b9f706c39cd999784dc4423e8d5dcaf03d5 100644 (file)
@@ -357,6 +357,7 @@ void prep_compound_page(struct page *page, unsigned long order)
        }
 }
 
+/* update __split_huge_page_refcount if you change this function */
 static int destroy_compound_page(struct page *page, unsigned long order)
 {
        int i;
@@ -651,13 +652,10 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
        trace_mm_page_free_direct(page, order);
        kmemcheck_free_shadow(page, order);
 
-       for (i = 0; i < (1 << order); i++) {
-               struct page *pg = page + i;
-
-               if (PageAnon(pg))
-                       pg->mapping = NULL;
-               bad += free_pages_check(pg);
-       }
+       if (PageAnon(page))
+               page->mapping = NULL;
+       for (i = 0; i < (1 << order); i++)
+               bad += free_pages_check(page + i);
        if (bad)
                return false;
 
@@ -1460,24 +1458,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
 /*
- * Return 1 if free pages are above 'mark'. This takes into account the order
+ * Return true if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-                     int classzone_idx, int alloc_flags)
+static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+                     int classzone_idx, int alloc_flags, long free_pages)
 {
        /* free_pages my go negative - that's OK */
        long min = mark;
-       long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
        int o;
 
+       free_pages -= (1 << order) + 1;
        if (alloc_flags & ALLOC_HIGH)
                min -= min / 2;
        if (alloc_flags & ALLOC_HARDER)
                min -= min / 4;
 
        if (free_pages <= min + z->lowmem_reserve[classzone_idx])
-               return 0;
+               return false;
        for (o = 0; o < order; o++) {
                /* At the next order, this order's pages become unavailable */
                free_pages -= z->free_area[o].nr_free << o;
@@ -1486,9 +1484,28 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
                min >>= 1;
 
                if (free_pages <= min)
-                       return 0;
+                       return false;
        }
-       return 1;
+       return true;
+}
+
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+                     int classzone_idx, int alloc_flags)
+{
+       return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+                                       zone_page_state(z, NR_FREE_PAGES));
+}
+
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
+                     int classzone_idx, int alloc_flags)
+{
+       long free_pages = zone_page_state(z, NR_FREE_PAGES);
+
+       if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
+               free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
+
+       return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+                                                               free_pages);
 }
 
 #ifdef CONFIG_NUMA
@@ -1793,15 +1810,19 @@ static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, unsigned long *did_some_progress)
+       int migratetype, unsigned long *did_some_progress,
+       bool sync_migration)
 {
        struct page *page;
+       struct task_struct *tsk = current;
 
        if (!order || compaction_deferred(preferred_zone))
                return NULL;
 
+       tsk->flags |= PF_MEMALLOC;
        *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-                                                               nodemask);
+                                               nodemask, sync_migration);
+       tsk->flags &= ~PF_MEMALLOC;
        if (*did_some_progress != COMPACT_SKIPPED) {
 
                /* Page migration frees to the PCP lists but we want merging */
@@ -1837,7 +1858,8 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, unsigned long *did_some_progress)
+       int migratetype, unsigned long *did_some_progress,
+       bool sync_migration)
 {
        return NULL;
 }
@@ -1920,13 +1942,14 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 
 static inline
 void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
-                                               enum zone_type high_zoneidx)
+                                               enum zone_type high_zoneidx,
+                                               enum zone_type classzone_idx)
 {
        struct zoneref *z;
        struct zone *zone;
 
        for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-               wakeup_kswapd(zone, order);
+               wakeup_kswapd(zone, order, classzone_idx);
 }
 
 static inline int
@@ -1948,7 +1971,12 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
        alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
        if (!wait) {
-               alloc_flags |= ALLOC_HARDER;
+               /*
+                * Not worth trying to allocate harder for
+                * __GFP_NOMEMALLOC even if it can't schedule.
+                */
+               if  (!(gfp_mask & __GFP_NOMEMALLOC))
+                       alloc_flags |= ALLOC_HARDER;
                /*
                 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
                 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
@@ -1979,6 +2007,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        unsigned long pages_reclaimed = 0;
        unsigned long did_some_progress;
        struct task_struct *p = current;
+       bool sync_migration = false;
 
        /*
         * In the slowpath, we sanity check order to avoid ever trying to
@@ -2003,7 +2032,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                goto nopage;
 
 restart:
-       wake_all_kswapd(order, zonelist, high_zoneidx);
+       if (!(gfp_mask & __GFP_NO_KSWAPD))
+               wake_all_kswapd(order, zonelist, high_zoneidx,
+                                               zone_idx(preferred_zone));
 
        /*
         * OK, we're below the kswapd watermark and have kicked background
@@ -2041,14 +2072,19 @@ rebalance:
        if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
                goto nopage;
 
-       /* Try direct compaction */
+       /*
+        * Try direct compaction. The first pass is asynchronous. Subsequent
+        * attempts after direct reclaim are synchronous
+        */
        page = __alloc_pages_direct_compact(gfp_mask, order,
                                        zonelist, high_zoneidx,
                                        nodemask,
                                        alloc_flags, preferred_zone,
-                                       migratetype, &did_some_progress);
+                                       migratetype, &did_some_progress,
+                                       sync_migration);
        if (page)
                goto got_pg;
+       sync_migration = true;
 
        /* Try direct reclaim and then allocating */
        page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2102,6 +2138,20 @@ rebalance:
                /* Wait for some write requests to complete then retry */
                wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
                goto rebalance;
+       } else {
+               /*
+                * High-order allocations do not necessarily loop after
+                * direct reclaim and reclaim/compaction depends on compaction
+                * being called after reclaim so call directly if necessary
+                */
+               page = __alloc_pages_direct_compact(gfp_mask, order,
+                                       zonelist, high_zoneidx,
+                                       nodemask,
+                                       alloc_flags, preferred_zone,
+                                       migratetype, &did_some_progress,
+                                       sync_migration);
+               if (page)
+                       goto got_pg;
        }
 
 nopage:
@@ -2442,7 +2492,7 @@ void show_free_areas(void)
                        " all_unreclaimable? %s"
                        "\n",
                        zone->name,
-                       K(zone_nr_free_pages(zone)),
+                       K(zone_page_state(zone, NR_FREE_PAGES)),
                        K(min_wmark_pages(zone)),
                        K(low_wmark_pages(zone)),
                        K(high_wmark_pages(zone)),
@@ -2585,9 +2635,16 @@ static int __parse_numa_zonelist_order(char *s)
 
 static __init int setup_numa_zonelist_order(char *s)
 {
-       if (s)
-               return __parse_numa_zonelist_order(s);
-       return 0;
+       int ret;
+
+       if (!s)
+               return 0;
+
+       ret = __parse_numa_zonelist_order(s);
+       if (ret == 0)
+               strlcpy(numa_zonelist_order, s, NUMA_ZONELIST_ORDER_LEN);
+
+       return ret;
 }
 early_param("numa_zonelist_order", setup_numa_zonelist_order);
 
@@ -5565,7 +5622,7 @@ void dump_page(struct page *page)
 {
        printk(KERN_ALERT
               "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
-               page, page_count(page), page_mapcount(page),
+               page, atomic_read(&page->_count), page_mapcount(page),
                page->mapping, page->index);
        dump_page_flags(page->flags);
 }