Merge branch 'akpm-current/current'

[karo-tx-linux.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 838ca8bb64f7376062fc6670c759a27d7f59c7e3..00118fee53404f8f28297a83804b6e59dcb1b94d 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -223,6 +223,19 @@ static char * const zone_names[MAX_NR_ZONES] = {
  #endif
  };
  
+char * const migratetype_names[MIGRATE_TYPES] = {
+       "Unmovable",
+       "Movable",
+       "Reclaimable",
+       "HighAtomic",
+#ifdef CONFIG_CMA
+       "CMA",
+#endif
+#ifdef CONFIG_MEMORY_ISOLATION
+       "Isolate",
+#endif
+};
+
  compound_page_dtor * const compound_page_dtors[] = {
         NULL,
         free_compound_page,
@@ -247,6 +260,7 @@ static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
  static unsigned long __initdata required_kernelcore;
  static unsigned long __initdata required_movablecore;
  static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+static bool mirrored_kernelcore;
  
  /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
  int movable_zone;
@@ -416,7 +430,7 @@ static void bad_page(struct page *page, const char *reason,
                         goto out;
                 }
                 if (nr_unshown) {
-                       printk(KERN_ALERT
+                       pr_alert(
                               "BUG: Bad page state: %lu messages suppressed\n",
                                 nr_unshown);
                         nr_unshown = 0;
@@ -426,9 +440,14 @@ static void bad_page(struct page *page, const char *reason,
         if (nr_shown++ == 0)
                 resume = jiffies + 60 * HZ;
  
-       printk(KERN_ALERT "BUG: Bad page state in process %s  pfn:%05lx\n",
+       pr_alert("BUG: Bad page state in process %s  pfn:%05lx\n",
                 current->comm, page_to_pfn(page));
-       dump_page_badflags(page, reason, bad_flags);
+       __dump_page(page, reason);
+       bad_flags &= page->flags;
+       if (bad_flags)
+               pr_alert("bad because of flags: %#lx(%pGp)\n",
+                                               bad_flags, &bad_flags);
+       dump_page_owner(page);
  
         print_modules();
         dump_stack();
@@ -477,7 +496,9 @@ void prep_compound_page(struct page *page, unsigned int order)
  
  #ifdef CONFIG_DEBUG_PAGEALLOC
  unsigned int _debug_guardpage_minorder;
-bool _debug_pagealloc_enabled __read_mostly;
+bool _debug_pagealloc_enabled __read_mostly
+                       = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
+EXPORT_SYMBOL(_debug_pagealloc_enabled);
  bool _debug_guardpage_enabled __read_mostly;
  
  static int __init early_debug_pagealloc(char *buf)
@@ -488,6 +509,9 @@ static int __init early_debug_pagealloc(char *buf)
         if (strcmp(buf, "on") == 0)
                 _debug_pagealloc_enabled = true;
  
+       if (strcmp(buf, "off") == 0)
+               _debug_pagealloc_enabled = false;
+
         return 0;
  }
  early_param("debug_pagealloc", early_debug_pagealloc);
@@ -1002,6 +1026,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
                                            PAGE_SIZE << order);
         }
         arch_free_page(page, order);
+       kernel_poison_pages(page, 1 << order, 0);
         kernel_map_pages(page, 1 << order, 0);
  
         return true;
@@ -1104,6 +1129,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
         return __free_pages_boot_core(page, pfn, order);
  }
  
+/*
+ * Check that the whole (or subset of) a pageblock given by the interval of
+ * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
+ * with the migration of free compaction scanner. The scanners then need to
+ * use only pfn_valid_within() check for arches that allow holes within
+ * pageblocks.
+ *
+ * Return struct page pointer of start_pfn, or NULL if checks were not passed.
+ *
+ * It's possible on some configurations to have a setup like node0 node1 node0
+ * i.e. it's possible that all pages within a zones range of pages do not
+ * belong to a single zone. We assume that a border between node0 and node1
+ * can occur within a single pageblock, but not a node0 node1 node0
+ * interleaving within a single pageblock. It is therefore sufficient to check
+ * the first and last page of a pageblock and avoid checking each individual
+ * page in a pageblock.
+ */
+struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+                                    unsigned long end_pfn, struct zone *zone)
+{
+       struct page *start_page;
+       struct page *end_page;
+
+       /* end_pfn is one past the range we are checking */
+       end_pfn--;
+
+       if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
+               return NULL;
+
+       start_page = pfn_to_page(start_pfn);
+
+       if (page_zone(start_page) != zone)
+               return NULL;
+
+       end_page = pfn_to_page(end_pfn);
+
+       /* This gives a shorter code than deriving page_zone(end_page) */
+       if (page_zone_id(start_page) != page_zone_id(end_page))
+               return NULL;
+
+       return start_page;
+}
+
+void set_zone_contiguous(struct zone *zone)
+{
+       unsigned long block_start_pfn = zone->zone_start_pfn;
+       unsigned long block_end_pfn;
+
+       block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+       for (; block_start_pfn < zone_end_pfn(zone);
+                       block_start_pfn = block_end_pfn,
+                        block_end_pfn += pageblock_nr_pages) {
+
+               block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+               if (!__pageblock_pfn_to_page(block_start_pfn,
+                                            block_end_pfn, zone))
+                       return;
+       }
+
+       /* We confirm that there is no hole */
+       zone->contiguous = true;
+}
+
+void clear_zone_contiguous(struct zone *zone)
+{
+       zone->contiguous = false;
+}
+
  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
  static void __init deferred_free_range(struct page *page,
                                         unsigned long pfn, int nr_pages)
@@ -1254,9 +1348,13 @@ free_range:
         pgdat_init_report_one_done();
         return 0;
  }
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
  
  void __init page_alloc_init_late(void)
  {
+       struct zone *zone;
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
         int nid;
  
         /* There will be num_node_state(N_MEMORY) threads */
@@ -1270,8 +1368,11 @@ void __init page_alloc_init_late(void)
  
         /* Reinit limits that are based on free pages after the kernel is up */
         files_maxfiles_init();
+#endif
+
+       for_each_populated_zone(zone)
+               set_zone_contiguous(zone);
  }
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
  
  #ifdef CONFIG_CMA
  /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
@@ -1381,6 +1482,12 @@ static inline int check_new_page(struct page *page)
         return 0;
  }
  
+static inline bool should_zero(void)
+{
+       return !IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) ||
+               !page_poisoning_enabled();
+}
+
  static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
                                                                 int alloc_flags)
  {
@@ -1396,10 +1503,11 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
         set_page_refcounted(page);
  
         arch_alloc_page(page, order);
+       kernel_poison_pages(page, 1 << order, 1);
         kernel_map_pages(page, 1 << order, 1);
         kasan_alloc_pages(page, order);
  
-       if (gfp_flags & __GFP_ZERO)
+       if (should_zero() && gfp_flags & __GFP_ZERO)
                 for (i = 0; i < (1 << order); i++)
                         clear_highpage(page + i);
  
@@ -2690,9 +2798,8 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
                 va_end(args);
         }
  
-       pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n",
-               current->comm, order, gfp_mask);
-
+       pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
+               current->comm, order, gfp_mask, &gfp_mask);
         dump_stack();
         if (!should_suppress_show_mem())
                 show_mem(filter);
@@ -2748,8 +2855,12 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                          * XXX: Page reclaim didn't yield anything,
                          * and the OOM killer can't be invoked, but
                          * keep looping as per tradition.
+                        *
+                        * But do not keep looping if oom_killer_disable()
+                        * was already called, for the system is trying to
+                        * enter a quiescent state during suspend.
                          */
-                       *did_some_progress = 1;
+                       *did_some_progress = !oom_killer_disabled;
                         goto out;
                 }
                 if (pm_suspended_storage())
@@ -2976,6 +3087,103 @@ static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
         return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
  }
  
+/*
+ * Maximum number of reclaim retries without any progress before OOM killer
+ * is consider as the only way to move forward.
+ */
+#define MAX_RECLAIM_RETRIES 16
+
+/*
+ * Checks whether it makes sense to retry the reclaim to make a forward progress
+ * for the given allocation request.
+ * The reclaim feedback represented by did_some_progress (any progress during
+ * the last reclaim round) and no_progress_loops (number of reclaim rounds
+ * without any progress in a row) is considered as well as the reclaimable pages
+ * on the applicable zone list (with a backoff mechanism which is a function of
+ * no_progress_loops).
+ *
+ * Returns true if a retry is viable or false to enter the oom path.
+ */
+static inline bool
+should_reclaim_retry(gfp_t gfp_mask, unsigned order,
+                    struct alloc_context *ac, int alloc_flags,
+                    bool did_some_progress,
+                    int no_progress_loops)
+{
+       struct zone *zone;
+       struct zoneref *z;
+
+       /*
+        * Make sure we converge to OOM if we cannot make any progress
+        * several times in the row.
+        */
+       if (no_progress_loops > MAX_RECLAIM_RETRIES)
+               return false;
+
+       /* Do not retry high order allocations unless they are __GFP_REPEAT */
+       if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
+               return false;
+
+       /*
+        * Keep reclaiming pages while there is a chance this will lead
+        * somewhere.  If none of the target zones can satisfy our allocation
+        * request even if all reclaimable pages are considered then we are
+        * screwed and have to go OOM.
+        */
+       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
+                       ac->high_zoneidx, ac->nodemask) {
+               unsigned long available;
+               unsigned long reclaimable;
+
+               available = reclaimable = zone_reclaimable_pages(zone);
+               available -= DIV_ROUND_UP(no_progress_loops * available,
+                                         MAX_RECLAIM_RETRIES);
+               available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+               /*
+                * Would the allocation succeed if we reclaimed the whole
+                * available?
+                */
+               if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
+                               ac->high_zoneidx, alloc_flags, available)) {
+                       unsigned long writeback;
+                       unsigned long dirty;
+
+                       writeback = zone_page_state_snapshot(zone, NR_WRITEBACK);
+                       dirty = zone_page_state_snapshot(zone, NR_FILE_DIRTY);
+
+                       /*
+                        * If we didn't make any progress and have a lot of
+                        * dirty + writeback pages then we should wait for
+                        * an IO to complete to slow down the reclaim and
+                        * prevent from pre mature OOM
+                        */
+                       if (!did_some_progress && 2*(writeback + dirty) > reclaimable) {
+                               congestion_wait(BLK_RW_ASYNC, HZ/10);
+                               return true;
+                       }
+
+                       /*
+                        * Memory allocation/reclaim might be called from a WQ
+                        * context and the current implementation of the WQ
+                        * concurrency control doesn't recognize that
+                        * a particular WQ is congested if the worker thread is
+                        * looping without ever sleeping. Therefore we have to
+                        * do a short sleep here rather than calling
+                        * cond_resched().
+                        */
+                       if (current->flags & PF_WQ_WORKER)
+                               schedule_timeout(1);
+                       else
+                               cond_resched();
+
+                       return true;
+               }
+       }
+
+       return false;
+}
+
  static inline struct page *
  __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                                 struct alloc_context *ac)
@@ -2983,11 +3191,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
         struct page *page = NULL;
         int alloc_flags;
-       unsigned long pages_reclaimed = 0;
         unsigned long did_some_progress;
         enum migrate_mode migration_mode = MIGRATE_ASYNC;
         bool deferred_compaction = false;
         int contended_compaction = COMPACT_CONTENDED_NONE;
+       int no_progress_loops = 0;
  
         /*
          * In the slowpath, we sanity check order to avoid ever trying to
@@ -3147,14 +3355,19 @@ retry:
         if (gfp_mask & __GFP_NORETRY)
                 goto noretry;
  
-       /* Keep reclaiming pages as long as there is reasonable progress */
-       pages_reclaimed += did_some_progress;
-       if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
-           ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
-               /* Wait for some write requests to complete then retry */
-               wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
+       /*
+        * Costly allocations might have made a progress but this doesn't mean
+        * their order will become available due to high fragmentation so do
+        * not reset the no progress counter for them
+        */
+       if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
+               no_progress_loops = 0;
+       else
+               no_progress_loops++;
+
+       if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
+                                did_some_progress > 0, no_progress_loops))
                 goto retry;
-       }
  
         /* Reclaim has failed us, start killing things */
         page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
@@ -3162,8 +3375,10 @@ retry:
                 goto got_pg;
  
         /* Retry as long as the OOM killer is making progress */
-       if (did_some_progress)
+       if (did_some_progress) {
+               no_progress_loops = 0;
                 goto retry;
+       }
  
  noretry:
         /*
@@ -4491,6 +4706,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         pg_data_t *pgdat = NODE_DATA(nid);
         unsigned long pfn;
         unsigned long nr_initialised = 0;
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+       struct memblock_region *r = NULL, *tmp;
+#endif
  
         if (highest_memmap_pfn < end_pfn - 1)
                 highest_memmap_pfn = end_pfn - 1;
@@ -4504,20 +4722,51 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
  
         for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                 /*
-                * There can be holes in boot-time mem_map[]s
-                * handed to this function.  They do not
-                * exist on hotplugged memory.
+                * There can be holes in boot-time mem_map[]s handed to this
+                * function.  They do not exist on hotplugged memory.
                  */
-               if (context == MEMMAP_EARLY) {
-                       if (!early_pfn_valid(pfn))
+               if (context != MEMMAP_EARLY)
+                       goto not_early;
+
+               if (!early_pfn_valid(pfn))
+                       continue;
+               if (!early_pfn_in_nid(pfn, nid))
+                       continue;
+               if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
+                       break;
+
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+               /*
+                * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
+                * from zone_movable_pfn[nid] to end of each node should be
+                * ZONE_MOVABLE not ZONE_NORMAL. skip it.
+                */
+               if (!mirrored_kernelcore && zone_movable_pfn[nid])
+                       if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
                                 continue;
-                       if (!early_pfn_in_nid(pfn, nid))
+
+               /*
+                * Check given memblock attribute by firmware which can affect
+                * kernel memory layout.  If zone==ZONE_MOVABLE but memory is
+                * mirrored, it's an overlapped memmap init. skip it.
+                */
+               if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
+                       if (!r || pfn >= memblock_region_memory_end_pfn(r)) {
+                               for_each_memblock(memory, tmp)
+                                       if (pfn < memblock_region_memory_end_pfn(tmp))
+                                               break;
+                               r = tmp;
+                       }
+                       if (pfn >= memblock_region_memory_base_pfn(r) &&
+                           memblock_is_mirror(r)) {
+                               /* already initialized as NORMAL */
+                               pfn = memblock_region_memory_end_pfn(r);
                                 continue;
-                       if (!update_defer_init(pgdat, pfn, end_pfn,
-                                               &nr_initialised))
-                               break;
+                       }
                 }
+#endif
  
+not_early:
                 /*
                  * Mark the block movable so that blocks are reserved for
                  * movable at startup. This will force kernel allocations
@@ -4934,11 +5183,6 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
                         *zone_end_pfn = min(node_end_pfn,
                                 arch_zone_highest_possible_pfn[movable_zone]);
  
-               /* Adjust for ZONE_MOVABLE starting within this range */
-               } else if (*zone_start_pfn < zone_movable_pfn[nid] &&
-                               *zone_end_pfn > zone_movable_pfn[nid]) {
-                       *zone_end_pfn = zone_movable_pfn[nid];
-
                 /* Check if this whole range is within ZONE_MOVABLE */
                 } else if (*zone_start_pfn >= zone_movable_pfn[nid])
                         *zone_start_pfn = *zone_end_pfn;
@@ -4953,31 +5197,31 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                         unsigned long zone_type,
                                         unsigned long node_start_pfn,
                                         unsigned long node_end_pfn,
+                                       unsigned long *zone_start_pfn,
+                                       unsigned long *zone_end_pfn,
                                         unsigned long *ignored)
  {
-       unsigned long zone_start_pfn, zone_end_pfn;
-
         /* When hotadd a new node from cpu_up(), the node should be empty */
         if (!node_start_pfn && !node_end_pfn)
                 return 0;
  
         /* Get the start and end of the zone */
-       zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
-       zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+       *zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+       *zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
         adjust_zone_range_for_zone_movable(nid, zone_type,
                                 node_start_pfn, node_end_pfn,
-                               &zone_start_pfn, &zone_end_pfn);
+                               zone_start_pfn, zone_end_pfn);
  
         /* Check that this node has pages within the zone's required range */
-       if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
+       if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
                 return 0;
  
         /* Move the zone boundaries inside the node if necessary */
-       zone_end_pfn = min(zone_end_pfn, node_end_pfn);
-       zone_start_pfn = max(zone_start_pfn, node_start_pfn);
+       *zone_end_pfn = min(*zone_end_pfn, node_end_pfn);
+       *zone_start_pfn = max(*zone_start_pfn, node_start_pfn);
  
         /* Return the spanned pages */
-       return zone_end_pfn - zone_start_pfn;
+       return *zone_end_pfn - *zone_start_pfn;
  }
  
  /*
@@ -5023,6 +5267,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
         unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
         unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
         unsigned long zone_start_pfn, zone_end_pfn;
+       unsigned long nr_absent;
  
         /* When hotadd a new node from cpu_up(), the node should be empty */
         if (!node_start_pfn && !node_end_pfn)
@@ -5034,7 +5279,39 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
         adjust_zone_range_for_zone_movable(nid, zone_type,
                         node_start_pfn, node_end_pfn,
                         &zone_start_pfn, &zone_end_pfn);
-       return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
+       nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
+
+       /*
+        * ZONE_MOVABLE handling.
+        * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
+        * and vice versa.
+        */
+       if (zone_movable_pfn[nid]) {
+               if (mirrored_kernelcore) {
+                       unsigned long start_pfn, end_pfn;
+                       struct memblock_region *r;
+
+                       for_each_memblock(memory, r) {
+                               start_pfn = clamp(memblock_region_memory_base_pfn(r),
+                                                 zone_start_pfn, zone_end_pfn);
+                               end_pfn = clamp(memblock_region_memory_end_pfn(r),
+                                               zone_start_pfn, zone_end_pfn);
+
+                               if (zone_type == ZONE_MOVABLE &&
+                                   memblock_is_mirror(r))
+                                       nr_absent += end_pfn - start_pfn;
+
+                               if (zone_type == ZONE_NORMAL &&
+                                   !memblock_is_mirror(r))
+                                       nr_absent += end_pfn - start_pfn;
+                       }
+               } else {
+                       if (zone_type == ZONE_NORMAL)
+                               nr_absent += node_end_pfn - zone_movable_pfn[nid];
+               }
+       }
+
+       return nr_absent;
  }
  
  #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
@@ -5042,8 +5319,18 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                         unsigned long zone_type,
                                         unsigned long node_start_pfn,
                                         unsigned long node_end_pfn,
+                                       unsigned long *zone_start_pfn,
+                                       unsigned long *zone_end_pfn,
                                         unsigned long *zones_size)
  {
+       unsigned int zone;
+
+       *zone_start_pfn = node_start_pfn;
+       for (zone = 0; zone < zone_type; zone++)
+               *zone_start_pfn += zones_size[zone];
+
+       *zone_end_pfn = *zone_start_pfn + zones_size[zone_type];
+
         return zones_size[zone_type];
  }
  
@@ -5072,15 +5359,22 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
  
         for (i = 0; i < MAX_NR_ZONES; i++) {
                 struct zone *zone = pgdat->node_zones + i;
+               unsigned long zone_start_pfn, zone_end_pfn;
                 unsigned long size, real_size;
  
                 size = zone_spanned_pages_in_node(pgdat->node_id, i,
                                                   node_start_pfn,
                                                   node_end_pfn,
+                                                 &zone_start_pfn,
+                                                 &zone_end_pfn,
                                                   zones_size);
                 real_size = size - zone_absent_pages_in_node(pgdat->node_id, i,
                                                   node_start_pfn, node_end_pfn,
                                                   zholes_size);
+               if (size)
+                       zone->zone_start_pfn = zone_start_pfn;
+               else
+                       zone->zone_start_pfn = 0;
                 zone->spanned_pages = size;
                 zone->present_pages = real_size;
  
@@ -5201,7 +5495,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
  {
         enum zone_type j;
         int nid = pgdat->node_id;
-       unsigned long zone_start_pfn = pgdat->node_start_pfn;
         int ret;
  
         pgdat_resize_init(pgdat);
@@ -5217,11 +5510,15 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
  #endif
         init_waitqueue_head(&pgdat->kswapd_wait);
         init_waitqueue_head(&pgdat->pfmemalloc_wait);
+#ifdef CONFIG_COMPACTION
+       init_waitqueue_head(&pgdat->kcompactd_wait);
+#endif
         pgdat_page_ext_init(pgdat);
  
         for (j = 0; j < MAX_NR_ZONES; j++) {
                 struct zone *zone = pgdat->node_zones + j;
                 unsigned long size, realsize, freesize, memmap_pages;
+               unsigned long zone_start_pfn = zone->zone_start_pfn;
  
                 size = zone->spanned_pages;
                 realsize = freesize = zone->present_pages;
@@ -5290,7 +5587,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 ret = init_currently_empty_zone(zone, zone_start_pfn, size);
                 BUG_ON(ret);
                 memmap_init(size, nid, j, zone_start_pfn);
-               zone_start_pfn += size;
         }
  }
  
@@ -5358,6 +5654,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
         pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
                 (u64)start_pfn << PAGE_SHIFT,
                 end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
+#else
+       start_pfn = node_start_pfn;
  #endif
         calculate_node_totalpages(pgdat, start_pfn, end_pfn,
                                   zones_size, zholes_size);
@@ -5528,6 +5826,36 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                 goto out2;
         }
  
+       /*
+        * If kernelcore=mirror is specified, ignore movablecore option
+        */
+       if (mirrored_kernelcore) {
+               bool mem_below_4gb_not_mirrored = false;
+
+               for_each_memblock(memory, r) {
+                       if (memblock_is_mirror(r))
+                               continue;
+
+                       nid = r->nid;
+
+                       usable_startpfn = memblock_region_memory_base_pfn(r);
+
+                       if (usable_startpfn < 0x100000) {
+                               mem_below_4gb_not_mirrored = true;
+                               continue;
+                       }
+
+                       zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+                               min(usable_startpfn, zone_movable_pfn[nid]) :
+                               usable_startpfn;
+               }
+
+               if (mem_below_4gb_not_mirrored)
+                       pr_warn("This configuration results in unmirrored kernel memory.");
+
+               goto out2;
+       }
+
         /*
          * If movablecore=nn[KMG] was specified, calculate what size of
          * kernelcore that corresponds so that memory usable for
@@ -5788,6 +6116,12 @@ static int __init cmdline_parse_core(char *p, unsigned long *core)
   */
  static int __init cmdline_parse_kernelcore(char *p)
  {
+       /* parse kernelcore=mirror */
+       if (parse_option_str(p, "mirror")) {
+               mirrored_kernelcore = true;
+               return 0;
+       }
+
         return cmdline_parse_core(p, &required_kernelcore);
  }
  
@@ -6927,7 +7261,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  }
  #endif
  
-#ifdef CONFIG_MEMORY_FAILURE
  bool is_free_buddy_page(struct page *page)
  {
         struct zone *zone = page_zone(page);
@@ -6946,4 +7279,3 @@ bool is_free_buddy_page(struct page *page)
  
         return order < MAX_ORDER;
  }
-#endif