extern int fragmentation_index(struct zone *zone, unsigned int order);
extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *mask,
- bool sync, bool *contended);
+ bool sync, bool *contended, struct page **page);
extern int compact_pgdat(pg_data_t *pgdat, int order);
extern unsigned long compaction_suitable(struct zone *zone, int order);
#else
static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync, bool *contended)
+ bool sync, bool *contended, struct page **page)
{
return COMPACT_CONTINUE;
}
void split_page(struct page *page, unsigned int order);
int split_free_page(struct page *page);
+int capture_free_page(struct page *page, int alloc_order, int migratetype);
/*
* Compound pages have a destructor function. Provide a
return compact_checklock_irqsave(lock, flags, false, cc);
}
+static void compact_capture_page(struct compact_control *cc)
+{
+ unsigned long flags;
+ int mtype, mtype_low, mtype_high;
+
+ if (!cc->page || *cc->page)
+ return;
+
+ /*
+ * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+ * regardless of the migratetype of the freelist is is captured from.
+ * This is fine because the order for a high-order MIGRATE_MOVABLE
+ * allocation is typically at least a pageblock size and overall
+ * fragmentation is not impaired. Other allocation types must
+ * capture pages from their own migratelist because otherwise they
+ * could pollute other pageblocks like MIGRATE_MOVABLE with
+ * difficult to move pages and making fragmentation worse overall.
+ */
+ if (cc->migratetype == MIGRATE_MOVABLE) {
+ mtype_low = 0;
+ mtype_high = MIGRATE_PCPTYPES;
+ } else {
+ mtype_low = cc->migratetype;
+ mtype_high = cc->migratetype + 1;
+ }
+
+ /* Speculatively examine the free lists without zone lock */
+ for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+ int order;
+ for (order = cc->order; order < MAX_ORDER; order++) {
+ struct page *page;
+ struct free_area *area;
+ area = &(cc->zone->free_area[order]);
+ if (list_empty(&area->free_list[mtype]))
+ continue;
+
+ /* Take the lock and attempt capture of the page */
+ if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+ return;
+ if (!list_empty(&area->free_list[mtype])) {
+ page = list_entry(area->free_list[mtype].next,
+ struct page, lru);
+ if (capture_free_page(page, cc->order, mtype)) {
+ spin_unlock_irqrestore(&cc->zone->lock,
+ flags);
+ *cc->page = page;
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&cc->zone->lock, flags);
+ }
+ }
+}
+
/*
* Isolate free pages onto a private freelist. Caller must hold zone->lock.
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free
static int compact_finished(struct zone *zone,
struct compact_control *cc)
{
- unsigned int order;
unsigned long watermark;
if (fatal_signal_pending(current))
return COMPACT_CONTINUE;
/* Direct compactor: Is a suitable page free? */
- for (order = cc->order; order < MAX_ORDER; order++) {
- /* Job done if page is free of the right migratetype */
- if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
- return COMPACT_PARTIAL;
-
- /* Job done if allocation would set block type */
- if (order >= pageblock_order && zone->free_area[order].nr_free)
+ if (cc->page) {
+ /* Was a suitable page captured? */
+ if (*cc->page)
return COMPACT_PARTIAL;
+ } else {
+ unsigned int order;
+ for (order = cc->order; order < MAX_ORDER; order++) {
+ struct free_area *area = &zone->free_area[cc->order];
+ /* Job done if page is free of the right migratetype */
+ if (!list_empty(&area->free_list[cc->migratetype]))
+ return COMPACT_PARTIAL;
+
+ /* Job done if allocation would set block type */
+ if (cc->order >= pageblock_order && area->nr_free)
+ return COMPACT_PARTIAL;
+ }
}
return COMPACT_CONTINUE;
goto out;
}
}
+
+ /* Capture a page now if it is a suitable size */
+ compact_capture_page(cc);
}
out:
static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- bool sync, bool *contended)
+ bool sync, bool *contended,
+ struct page **page)
{
struct compact_control cc = {
.nr_freepages = 0,
.zone = zone,
.sync = sync,
.contended = contended,
+ .page = page,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
*/
unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync, bool *contended)
+ bool sync, bool *contended, struct page **page)
{
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
int may_enter_fs = gfp_mask & __GFP_FS;
int status;
status = compact_zone_order(zone, order, gfp_mask, sync,
- contended);
+ contended, page);
rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */
struct compact_control cc = {
.order = order,
.sync = false,
+ .page = NULL,
};
return __compact_pgdat(pgdat, &cc);
struct compact_control cc = {
.order = -1,
.sync = true,
+ .page = NULL,
};
return __compact_pgdat(NODE_DATA(nid), &cc);
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
bool *contended; /* True if a lock was contended */
+ struct page **page; /* Page captured of requested size */
};
unsigned long
}
/*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * required at the given order and being isolated now to prevent races
+ * with parallel allocators
*/
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
{
unsigned int order;
unsigned long watermark;
rmv_page_order(page);
__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
- /* Split into individual pages */
- set_page_refcounted(page);
- split_page(page, order);
+ if (alloc_order != order)
+ expand(zone, page, alloc_order, order,
+ &zone->free_area[order], migratetype);
+ /* Set the pageblock if the captured page is at least a pageblock */
if (order >= pageblock_order - 1) {
struct page *endpage = page + (1 << order) - 1;
for (; page < endpage; page += pageblock_nr_pages) {
}
}
- return 1 << order;
+ return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_page() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+ unsigned int order;
+ int nr_pages;
+
+ BUG_ON(!PageBuddy(page));
+ order = page_order(page);
+
+ nr_pages = capture_free_page(page, order, 0);
+ if (!nr_pages)
+ return 0;
+
+ /* Split into individual pages */
+ set_page_refcounted(page);
+ split_page(page, order);
+ return nr_pages;
}
/*
bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
- struct page *page;
+ struct page *page = NULL;
if (!order)
return NULL;
current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
nodemask, sync_migration,
- contended_compaction);
+ contended_compaction, &page);
current->flags &= ~PF_MEMALLOC;
- if (*did_some_progress != COMPACT_SKIPPED) {
+ /* If compaction captured a page, prep and use it */
+ if (page) {
+ prep_new_page(page, order, gfp_mask);
+ goto got_page;
+ }
+
+ if (*did_some_progress != COMPACT_SKIPPED) {
/* Page migration frees to the PCP lists but we want merging */
drain_pages(get_cpu());
put_cpu();
alloc_flags & ~ALLOC_NO_WATERMARKS,
preferred_zone, migratetype);
if (page) {
+got_page:
preferred_zone->compact_considered = 0;
preferred_zone->compact_defer_shift = 0;
if (order >= preferred_zone->compact_order_failed)