*/
static __always_inline void slab_lock(struct page *page)
{
+ VM_BUG_ON_PAGE(PageTail(page), page);
bit_spin_lock(PG_locked, &page->flags);
}
static __always_inline void slab_unlock(struct page *page)
{
+ VM_BUG_ON_PAGE(PageTail(page), page);
__bit_spin_unlock(PG_locked, &page->flags);
}
/*
* Debug settings:
*/
-#ifdef CONFIG_SLUB_DEBUG_ON
+#if defined(CONFIG_SLUB_DEBUG_ON)
static int slub_debug = DEBUG_DEFAULT_FLAGS;
+#elif defined(CONFIG_KASAN)
+static int slub_debug = SLAB_STORE_USER;
#else
static int slub_debug;
#endif
return 0;
}
+/* Supports checking bulk free of a constructed freelist */
static noinline struct kmem_cache_node *free_debug_processing(
- struct kmem_cache *s, struct page *page, void *object,
+ struct kmem_cache *s, struct page *page,
+ void *head, void *tail, int bulk_cnt,
unsigned long addr, unsigned long *flags)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ void *object = head;
+ int cnt = 0;
spin_lock_irqsave(&n->list_lock, *flags);
slab_lock(page);
if (!check_slab(s, page))
goto fail;
+next_object:
+ cnt++;
+
if (!check_valid_pointer(s, page, object)) {
slab_err(s, page, "Invalid object pointer 0x%p", object);
goto fail;
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
trace(s, page, object, 0);
+ /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
+
+ /* Reached end of constructed freelist yet? */
+ if (object != tail) {
+ object = get_freepointer(s, object);
+ goto next_object;
+ }
out:
+ if (cnt != bulk_cnt)
+ slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
+ bulk_cnt, cnt);
+
slab_unlock(page);
/*
* Keep node_lock to preserve integrity
return flags;
}
-#else
+#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
struct page *page, void *object, unsigned long addr) { return 0; }
static inline struct kmem_cache_node *free_debug_processing(
- struct kmem_cache *s, struct page *page, void *object,
+ struct kmem_cache *s, struct page *page,
+ void *head, void *tail, int bulk_cnt,
unsigned long addr, unsigned long *flags) { return NULL; }
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
{
flags &= gfp_allowed_mask;
lockdep_trace_alloc(flags);
- might_sleep_if(flags & __GFP_WAIT);
+ might_sleep_if(gfpflags_allow_blocking(flags));
if (should_failslab(s->object_size, flags, s->flags))
return NULL;
kasan_slab_free(s, x);
}
+static inline void slab_free_freelist_hook(struct kmem_cache *s,
+ void *head, void *tail)
+{
+/*
+ * Compiler cannot detect this function can be removed if slab_free_hook()
+ * evaluates to nothing. Thus, catch all relevant config debug options here.
+ */
+#if defined(CONFIG_KMEMCHECK) || \
+ defined(CONFIG_LOCKDEP) || \
+ defined(CONFIG_DEBUG_KMEMLEAK) || \
+ defined(CONFIG_DEBUG_OBJECTS_FREE) || \
+ defined(CONFIG_KASAN)
+
+ void *object = head;
+ void *tail_obj = tail ? : head;
+
+ do {
+ slab_free_hook(s, object);
+ } while ((object != tail_obj) &&
+ (object = get_freepointer(s, object)));
+#endif
+}
+
static void setup_object(struct kmem_cache *s, struct page *page,
void *object)
{
flags |= __GFP_NOTRACK;
- if (memcg_charge_slab(s, flags, order))
- return NULL;
-
if (node == NUMA_NO_NODE)
page = alloc_pages(flags, order);
else
page = __alloc_pages_node(node, flags, order);
- if (!page)
- memcg_uncharge_slab(s, order);
+ if (page && memcg_charge_slab(page, flags, order, s)) {
+ __free_pages(page, order);
+ page = NULL;
+ }
return page;
}
flags &= gfp_allowed_mask;
- if (flags & __GFP_WAIT)
+ if (gfpflags_allow_blocking(flags))
local_irq_enable();
flags |= s->allocflags;
* so we fall-back to the minimum order allocation.
*/
alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
- if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min))
- alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT;
+ if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
+ alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM;
page = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!page)) {
page->frozen = 1;
out:
- if (flags & __GFP_WAIT)
+ if (gfpflags_allow_blocking(flags))
local_irq_disable();
if (!page)
return NULL;
page_mapcount_reset(page);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
- __free_pages(page, order);
- memcg_uncharge_slab(s, order);
+ __free_kmem_pages(page, order);
}
#define need_reserve_slab_rcu \
VM_BUG_ON(s->reserved != sizeof(*head));
head = page_address(page) + offset;
} else {
- /*
- * RCU free overloads the RCU head over the LRU
- */
- head = (void *)&page->lru;
+ head = &page->rcu_head;
}
call_rcu(head, rcu_free_slab);
* And if we were unable to get a new slab from the partial slab lists then
* we need to allocate a new slab. This is the slowest path since it involves
* a call to the page allocator and the setup of a new slab.
+ *
+ * Version of __slab_alloc to use when we know that interrupts are
+ * already disabled (which is the case for bulk allocation).
*/
-static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
void *freelist;
struct page *page;
- unsigned long flags;
-
- local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
- /*
- * We may have been preempted and rescheduled on a different
- * cpu before disabling interrupts. Need to reload cpu area
- * pointer.
- */
- c = this_cpu_ptr(s->cpu_slab);
-#endif
page = c->page;
if (!page)
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
- local_irq_restore(flags);
return freelist;
new_slab:
if (unlikely(!freelist)) {
slab_out_of_memory(s, gfpflags, node);
- local_irq_restore(flags);
return NULL;
}
deactivate_slab(s, page, get_freepointer(s, freelist));
c->page = NULL;
c->freelist = NULL;
- local_irq_restore(flags);
return freelist;
}
+/*
+ * Another one that disabled interrupt and compensates for possible
+ * cpu changes by refetching the per cpu area pointer.
+ */
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ unsigned long addr, struct kmem_cache_cpu *c)
+{
+ void *p;
+ unsigned long flags;
+
+ local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+ /*
+ * We may have been preempted and rescheduled on a different
+ * cpu before disabling interrupts. Need to reload cpu area
+ * pointer.
+ */
+ c = this_cpu_ptr(s->cpu_slab);
+#endif
+
+ p = ___slab_alloc(s, gfpflags, node, addr, c);
+ local_irq_restore(flags);
+ return p;
+}
+
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* handling required then we can return immediately.
*/
static void __slab_free(struct kmem_cache *s, struct page *page,
- void *x, unsigned long addr)
+ void *head, void *tail, int cnt,
+ unsigned long addr)
+
{
void *prior;
- void **object = (void *)x;
int was_frozen;
struct page new;
unsigned long counters;
stat(s, FREE_SLOWPATH);
if (kmem_cache_debug(s) &&
- !(n = free_debug_processing(s, page, x, addr, &flags)))
+ !(n = free_debug_processing(s, page, head, tail, cnt,
+ addr, &flags)))
return;
do {
}
prior = page->freelist;
counters = page->counters;
- set_freepointer(s, object, prior);
+ set_freepointer(s, tail, prior);
new.counters = counters;
was_frozen = new.frozen;
- new.inuse--;
+ new.inuse -= cnt;
if ((!new.inuse || !prior) && !was_frozen) {
if (kmem_cache_has_cpu_partial(s) && !prior) {
} while (!cmpxchg_double_slab(s, page,
prior, counters,
- object, new.counters,
+ head, new.counters,
"__slab_free"));
if (likely(!n)) {
*
* If fastpath is not possible then fall back to __slab_free where we deal
* with all sorts of special processing.
+ *
+ * Bulk free of a freelist with several objects (all pointing to the
+ * same page) possible by specifying head and tail ptr, plus objects
+ * count (cnt). Bulk free indicated by tail pointer being set.
*/
-static __always_inline void slab_free(struct kmem_cache *s,
- struct page *page, void *x, unsigned long addr)
+static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
+ void *head, void *tail, int cnt,
+ unsigned long addr)
{
- void **object = (void *)x;
+ void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
- slab_free_hook(s, x);
+ slab_free_freelist_hook(s, head, tail);
redo:
/*
barrier();
if (likely(page == c->page)) {
- set_freepointer(s, object, c->freelist);
+ set_freepointer(s, tail_obj, c->freelist);
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
c->freelist, tid,
- object, next_tid(tid)))) {
+ head, next_tid(tid)))) {
note_cmpxchg_failure("slab_free", s, tid);
goto redo;
}
stat(s, FREE_FASTPATH);
} else
- __slab_free(s, page, x, addr);
+ __slab_free(s, page, head, tail_obj, cnt, addr);
}
s = cache_from_obj(s, x);
if (!s)
return;
- slab_free(s, virt_to_head_page(x), x, _RET_IP_);
+ slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
-/* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
-{
- struct kmem_cache_cpu *c;
+struct detached_freelist {
struct page *page;
- int i;
+ void *tail;
+ void *freelist;
+ int cnt;
+};
- local_irq_disable();
- c = this_cpu_ptr(s->cpu_slab);
+/*
+ * This function progressively scans the array with free objects (with
+ * a limited look ahead) and extract objects belonging to the same
+ * page. It builds a detached freelist directly within the given
+ * page/objects. This can happen without any need for
+ * synchronization, because the objects are owned by running process.
+ * The freelist is build up as a single linked list in the objects.
+ * The idea is, that this detached freelist can then be bulk
+ * transferred to the real freelist(s), but only requiring a single
+ * synchronization primitive. Look ahead in the array is limited due
+ * to performance reasons.
+ */
+static int build_detached_freelist(struct kmem_cache *s, size_t size,
+ void **p, struct detached_freelist *df)
+{
+ size_t first_skipped_index = 0;
+ int lookahead = 3;
+ void *object;
- for (i = 0; i < size; i++) {
- void *object = p[i];
+ /* Always re-init detached_freelist */
+ df->page = NULL;
- BUG_ON(!object);
- /* kmem cache debug support */
- s = cache_from_obj(s, object);
- if (unlikely(!s))
- goto exit;
- slab_free_hook(s, object);
+ do {
+ object = p[--size];
+ } while (!object && size);
+
+ if (!object)
+ return 0;
- page = virt_to_head_page(object);
+ /* Start new detached freelist */
+ set_freepointer(s, object, NULL);
+ df->page = virt_to_head_page(object);
+ df->tail = object;
+ df->freelist = object;
+ p[size] = NULL; /* mark object processed */
+ df->cnt = 1;
+
+ while (size) {
+ object = p[--size];
+ if (!object)
+ continue; /* Skip processed objects */
+
+ /* df->page is always set at this point */
+ if (df->page == virt_to_head_page(object)) {
+ /* Opportunity build freelist */
+ set_freepointer(s, object, df->freelist);
+ df->freelist = object;
+ df->cnt++;
+ p[size] = NULL; /* mark object processed */
- if (c->page == page) {
- /* Fastpath: local CPU free */
- set_freepointer(s, object, c->freelist);
- c->freelist = object;
- } else {
- c->tid = next_tid(c->tid);
- local_irq_enable();
- /* Slowpath: overhead locked cmpxchg_double_slab */
- __slab_free(s, page, object, _RET_IP_);
- local_irq_disable();
- c = this_cpu_ptr(s->cpu_slab);
+ continue;
}
+
+ /* Limit look ahead search */
+ if (!--lookahead)
+ break;
+
+ if (!first_skipped_index)
+ first_skipped_index = size + 1;
}
-exit:
- c->tid = next_tid(c->tid);
- local_irq_enable();
+
+ return first_skipped_index;
+}
+
+
+/* Note that interrupts must be enabled when calling this function. */
+void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
+{
+ if (WARN_ON(!size))
+ return;
+
+ do {
+ struct detached_freelist df;
+
+ size = build_detached_freelist(s, size, p, &df);
+ if (unlikely(!df.page))
+ continue;
+
+ slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+ } while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
void *object = c->freelist;
if (unlikely(!object)) {
- local_irq_enable();
/*
* Invoking slow path likely have side-effect
* of re-populating per CPU c->freelist
*/
- p[i] = __slab_alloc(s, flags, NUMA_NO_NODE,
+ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
_RET_IP_, c);
- if (unlikely(!p[i])) {
- __kmem_cache_free_bulk(s, i, p);
- return false;
- }
- local_irq_disable();
+ if (unlikely(!p[i]))
+ goto error;
+
c = this_cpu_ptr(s->cpu_slab);
continue; /* goto for-loop */
}
/* kmem_cache debug support */
s = slab_pre_alloc_hook(s, flags);
- if (unlikely(!s)) {
- __kmem_cache_free_bulk(s, i, p);
- c->tid = next_tid(c->tid);
- local_irq_enable();
- return false;
- }
+ if (unlikely(!s))
+ goto error;
c->freelist = get_freepointer(s, object);
p[i] = object;
}
return true;
+
+error:
+ __kmem_cache_free_bulk(s, i, p);
+ local_irq_enable();
+ return false;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
return get_order(size * MAX_OBJS_PER_PAGE) - 1;
- for (order = max(min_order,
- fls(min_objects * size - 1) - PAGE_SHIFT);
+ for (order = max(min_order, get_order(min_objects * size + reserved));
order <= max_order; order++) {
unsigned long slab_size = PAGE_SIZE << order;
- if (slab_size < min_objects * size + reserved)
- continue;
-
rem = (slab_size - reserved) % size;
if (rem <= slab_size / fract_leftover)
break;
-
}
return order;
* works by first attempting to generate a layout with
* the best configuration and backing off gradually.
*
- * First we reduce the acceptable waste in a slab. Then
+ * First we increase the acceptable waste in a slab. Then
* we reduce the minimum objects required in a slab.
*/
min_objects = slub_min_objects;
__free_kmem_pages(page, compound_order(page));
return;
}
- slab_free(page->slab_cache, page, object, _RET_IP_);
+ slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kfree);