* Mininum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
*/
-#define MIN_PARTIAL 2
+#define MIN_PARTIAL 5
/*
* Maximum number of desirable partial slabs.
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s) {}
+static inline void sysfs_slab_remove(struct kmem_cache *s)
+{
+ kfree(s);
+}
#endif
/********************************************************************
printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
newline = 0;
}
- printk(" %02x", addr[i]);
+ printk(KERN_CONT " %02x", addr[i]);
offset = i % 16;
ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
if (offset == 15) {
- printk(" %s\n",ascii);
+ printk(KERN_CONT " %s\n", ascii);
newline = 1;
}
}
if (!newline) {
i %= 16;
while (i < 16) {
- printk(" ");
+ printk(KERN_CONT " ");
ascii[i] = ' ';
i++;
}
- printk(" %s\n", ascii);
+ printk(KERN_CONT " %s\n", ascii);
}
}
if (s->flags & __OBJECT_POISON) {
memset(p, POISON_FREE, s->objsize - 1);
- p[s->objsize -1] = POISON_END;
+ p[s->objsize - 1] = POISON_END;
}
if (s->flags & SLAB_RED_ZONE)
static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
u8 *object, char *what,
- u8* start, unsigned int value, unsigned int bytes)
+ u8 *start, unsigned int value, unsigned int bytes)
{
u8 *fault;
u8 *end;
(!check_bytes_and_report(s, page, p, "Poison", p,
POISON_FREE, s->objsize - 1) ||
!check_bytes_and_report(s, page, p, "Poison",
- p + s->objsize -1, POISON_END, 1)))
+ p + s->objsize - 1, POISON_END, 1)))
return 0;
/*
* check_pad_bytes cleans up on its own.
"SLUB <none>: no slab for object 0x%p.\n",
object);
dump_stack();
- }
- else
+ } else
object_err(s, page, object,
"page slab pointer corrupt.");
goto fail;
/*
* Determine which debug features should be switched on
*/
- for ( ;*str && *str != ','; str++) {
+ for (; *str && *str != ','; str++) {
switch (tolower(*str)) {
case 'f':
slub_debug |= SLAB_DEBUG_FREE;
break;
default:
printk(KERN_ERR "slub_debug option '%c' "
- "unknown. skipped\n",*str);
+ "unknown. skipped\n", *str);
}
}
*/
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
- struct page * page;
+ struct page *page;
int pages = 1 << s->order;
if (s->order)
mod_zone_page_state(page_zone(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
- - pages);
+ -pages);
__free_pages(page, s->order);
}
/*
* Management of partially allocated slabs
*/
-static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
+static void add_partial(struct kmem_cache_node *n,
+ struct page *page, int tail)
{
spin_lock(&n->list_lock);
n->nr_partial++;
- list_add_tail(&page->lru, &n->partial);
- spin_unlock(&n->list_lock);
-}
-
-static void add_partial(struct kmem_cache_node *n, struct page *page)
-{
- spin_lock(&n->list_lock);
- n->nr_partial++;
- list_add(&page->lru, &n->partial);
+ if (tail)
+ list_add_tail(&page->lru, &n->partial);
+ else
+ list_add(&page->lru, &n->partial);
spin_unlock(&n->list_lock);
}
* expensive if we do it every time we are trying to find a slab
* with available objects.
*/
- if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
+ if (!s->remote_node_defrag_ratio ||
+ get_cycles() % 1024 > s->remote_node_defrag_ratio)
return NULL;
zonelist = &NODE_DATA(slab_node(current->mempolicy))
*
* On exit the slab lock will have been dropped.
*/
-static void unfreeze_slab(struct kmem_cache *s, struct page *page)
+static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
if (page->inuse) {
if (page->freelist)
- add_partial(n, page);
+ add_partial(n, page, tail);
else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
add_full(n, page);
slab_unlock(page);
* partial list stays small. kmem_cache_shrink can
* reclaim empty slabs from the partial list.
*/
- add_partial_tail(n, page);
+ add_partial(n, page, 1);
slab_unlock(page);
} else {
slab_unlock(page);
static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
struct page *page = c->page;
+ int tail = 1;
/*
* Merge cpu freelist into freelist. Typically we get here
* because both freelists are empty. So this is unlikely
while (unlikely(c->freelist)) {
void **object;
+ tail = 0; /* Hot objects. Put the slab first */
+
/* Retrieve object from cpu_freelist */
object = c->freelist;
c->freelist = c->freelist[c->offset];
page->inuse--;
}
c->page = NULL;
- unfreeze_slab(s, page);
+ unfreeze_slab(s, page, tail);
}
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
*
* Otherwise we can simply pick the next object from the lockless free list.
*/
-static void __always_inline *slab_alloc(struct kmem_cache *s,
+static __always_inline void *slab_alloc(struct kmem_cache *s,
gfp_t gfpflags, int node, void *addr)
{
void **object;
* then add it.
*/
if (unlikely(!prior))
- add_partial(get_node(s, page_to_nid(page)), page);
+ add_partial(get_node(s, page_to_nid(page)), page, 1);
out_unlock:
slab_unlock(page);
* If fastpath is not possible then fall back to __slab_free where we deal
* with all sorts of special processing.
*/
-static void __always_inline slab_free(struct kmem_cache *s,
+static __always_inline void slab_free(struct kmem_cache *s,
struct page *page, void *x, void *addr)
{
void **object = (void *)x;
{
struct page *page;
struct kmem_cache_node *n;
+ unsigned long flags;
BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
#endif
init_kmem_cache_node(n);
atomic_long_inc(&n->nr_slabs);
- add_partial(n, page);
+ /*
+ * lockdep requires consistent irq usage for each lock
+ * so even though there cannot be a race this early in
+ * the boot sequence, we still disable irqs.
+ */
+ local_irq_save(flags);
+ add_partial(n, page, 0);
+ local_irq_restore(flags);
return n;
}
s->refcount = 1;
#ifdef CONFIG_NUMA
- s->defrag_ratio = 100;
+ s->remote_node_defrag_ratio = 100;
#endif
if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
goto error;
*/
int kmem_ptr_validate(struct kmem_cache *s, const void *object)
{
- struct page * page;
+ struct page *page;
page = get_object_page(object);
if (kmem_cache_close(s))
WARN_ON(1);
sysfs_slab_remove(s);
- kfree(s);
} else
up_write(&slub_lock);
}
static int __init setup_slub_min_order(char *str)
{
- get_option (&str, &slub_min_order);
+ get_option(&str, &slub_min_order);
return 1;
}
static int __init setup_slub_max_order(char *str)
{
- get_option (&str, &slub_max_order);
+ get_option(&str, &slub_max_order);
return 1;
}
static int __init setup_slub_min_objects(char *str)
{
- get_option (&str, &slub_min_objects);
+ get_option(&str, &slub_min_objects);
return 1;
}
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
- page = get_object_page(object);
+ page = virt_to_head_page(object);
BUG_ON(!page);
+
+ if (unlikely(!PageSlab(page)))
+ return PAGE_SIZE << compound_order(page);
+
s = page->slab;
BUG_ON(!s);
}
EXPORT_SYMBOL(kfree);
+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct page *page;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += page->inuse;
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+
/*
* kmem_cache_shrink removes empty slabs from the partial lists and sorts
* the remaining slabs by the number of items in use. The slabs with the
* Check if alignment is compatible.
* Courtesy of Adrian Drzewiecki
*/
- if ((s->size & ~(align -1)) != s->size)
+ if ((s->size & ~(align - 1)) != s->size)
continue;
if (s->size - size >= sizeof(void *))
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata slab_notifier =
- { &slab_cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata slab_notifier = {
+ &slab_cpuup_callback, NULL, 0
+};
#endif
static int list_locations(struct kmem_cache *s, char *buf,
enum track_item alloc)
{
- int n = 0;
+ int len = 0;
unsigned long i;
struct loc_track t = { 0, 0, NULL };
int node;
for (i = 0; i < t.count; i++) {
struct location *l = &t.loc[i];
- if (n > PAGE_SIZE - 100)
+ if (len > PAGE_SIZE - 100)
break;
- n += sprintf(buf + n, "%7ld ", l->count);
+ len += sprintf(buf + len, "%7ld ", l->count);
if (l->addr)
- n += sprint_symbol(buf + n, (unsigned long)l->addr);
+ len += sprint_symbol(buf + len, (unsigned long)l->addr);
else
- n += sprintf(buf + n, "<not-available>");
+ len += sprintf(buf + len, "<not-available>");
if (l->sum_time != l->min_time) {
unsigned long remainder;
- n += sprintf(buf + n, " age=%ld/%ld/%ld",
+ len += sprintf(buf + len, " age=%ld/%ld/%ld",
l->min_time,
div_long_long_rem(l->sum_time, l->count, &remainder),
l->max_time);
} else
- n += sprintf(buf + n, " age=%ld",
+ len += sprintf(buf + len, " age=%ld",
l->min_time);
if (l->min_pid != l->max_pid)
- n += sprintf(buf + n, " pid=%ld-%ld",
+ len += sprintf(buf + len, " pid=%ld-%ld",
l->min_pid, l->max_pid);
else
- n += sprintf(buf + n, " pid=%ld",
+ len += sprintf(buf + len, " pid=%ld",
l->min_pid);
if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
- n < PAGE_SIZE - 60) {
- n += sprintf(buf + n, " cpus=");
- n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50,
+ len < PAGE_SIZE - 60) {
+ len += sprintf(buf + len, " cpus=");
+ len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
l->cpus);
}
if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
- n < PAGE_SIZE - 60) {
- n += sprintf(buf + n, " nodes=");
- n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50,
+ len < PAGE_SIZE - 60) {
+ len += sprintf(buf + len, " nodes=");
+ len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
l->nodes);
}
- n += sprintf(buf + n, "\n");
+ len += sprintf(buf + len, "\n");
}
free_loc_track(&t);
if (!t.count)
- n += sprintf(buf, "No data\n");
- return n;
-}
-
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, lru)
- x += page->inuse;
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
+ len += sprintf(buf, "No data\n");
+ return len;
}
enum slab_stat_type {
for_each_possible_cpu(cpu) {
struct page *page;
- int node;
struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
if (!c)
continue;
if (page) {
if (flags & SO_CPU) {
- int x = 0;
-
if (flags & SO_OBJECTS)
x = page->inuse;
else
SLAB_ATTR_RO(free_calls);
#ifdef CONFIG_NUMA
-static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
+static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
{
- return sprintf(buf, "%d\n", s->defrag_ratio / 10);
+ return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
}
-static ssize_t defrag_ratio_store(struct kmem_cache *s,
+static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
const char *buf, size_t length)
{
int n = simple_strtoul(buf, NULL, 10);
if (n < 100)
- s->defrag_ratio = n * 10;
+ s->remote_node_defrag_ratio = n * 10;
return length;
}
-SLAB_ATTR(defrag_ratio);
+SLAB_ATTR(remote_node_defrag_ratio);
#endif
-static struct attribute * slab_attrs[] = {
+static struct attribute *slab_attrs[] = {
&slab_size_attr.attr,
&object_size_attr.attr,
&objs_per_slab_attr.attr,
&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
- &defrag_ratio_attr.attr,
+ &remote_node_defrag_ratio_attr.attr,
#endif
NULL
};
return err;
}
+static void kmem_cache_release(struct kobject *kobj)
+{
+ struct kmem_cache *s = to_slab(kobj);
+
+ kfree(s);
+}
+
static struct sysfs_ops slab_sysfs_ops = {
.show = slab_attr_show,
.store = slab_attr_store,
static struct kobj_type slab_ktype = {
.sysfs_ops = &slab_sysfs_ops,
+ .release = kmem_cache_release
};
static int uevent_filter(struct kset *kset, struct kobject *kobj)
.filter = uevent_filter,
};
-static decl_subsys(slab, &slab_ktype, &slab_uevent_ops);
+static struct kset *slab_kset;
#define ID_STR_LENGTH 64
* This is typically the case for debug situations. In that
* case we can catch duplicate names easily.
*/
- sysfs_remove_link(&slab_subsys.kobj, s->name);
+ sysfs_remove_link(&slab_kset->kobj, s->name);
name = s->name;
} else {
/*
name = create_unique_id(s);
}
- kobj_set_kset_s(s, slab_subsys);
- kobject_set_name(&s->kobj, name);
- kobject_init(&s->kobj);
- err = kobject_add(&s->kobj);
- if (err)
+ s->kobj.kset = slab_kset;
+ err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
+ if (err) {
+ kobject_put(&s->kobj);
return err;
+ }
err = sysfs_create_group(&s->kobj, &slab_attr_group);
if (err)
{
kobject_uevent(&s->kobj, KOBJ_REMOVE);
kobject_del(&s->kobj);
+ kobject_put(&s->kobj);
}
/*
/*
* If we have a leftover link then remove it.
*/
- sysfs_remove_link(&slab_subsys.kobj, name);
- return sysfs_create_link(&slab_subsys.kobj,
- &s->kobj, name);
+ sysfs_remove_link(&slab_kset->kobj, name);
+ return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
}
al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
struct kmem_cache *s;
int err;
- err = subsystem_register(&slab_subsys);
- if (err) {
+ slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
+ if (!slab_kset) {
printk(KERN_ERR "Cannot register slab subsystem.\n");
return -ENOSYS;
}
__initcall(slab_sysfs_init);
#endif
+
+/*
+ * The /proc/slabinfo ABI
+ */
+#ifdef CONFIG_SLABINFO
+
+ssize_t slabinfo_write(struct file *file, const char __user * buffer,
+ size_t count, loff_t *ppos)
+{
+ return -EINVAL;
+}
+
+
+static void print_slabinfo_header(struct seq_file *m)
+{
+ seq_puts(m, "slabinfo - version: 2.1\n");
+ seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
+ "<objperslab> <pagesperslab>");
+ seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+ seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+ seq_putc(m, '\n');
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t n = *pos;
+
+ down_read(&slub_lock);
+ if (!n)
+ print_slabinfo_header(m);
+
+ return seq_list_start(&slab_caches, *pos);
+}
+
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &slab_caches, pos);
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+ up_read(&slub_lock);
+}
+
+static int s_show(struct seq_file *m, void *p)
+{
+ unsigned long nr_partials = 0;
+ unsigned long nr_slabs = 0;
+ unsigned long nr_inuse = 0;
+ unsigned long nr_objs;
+ struct kmem_cache *s;
+ int node;
+
+ s = list_entry(p, struct kmem_cache, list);
+
+ for_each_online_node(node) {
+ struct kmem_cache_node *n = get_node(s, node);
+
+ if (!n)
+ continue;
+
+ nr_partials += n->nr_partial;
+ nr_slabs += atomic_long_read(&n->nr_slabs);
+ nr_inuse += count_partial(n);
+ }
+
+ nr_objs = nr_slabs * s->objects;
+ nr_inuse += (nr_slabs - nr_partials) * s->objects;
+
+ seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
+ nr_objs, s->size, s->objects, (1 << s->order));
+ seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
+ seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
+ 0UL);
+ seq_putc(m, '\n');
+ return 0;
+}
+
+const struct seq_operations slabinfo_op = {
+ .start = s_start,
+ .next = s_next,
+ .stop = s_stop,
+ .show = s_show,
+};
+
+#endif /* CONFIG_SLABINFO */