From 68fffd95861d1f4d9634e54db0f937b844e2a7dd Mon Sep 17 00:00:00 2001 From: Nathan Zimmer Date: Wed, 20 Feb 2013 13:13:48 +1100 Subject: [PATCH] sched: /proc/sched_stat fails on very very large machines On systems with 4096 cores doing a cat /proc/sched_stat fails. We are trying to push all the data into a single kmalloc buffer. The issue is on these very large machines all the data will not fit in 4mb. A better solution is to not use the single_open() mechanism but to provide our own seq_operations. The output should be identical to previous version and thus not need the version number. Signed-off-by: Nathan Zimmer Reported-by: Dave Jones Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- kernel/sched/stats.c | 73 ++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 903ffa9e8872..33a85c90045e 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -21,9 +21,13 @@ static int show_schedstat(struct seq_file *seq, void *v) if (mask_str == NULL) return -ENOMEM; - seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); - seq_printf(seq, "timestamp %lu\n", jiffies); - for_each_online_cpu(cpu) { + if (v == (void *)1) { + seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); + seq_printf(seq, "timestamp %lu\n", jiffies); + } else { + + cpu = (unsigned long)(v - 2); + struct rq *rq = cpu_rq(cpu); #ifdef CONFIG_SMP struct sched_domain *sd; @@ -72,35 +76,64 @@ static int show_schedstat(struct seq_file *seq, void *v) } rcu_read_unlock(); #endif + kfree(mask_str); } - kfree(mask_str); return 0; } -static int schedstat_open(struct inode *inode, struct file *file) +static void *schedstat_start(struct seq_file *file, loff_t *offset) { - unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); - char *buf = kmalloc(size, GFP_KERNEL); - struct seq_file *m; - int res; + unsigned long n = *offset; - if (!buf) - return -ENOMEM; - res = single_open(file, show_schedstat, NULL); - if (!res) { - m = file->private_data; - m->buf = buf; - m->size = size; - } else - kfree(buf); - return res; + if (n == 0) + return (void *) 1; + + n--; + + if (n > 0) + n = cpumask_next(n - 1, cpu_online_mask); + else + n = cpumask_first(cpu_online_mask); + + *offset = n + 1; + + if (n < nr_cpu_ids) + return (void *)(unsigned long)(n + 2); + return NULL; +} + +static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset) +{ + (*offset)++; + return schedstat_start(file, offset); +} + +static void schedstat_stop(struct seq_file *file, void *data) +{ +} + +static const struct seq_operations schedstat_sops = { + .start = schedstat_start, + .next = schedstat_next, + .stop = schedstat_stop, + .show = show_schedstat, +}; + +static int schedstat_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &schedstat_sops); } +static int schedstat_release(struct inode *inode, struct file *file) +{ + return 0; +}; + static const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = schedstat_release, }; static int __init proc_schedstat_init(void) -- 2.39.5