test_bit(STRIPE_COMPUTE_RUN, &sh->state);
}
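+/*
+ * Wake every auxiliary thread registered to handle this stripe's CPU;
+ * if no auxiliary thread covers it, wake the main raid5d thread instead.
+ */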
+static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+{
+ struct r5conf *conf = sh->raid_conf;
+ struct raid5_percpu *percpu;
+ int i, orphaned = 1;
+
+ percpu = per_cpu_ptr(conf->percpu, sh->cpu);
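+ /* handle_threads holds auxiliary-thread indices, not CPU numbers. */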
+ for_each_cpu(i, &percpu->handle_threads) {
+ md_wakeup_thread(conf->aux_threads[i]->thread);
+ orphaned = 0;
+ }
+ if (orphaned)
+ md_wakeup_thread(conf->mddev->thread);
+}
+
static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
{
BUG_ON(!list_empty(&sh->lru));
sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list);
else {
+ int cpu = sh->cpu;
+ struct raid5_percpu *percpu;
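+ /* The stripe's home CPU may have gone offline; re-home it to any online CPU. */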
+ if (!cpu_online(cpu)) {
+ cpu = cpumask_any(cpu_online_mask);
+ sh->cpu = cpu;
+ }
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+
clear_bit(STRIPE_DELAYED, &sh->state);
clear_bit(STRIPE_BIT_DELAY, &sh->state);
- list_add_tail(&sh->lru, &conf->handle_list);
+ list_add_tail(&sh->lru, &percpu->handle_list);
+ raid5_wakeup_stripe_thread(sh);
+ return;
}
md_wakeup_thread(conf->mddev->thread);
} else {
raid5_build_block(sh, i, previous);
}
insert_hash(conf, sh);
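+ /* Record the CPU this stripe was initialised on; do_release_stripe() queues it on that CPU's handle list. */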
+ sh->cpu = smp_processor_id();
}
static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
while (!list_empty(&conf->delayed_list)) {
struct list_head *l = conf->delayed_list.next;
struct stripe_head *sh;
+ int cpu;
sh = list_entry(l, struct stripe_head, lru);
list_del_init(l);
clear_bit(STRIPE_DELAYED, &sh->state);
if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
list_add_tail(&sh->lru, &conf->hold_list);
+ cpu = sh->cpu;
+ if (!cpu_online(cpu)) {
+ cpu = cpumask_any(cpu_online_mask);
+ sh->cpu = cpu;
+ }
+ raid5_wakeup_stripe_thread(sh);
}
}
}
* head of the hold_list has changed, i.e. the head was promoted to the
* handle_list.
*/
-static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
-{
- struct stripe_head *sh;
-
+static struct stripe_head *__get_priority_stripe(struct r5conf *conf,
+ cpumask_t *mask)
+{
+ struct stripe_head *sh = NULL, *tmp;
+ struct list_head *handle_list = NULL;
+ int cpu;
+
+ /*
+ * Take the first CPU in the mask with pending stripes. Should we do
+ * something to avoid starving CPUs later in the mask?
+ */
+ for_each_cpu(cpu, mask) {
+ struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+ if (!list_empty(&percpu->handle_list)) {
+ handle_list = &percpu->handle_list;
+ break;
+ }
+ }
pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
__func__,
- list_empty(&conf->handle_list) ? "empty" : "busy",
+ !handle_list ? "empty" : "busy",
list_empty(&conf->hold_list) ? "empty" : "busy",
atomic_read(&conf->pending_full_writes), conf->bypass_count);
- if (!list_empty(&conf->handle_list)) {
- sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
+ if (handle_list) {
+ sh = list_entry(handle_list->next, typeof(*sh), lru);
if (list_empty(&conf->hold_list))
conf->bypass_count = 0;
((conf->bypass_threshold &&
conf->bypass_count > conf->bypass_threshold) ||
atomic_read(&conf->pending_full_writes) == 0)) {
- sh = list_entry(conf->hold_list.next,
- typeof(*sh), lru);
- conf->bypass_count -= conf->bypass_threshold;
- if (conf->bypass_count < 0)
- conf->bypass_count = 0;
- } else
+
+ list_for_each_entry(tmp, &conf->hold_list, lru) {
+ if (cpumask_test_cpu(tmp->cpu, mask) ||
+ !cpu_online(tmp->cpu)) {
+ sh = tmp;
+ break;
+ }
+ }
+
+ if (sh) {
+ conf->bypass_count -= conf->bypass_threshold;
+ if (conf->bypass_count < 0)
+ conf->bypass_count = 0;
+ }
+ }
+
+ if (!sh)
return NULL;
list_del_init(&sh->lru);
}
#define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf)
+static int handle_active_stripes(struct r5conf *conf, cpumask_t *mask)
{
struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
int i, batch_size = 0;
while (batch_size < MAX_STRIPE_BATCH &&
- (sh = __get_priority_stripe(conf)) != NULL)
+ (sh = __get_priority_stripe(conf, mask)) != NULL)
batch[batch_size++] = sh;
if (batch_size == 0)
return batch_size;
}
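+/*
+ * Body of an auxiliary stripe-handling thread: repeatedly pull batches of
+ * stripes belonging to CPUs in this thread's work_mask until none remain.
+ */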
+static void raid5auxd(struct md_thread *thread)
+{
+ struct mddev *mddev = thread->mddev;
+ struct r5conf *conf = mddev->private;
+ struct blk_plug plug;
+ int handled;
+ struct raid5_auxth *auxth = thread->private;
+
+ pr_debug("+++ raid5auxd active\n");
+
+ blk_start_plug(&plug);
+ handled = 0;
+ spin_lock_irq(&conf->device_lock);
+ while (1) {
+ int batch_size;
+
+ batch_size = handle_active_stripes(conf, &auxth->work_mask);
+ if (!batch_size)
+ break;
+ handled += batch_size;
+ }
+ pr_debug("%d stripes handled\n", handled);
+
+ spin_unlock_irq(&conf->device_lock);
+ blk_finish_plug(&plug);
+
+ pr_debug("--- raid5auxd inactive\n");
+}
+
/*
* This is our raid5 kernel thread.
*
handled++;
}
- batch_size = handle_active_stripes(conf);
+ batch_size = handle_active_stripes(conf, &conf->work_mask);
if (!batch_size)
break;
handled += batch_size;
static struct md_sysfs_entry
raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
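+/*
+ * For every online CPU, record which auxiliary threads (by index) handle
+ * its stripes, and leave the CPUs not claimed by any auxiliary thread to
+ * the main raid5d thread via conf->work_mask.  Called with
+ * conf->device_lock held.
+ */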
+static void raid5_update_threads_handle_mask(struct mddev *mddev)
+{
+ int cpu, i;
+ struct raid5_percpu *percpu;
+ struct r5conf *conf = mddev->private;
+
+ for_each_online_cpu(cpu) {
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+ cpumask_clear(&percpu->handle_threads);
+ }
+ cpumask_copy(&conf->work_mask, cpu_online_mask);
+
+ for (i = 0; i < conf->aux_thread_num; i++) {
+ cpumask_t *work_mask = &conf->aux_threads[i]->work_mask;
+ for_each_cpu(cpu, work_mask) {
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+ cpumask_set_cpu(i, &percpu->handle_threads);
+ }
+ cpumask_andnot(&conf->work_mask, &conf->work_mask,
+ work_mask);
+ }
+}
+
+struct raid5_auxth_sysfs {
+ struct attribute attr;
+ ssize_t (*show)(struct mddev *, struct raid5_auxth *, char *);
+ ssize_t (*store)(struct mddev *, struct raid5_auxth *,
+ const char *, size_t);
+};
+
+static ssize_t raid5_show_thread_cpulist(struct mddev *mddev,
+ struct raid5_auxth *thread, char *page)
+{
+ if (!mddev->private)
+ return 0;
+ return cpulist_scnprintf(page, PAGE_SIZE, &thread->work_mask);
+}
+
+static ssize_t
+raid5_store_thread_cpulist(struct mddev *mddev, struct raid5_auxth *thread,
+ const char *page, size_t len)
+{
+ struct r5conf *conf = mddev->private;
+ cpumask_var_t mask;
+
+ if (!conf)
+ return -ENODEV;
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (cpulist_parse(page, mask)) {
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
+
+ get_online_cpus();
+ spin_lock_irq(&conf->device_lock);
+ cpumask_copy(&thread->work_mask, mask);
+ raid5_update_threads_handle_mask(mddev);
+ spin_unlock_irq(&conf->device_lock);
+ put_online_cpus();
+ set_cpus_allowed_ptr(thread->thread->tsk, mask);
+
+ free_cpumask_var(mask);
+ return len;
+}
+
+static struct raid5_auxth_sysfs thread_cpulist =
+__ATTR(cpulist, S_IRUGO|S_IWUSR, raid5_show_thread_cpulist,
+ raid5_store_thread_cpulist);
+
+static struct attribute *auxth_attrs[] = {
+ &thread_cpulist.attr,
+ NULL,
+};
+
+static ssize_t
+raid5_auxth_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct raid5_auxth_sysfs *entry = container_of(attr,
+ struct raid5_auxth_sysfs, attr);
+ struct raid5_auxth *thread = container_of(kobj,
+ struct raid5_auxth, kobj);
+ struct mddev *mddev = thread->thread->mddev;
+ ssize_t ret;
+
+ if (!entry->show)
+ return -EIO;
+ /* mddev_lock() is interruptible; only proceed if we got the lock. */
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+ ret = entry->show(mddev, thread, page);
+ mddev_unlock(mddev);
+ return ret;
+}
+
+static ssize_t
+raid5_auxth_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct raid5_auxth_sysfs *entry = container_of(attr,
+ struct raid5_auxth_sysfs, attr);
+ struct raid5_auxth *thread = container_of(kobj,
+ struct raid5_auxth, kobj);
+ struct mddev *mddev = thread->thread->mddev;
+ ssize_t ret;
+
+ if (!entry->store)
+ return -EIO;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+ ret = entry->store(mddev, thread, page, length);
+ mddev_unlock(mddev);
+ return ret;
+}
+
+static void raid5_auxth_release(struct kobject *kobj)
+{
+ struct raid5_auxth *thread = container_of(kobj,
+ struct raid5_auxth, kobj);
+ kfree(thread);
+}
+
+static const struct sysfs_ops raid5_auxth_sysfsops = {
+ .show = raid5_auxth_attr_show,
+ .store = raid5_auxth_attr_store,
+};
+
+static struct kobj_type raid5_auxth_ktype = {
+ .release = raid5_auxth_release,
+ .sysfs_ops = &raid5_auxth_sysfsops,
+ .default_attrs = auxth_attrs,
+};
+
+static ssize_t
+raid5_show_auxthread_number(struct mddev *mddev, char *page)
+{
+ struct r5conf *conf = mddev->private;
+ if (conf)
+ return sprintf(page, "%d\n", conf->aux_thread_num);
+ else
+ return 0;
+}
+
+static void raid5_auxth_delete(struct work_struct *ws)
+{
+ struct raid5_auxth *thread = container_of(ws, struct raid5_auxth,
+ del_work);
+
+ kobject_del(&thread->kobj);
+ kobject_put(&thread->kobj);
+}
+
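+/*
+ * Stop an auxiliary thread.  The kobject removal is deferred to md's misc
+ * workqueue rather than done inline, since this may be called from a
+ * sysfs store path.
+ */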
+static void __free_aux_thread(struct mddev *mddev, struct raid5_auxth *thread)
+{
+ md_unregister_thread(&thread->thread);
+ INIT_WORK(&thread->del_work, raid5_auxth_delete);
+ kobject_get(&thread->kobj);
+ md_queue_misc_work(&thread->del_work);
+}
+
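+/*
+ * Create one auxiliary md thread and register its kobject (auxth<i>)
+ * under the array's md kobject.  A new thread starts out handling all
+ * online CPUs until its cpulist attribute is changed.
+ */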
+static struct raid5_auxth *__create_aux_thread(struct mddev *mddev, int i)
+{
+ struct raid5_auxth *thread;
+ char name[10];
+
+ thread = kzalloc(sizeof(*thread), GFP_KERNEL);
+ if (!thread)
+ return NULL;
+ snprintf(name, sizeof(name), "aux%d", i);
+ thread->thread = md_register_thread(raid5auxd, mddev, name);
+ if (!thread->thread) {
+ kfree(thread);
+ return NULL;
+ }
+ thread->thread->private = thread;
+
+ cpumask_copy(&thread->work_mask, cpu_online_mask);
+
+ if (kobject_init_and_add(&thread->kobj, &raid5_auxth_ktype,
+ &mddev->kobj, "auxth%d", i)) {
+ md_unregister_thread(&thread->thread);
+ kfree(thread);
+ return NULL;
+ }
+ return thread;
+}
+
+static ssize_t
+raid5_store_auxthread_number(struct mddev *mddev, const char *page, size_t len)
+{
+ struct r5conf *conf = mddev->private;
+ unsigned long new;
+ int i;
+ struct raid5_auxth **threads;
+
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+ if (!conf)
+ return -ENODEV;
+
+ if (kstrtoul(page, 10, &new))
+ return -EINVAL;
+
+ if (new == conf->aux_thread_num)
+ return len;
+
+ /* There is no point in creating more threads than there are online CPUs */
+ if (new > num_online_cpus())
+ return -EINVAL;
+
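+ /* Growing: create the additional threads first, then swap in the
+  * enlarged array under device_lock. */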
+ if (new > conf->aux_thread_num) {
+ threads = kzalloc(sizeof(struct raid5_auxth *) * new,
+ GFP_KERNEL);
+ if (!threads)
+ return -ENOMEM;
+
+ i = conf->aux_thread_num;
+ while (i < new) {
+ threads[i] = __create_aux_thread(mddev, i);
+ if (!threads[i])
+ goto error;
+
+ i++;
+ }
+ memcpy(threads, conf->aux_threads,
+ sizeof(struct raid5_auxth *) * conf->aux_thread_num);
+ get_online_cpus();
+ spin_lock_irq(&conf->device_lock);
+ kfree(conf->aux_threads);
+ conf->aux_threads = threads;
+ conf->aux_thread_num = new;
+ raid5_update_threads_handle_mask(mddev);
+ spin_unlock_irq(&conf->device_lock);
+ put_online_cpus();
+ } else {
+ int old = conf->aux_thread_num;
+
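+ /* Shrinking: publish the smaller thread count and recompute coverage
+  * under the lock, then free the surplus threads outside it. */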
+ get_online_cpus();
+ spin_lock_irq(&conf->device_lock);
+ conf->aux_thread_num = new;
+ raid5_update_threads_handle_mask(mddev);
+ spin_unlock_irq(&conf->device_lock);
+ put_online_cpus();
+ for (i = new; i < old; i++)
+ __free_aux_thread(mddev, conf->aux_threads[i]);
+ }
+
+ return len;
+error:
+ while (--i >= conf->aux_thread_num)
+ __free_aux_thread(mddev, threads[i]);
+ kfree(threads);
+ return -ENOMEM;
+}
+
+static struct md_sysfs_entry
+raid5_auxthread_number = __ATTR(auxthread_number, S_IRUGO|S_IWUSR,
+ raid5_show_auxthread_number,
+ raid5_store_auxthread_number);
+
static struct attribute *raid5_attrs[] = {
&raid5_stripecache_size.attr,
&raid5_stripecache_active.attr,
&raid5_preread_bypass_threshold.attr,
+ &raid5_auxthread_number.attr,
NULL,
};
static struct attribute_group raid5_attrs_group = {
static void free_conf(struct r5conf *conf)
{
+ kfree(conf->aux_threads);
shrink_stripes(conf);
raid5_free_percpu(conf);
kfree(conf->disks);
void *hcpu)
{
struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify);
- long cpu = (long)hcpu;
+ long cpu = (long)hcpu, anycpu;
struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
switch (action) {
__func__, cpu);
return notifier_from_errno(-ENOMEM);
}
+ INIT_LIST_HEAD(&percpu->handle_list);
+ cpumask_clear(&percpu->handle_threads);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
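+ /* Move any stripes still queued on the dead CPU to an online CPU's handle list. */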
+ spin_lock_irq(&conf->device_lock);
+ anycpu = cpumask_any(cpu_online_mask);
+ list_splice_tail_init(&percpu->handle_list,
+ &per_cpu_ptr(conf->percpu, anycpu)->handle_list);
+ spin_unlock_irq(&conf->device_lock);
+
safe_put_page(percpu->spare_page);
kfree(percpu->scribble);
percpu->spare_page = NULL;
default:
break;
}
+
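+ /* Recompute per-CPU thread coverage after any hotplug event. */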
+ spin_lock_irq(&conf->device_lock);
+ raid5_update_threads_handle_mask(conf->mddev);
+ spin_unlock_irq(&conf->device_lock);
return NOTIFY_OK;
}
#endif
get_online_cpus();
err = 0;
for_each_present_cpu(cpu) {
+ struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+
if (conf->level == 6) {
spare_page = alloc_page(GFP_KERNEL);
if (!spare_page) {
err = -ENOMEM;
break;
}
- per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+ percpu->spare_page = spare_page;
}
scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
if (!scribble) {
err = -ENOMEM;
break;
}
- per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+ percpu->scribble = scribble;
+ INIT_LIST_HEAD(&percpu->handle_list);
+ cpumask_clear(&percpu->handle_threads);
}
#ifdef CONFIG_HOTPLUG_CPU
conf->cpu_notify.notifier_call = raid456_cpu_notify;
spin_lock_init(&conf->device_lock);
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
- INIT_LIST_HEAD(&conf->handle_list);
INIT_LIST_HEAD(&conf->hold_list);
INIT_LIST_HEAD(&conf->delayed_list);
INIT_LIST_HEAD(&conf->bitmap_list);
conf->bypass_threshold = BYPASS_THRESHOLD;
conf->recovery_disabled = mddev->recovery_disabled - 1;
+ cpumask_copy(&conf->work_mask, cpu_online_mask);
+
conf->raid_disks = mddev->raid_disks;
if (mddev->reshape_position == MaxSector)
conf->previous_raid_disks = mddev->raid_disks;
static int stop(struct mddev *mddev)
{
struct r5conf *conf = mddev->private;
+ int i;
+
+ for (i = 0; i < conf->aux_thread_num; i++)
+ __free_aux_thread(mddev, conf->aux_threads[i]);
md_unregister_thread(&mddev->thread);
if (mddev->queue)