From a8d1b6513c1f8cd4da305ed52b7140d4114f421b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 19 Jul 2012 20:49:59 +1000 Subject: [PATCH] md/raid1: submit IO from originating thread instead of md thread. queuing writes to the md thread means that all requests go through the one processor which may not be able to keep up with very high request rates. So use the plugging infrastructure to submit all requests on unplug. If a 'schedule' is needed, we fall back on the old approach of handing the requests to the thread for it to handle. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 2 +- drivers/md/raid1.c | 57 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 15dbe03117e4..94e7f6ba2e11 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect prepare_to_wait(&bitmap->overflow_wait, &__wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&bitmap->counts.lock); - io_schedule(); + schedule(); finish_wait(&bitmap->overflow_wait, &__wait); continue; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c04efd2bf945..2d24a828bff7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -944,6 +944,44 @@ do_sync_io: pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); } +struct raid1_plug_cb { + struct blk_plug_cb cb; + struct bio_list pending; + int pending_cnt; +}; + +static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) +{ + struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, + cb); + struct mddev *mddev = plug->cb.data; + struct r1conf *conf = mddev->private; + struct bio *bio; + + if (from_schedule) { + spin_lock_irq(&conf->device_lock); + bio_list_merge(&conf->pending_bio_list, &plug->pending); + conf->pending_count += plug->pending_cnt; + spin_unlock_irq(&conf->device_lock); + md_wakeup_thread(mddev->thread); + kfree(plug); + return; + } + + /* we aren't scheduling, so we can do the write-out directly. */ + bio = bio_list_get(&plug->pending); + bitmap_unplug(mddev->bitmap); + wake_up(&conf->wait_barrier); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + kfree(plug); +} + static void make_request(struct mddev *mddev, struct bio * bio) { struct r1conf *conf = mddev->private; @@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); struct md_rdev *blocked_rdev; + struct blk_plug_cb *cb; + struct raid1_plug_cb *plug = NULL; int first_clone; int sectors_handled; int max_sectors; @@ -1259,11 +1299,22 @@ read_again: mbio->bi_private = r1_bio; atomic_inc(&r1_bio->remaining); + + cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid1_plug_cb, cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } /* Mustn't call r1_bio_write_done before this next test, -- 2.39.5