From abbf098e6e1e23d5d247b9eaaf325e67f67b0328 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Dec 2011 10:17:54 +1100 Subject: [PATCH] md/raid10: preferentially read from replacement device if possible. When reading (for array reads, not for recovery etc) we read from the replacement device if it has recovered far enough. This requires storing the chosen rdev in the 'r10_bio' so we can make sure to drop the ref on the right device when the read finishes. Signed-off-by: NeilBrown --- drivers/md/raid10.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 70356c130273..5b886218110e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -324,11 +324,13 @@ static void raid10_end_read_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; int slot, dev; + struct md_rdev *rdev; struct r10conf *conf = r10_bio->mddev->private; slot = r10_bio->read_slot; dev = r10_bio->devs[slot].devnum; + rdev = r10_bio->devs[slot].rdev; /* * this branch is our 'one mirror IO has finished' event handler: */ @@ -346,7 +348,7 @@ static void raid10_end_read_request(struct bio *bio, int error) */ set_bit(R10BIO_Uptodate, &r10_bio->state); raid_end_bio_io(r10_bio); - rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); + rdev_dec_pending(rdev, conf->mddev); } else { /* * oops, read error - keep the refcount on the rdev @@ -355,7 +357,7 @@ static void raid10_end_read_request(struct bio *bio, int error) printk_ratelimited(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n", mdname(conf->mddev), - bdevname(conf->mirrors[dev].rdev->bdev, b), + bdevname(rdev->bdev, b), (unsigned long long)r10_bio->sector); set_bit(R10BIO_ReadError, &r10_bio->state); reschedule_retry(r10_bio); @@ -599,7 +601,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, int sectors = r10_bio->sectors; int best_good_sectors; sector_t new_distance, best_dist; - struct md_rdev *rdev; + struct md_rdev *rdev, *best_rdev; int do_balance; int best_slot; @@ -608,6 +610,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, retry: sectors = r10_bio->sectors; best_slot = -1; + best_rdev = NULL; best_dist = MaxSector; best_good_sectors = 0; do_balance = 1; @@ -629,10 +632,16 @@ retry: if (r10_bio->devs[slot].bio == IO_BLOCKED) continue; disk = r10_bio->devs[slot].devnum; - rdev = rcu_dereference(conf->mirrors[disk].rdev); + rdev = rcu_dereference(conf->mirrors[disk].replacement); + if (rdev == NULL || test_bit(Faulty, &rdev->flags) || + r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) + rdev = rcu_dereference(conf->mirrors[disk].rdev); if (rdev == NULL) continue; - if (!test_bit(In_sync, &rdev->flags)) + if (test_bit(Faulty, &rdev->flags)) + continue; + if (!test_bit(In_sync, &rdev->flags) && + r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) continue; dev_sector = r10_bio->devs[slot].addr; @@ -657,6 +666,7 @@ retry: if (good_sectors > best_good_sectors) { best_good_sectors = good_sectors; best_slot = slot; + best_rdev = rdev; } if (!do_balance) /* Must read from here */ @@ -685,16 +695,15 @@ retry: if (new_distance < best_dist) { best_dist = new_distance; best_slot = slot; + best_rdev = rdev; } } - if (slot == conf->copies) + if (slot >= conf->copies) { slot = best_slot; + rdev = best_rdev; + } if (slot >= 0) { - disk = r10_bio->devs[slot].devnum; - rdev = rcu_dereference(conf->mirrors[disk].rdev); - if (!rdev) - goto retry; atomic_inc(&rdev->nr_pending); if (test_bit(Faulty, &rdev->flags)) { /* Cannot risk returning a device that failed @@ -990,6 +999,7 @@ read_again: max_sectors); r10_bio->devs[slot].bio = read_bio; + r10_bio->devs[slot].rdev = rdev; read_bio->bi_sector = r10_bio->devs[slot].addr + rdev->data_offset; @@ -2088,10 +2098,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) { int slot = r10_bio->read_slot; - int mirror = r10_bio->devs[slot].devnum; struct bio *bio; struct r10conf *conf = mddev->private; - struct md_rdev *rdev; + struct md_rdev *rdev = r10_bio->devs[slot].rdev; char b[BDEVNAME_SIZE]; unsigned long do_sync; int max_sectors; @@ -2109,7 +2118,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) fix_read_error(conf, mddev, r10_bio); unfreeze_array(conf); } - rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); + rdev_dec_pending(rdev, mddev); bio = r10_bio->devs[slot].bio; bdevname(bio->bi_bdev, b); @@ -2144,6 +2153,7 @@ read_more: r10_bio->sector - bio->bi_sector, max_sectors); r10_bio->devs[slot].bio = bio; + r10_bio->devs[slot].rdev = rdev; bio->bi_sector = r10_bio->devs[slot].addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; -- 2.39.5