unsigned int min_pending;
struct md_rdev *rdev;
int choose_first;
+ int choose_next_idle;
rcu_read_lock();
/*
min_pending = UINT_MAX;
best_good_sectors = 0;
has_nonrot_disk = 0;
+ choose_next_idle = 0;
if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync))
sector_t first_bad;
int bad_sectors;
unsigned int pending;
+ bool nonrot;
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
} else
best_good_sectors = sectors;
- has_nonrot_disk |= blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+ nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+ has_nonrot_disk |= nonrot;
pending = atomic_read(&rdev->nr_pending);
dist = abs(this_sector - conf->mirrors[disk].head_position);
- if (choose_first
- /* Don't change to another disk for sequential reads */
- || conf->mirrors[disk].next_seq_sect == this_sector
- || dist == 0
- /* If device is idle, use it */
- || pending == 0) {
+ if (choose_first) {
best_disk = disk;
break;
}
+ /* Don't change to another disk for sequential reads */
+ if (conf->mirrors[disk].next_seq_sect == this_sector
+ || dist == 0) {
+ int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
+ struct raid1_info *mirror = &conf->mirrors[disk];
+
+ best_disk = disk;
+ /*
+ * If buffered sequential IO size exceeds optimal
+ * iosize, check if there is idle disk. If yes, choose
+ * the idle disk. read_balance could already choose an
+ * idle disk before noticing it's a sequential IO in
+ * this disk. This doesn't matter because this disk
+ * will idle, next time it will be utilized after the
+ * first disk has IO size exceeds optimal iosize. In
+ * this way, iosize of the first disk will be optimal
+ * iosize at least. iosize of the second disk might be
+ * small, but not a big deal since when the second disk
+ * starts IO, the first disk is likely still busy.
+ */
+ if (nonrot && opt_iosize > 0 &&
+ mirror->seq_start != MaxSector &&
+ mirror->next_seq_sect > opt_iosize &&
+ mirror->next_seq_sect - opt_iosize >=
+ mirror->seq_start) {
+ choose_next_idle = 1;
+ continue;
+ }
+ break;
+ }
+ /* If device is idle, use it */
+ if (pending == 0) {
+ best_disk = disk;
+ break;
+ }
+
+ if (choose_next_idle)
+ continue;
if (min_pending > pending) {
min_pending = pending;
goto retry;
}
sectors = best_good_sectors;
+
+ if (conf->mirrors[best_disk].next_seq_sect != this_sector)
+ conf->mirrors[best_disk].seq_start = this_sector;
+
conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
}
rcu_read_unlock();
mddev->merge_check_needed = 1;
disk->head_position = 0;
+ disk->seq_start = MaxSector;
}
conf->raid_disks = mddev->raid_disks;
conf->mddev = mddev;