return sh;
}
+/* Determine if 'data_offset' or 'new_data_offset' should be used
+ * in this stripe_head.
+ *
+ * @conf: array configuration; ->reshape_progress and ->generation
+ *        are read here.
+ * @sh:   the stripe_head being processed; ->generation is read here.
+ *
+ * Returns 1 if the caller should add rdev->new_data_offset to
+ * sh->sector, or 0 if it should add rdev->data_offset.
+ *
+ * The result is 0 when conf->reshape_progress is MaxSector, and also
+ * when sh->generation is exactly one behind conf->generation.
+ * In every other case the result is 1.
+ */
+static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
+{
+ sector_t progress = conf->reshape_progress;
+ /* Need a memory barrier to make sure we see the value
+ * of conf->generation, or ->data_offset that was set before
+ * reshape_progress was updated.
+ *
+ * reshape_progress is deliberately sampled into a local before
+ * the barrier, and conf->generation is only read after it, so
+ * the order of these statements must not be changed.
+ */
+ smp_rmb();
+ if (progress == MaxSector)
+ return 0;
+ if (sh->generation == conf->generation - 1)
+ return 0;
+ /* We are in a reshape, and this is a new-generation stripe,
+ * so use new_data_offset.
+ * (Reaching here means progress != MaxSector and sh->generation
+ * is not conf->generation - 1.)
+ */
+ return 1;
+}
+
static void
raid5_end_read_request(struct bio *bi, int error);
static void
__func__, (unsigned long long)sh->sector,
bi->bi_rw, i);
atomic_inc(&sh->count);
- bi->bi_sector = sh->sector + rdev->data_offset;
+ if (use_new_offset(conf, sh))
+ bi->bi_sector = (sh->sector
+ + rdev->new_data_offset);
+ else
+ bi->bi_sector = (sh->sector
+ + rdev->data_offset);
bi->bi_flags = 1 << BIO_UPTODATE;
bi->bi_idx = 0;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
__func__, (unsigned long long)sh->sector,
rbi->bi_rw, i);
atomic_inc(&sh->count);
- rbi->bi_sector = sh->sector + rrdev->data_offset;
+ if (use_new_offset(conf, sh))
+ rbi->bi_sector = (sh->sector
+ + rrdev->new_data_offset);
+ else
+ rbi->bi_sector = (sh->sector
+ + rrdev->data_offset);
rbi->bi_flags = 1 << BIO_UPTODATE;
rbi->bi_idx = 0;
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
char b[BDEVNAME_SIZE];
struct md_rdev *rdev = NULL;
-
+ sector_t s;
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
if (!rdev)
rdev = conf->disks[i].rdev;
+ if (use_new_offset(conf, sh))
+ s = sh->sector + rdev->new_data_offset;
+ else
+ s = sh->sector + rdev->data_offset;
if (uptodate) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
"md/raid:%s: read error corrected"
" (%lu sectors at %llu on %s)\n",
mdname(conf->mddev), STRIPE_SECTORS,
- (unsigned long long)(sh->sector
- + rdev->data_offset),
+ (unsigned long long)s,
bdevname(rdev->bdev, b));
atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
clear_bit(R5_ReadError, &sh->dev[i].flags);
"md/raid:%s: read error on replacement device "
"(sector %llu on %s).\n",
mdname(conf->mddev),
- (unsigned long long)(sh->sector
- + rdev->data_offset),
+ (unsigned long long)s,
bdn);
else if (conf->mddev->degraded >= conf->max_degraded)
printk_ratelimited(
"md/raid:%s: read error not correctable "
"(sector %llu on %s).\n",
mdname(conf->mddev),
- (unsigned long long)(sh->sector
- + rdev->data_offset),
+ (unsigned long long)s,
bdn);
else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
/* Oh, no!!! */
"md/raid:%s: read error NOT corrected!! "
"(sector %llu on %s).\n",
mdname(conf->mddev),
- (unsigned long long)(sh->sector
- + rdev->data_offset),
+ (unsigned long long)s,
bdn);
else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
raid_bio->bi_next = (void*)rdev;
align_bi->bi_bdev = rdev->bdev;
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
+ /* No reshape active, so we can trust rdev->data_offset */
align_bi->bi_sector += rdev->data_offset;
if (!bio_fits_rdev(align_bi) ||
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->raid_disks - conf->max_degraded));
- rdev_for_each(rdev, mddev)
+ rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->new_data_offset << 9);
+ }
}
return 0;
conf->chunk_sectors = mddev->new_chunk_sectors;
conf->prev_algo = conf->algorithm;
conf->algorithm = mddev->new_layout;
+ conf->generation++;
+ /* Code that selects data_offset needs to see the generation update
+ * if reshape_progress has been set, so a memory barrier is needed.
+ */
+ smp_mb();
if (mddev->reshape_backwards)
conf->reshape_progress = raid5_size(mddev, 0, 0);
else
conf->reshape_progress = 0;
conf->reshape_safe = conf->reshape_progress;
- conf->generation++;
spin_unlock_irq(&conf->device_lock);
/* Add some new drives, as many as will fit.
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+ rdev_for_each(rdev, mddev)
+ rdev->new_data_offset = rdev->data_offset;
+ smp_wmb();
conf->reshape_progress = MaxSector;
mddev->reshape_position = MaxSector;
spin_unlock_irq(&conf->device_lock);
{
if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
+ struct md_rdev *rdev;
spin_lock_irq(&conf->device_lock);
conf->previous_raid_disks = conf->raid_disks;
+ rdev_for_each(rdev, conf->mddev)
+ rdev->data_offset = rdev->new_data_offset;
+ smp_wmb();
conf->reshape_progress = MaxSector;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);