bi->bi_end_io = raid5_end_read_request;
rcu_read_lock();
- rdev = rcu_dereference(conf->disks[i].rdev);
rrdev = rcu_dereference(conf->disks[i].replacement);
+ smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
+ rdev = rcu_dereference(conf->disks[i].rdev);
+ if (!rdev) {
+ rdev = rrdev;
+ rrdev = NULL;
+ }
if (rw & WRITE) {
if (replace_only)
rdev = NULL;
+ if (rdev == rrdev)
+ /* We raced and saw duplicates */
+ rrdev = NULL;
} else {
- if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
+ if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev)
rdev = rrdev;
rrdev = NULL;
}
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
char b[BDEVNAME_SIZE];
- struct md_rdev *rdev;
+ struct md_rdev *rdev = NULL;
for (i=0 ; i<disks; i++)
return;
}
if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
+ /* If replacement finished while this request was outstanding,
+ * 'replacement' might be NULL already.
+ * In that case it moved down to 'rdev'.
+ * rdev is not removed until all requests are finished.
+ */
rdev = conf->disks[i].replacement;
- else
+ if (!rdev)
rdev = conf->disks[i].rdev;
if (uptodate) {
}
if (bi == &sh->dev[i].rreq) {
rdev = conf->disks[i].replacement;
- replacement = 1;
+ if (rdev)
+ replacement = 1;
+ else
+ /* rdev was removed and 'replacement'
+ * replaced it. rdev is not removed
+ * until all requests are finished.
+ */
+ rdev = conf->disks[i].rdev;
break;
}
}
}
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
rdev = conf->disks[i].replacement;
+ if (!rdev)
+ /* rdev have been moved down */
+ rdev = conf->disks[i].rdev;
rdev_clear_badblocks(rdev, sh->sector,
STRIPE_SECTORS);
rdev_dec_pending(rdev, conf->mddev);
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->disks + i;
- if (tmp->rdev
+ if (tmp->replacement
+ && tmp->replacement->recovery_offset == MaxSector
+ && !test_bit(Faulty, &tmp->replacement->flags)
+ && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+ /* Replacement has just become active. */
+ if (!tmp->rdev
+ || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+ count++;
+ if (tmp->rdev) {
+ /* Replaced device not technically faulty,
+ * but we need to be sure it gets removed
+ * and never re-added.
+ */
+ set_bit(Faulty, &tmp->rdev->flags);
+ sysfs_notify_dirent_safe(
+ tmp->rdev->sysfs_state);
+ }
+ sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
+ } else if (tmp->rdev
&& tmp->rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
if (!test_bit(Faulty, &rdev->flags) &&
mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
+ (!p->replacement || p->replacement == rdev) &&
number < conf->raid_disks) {
err = -EBUSY;
goto abort;
/* lost the race, try later */
err = -EBUSY;
*rdevp = rdev;
- }
+ } else if (p->replacement) {
+ /* We must have just cleared 'rdev' */
+ p->rdev = p->replacement;
+ clear_bit(Replacement, &p->replacement->flags);
+ smp_mb(); /* Make sure other CPUs may see both as identical
+ * but will never see neither - if they are careful
+ */
+ p->replacement = NULL;
+ clear_bit(WantReplacement, &rdev->flags);
+ } else
+ /* We might have just removed the Replacement as faulty-
+ * clear the bit just in case
+ */
+ clear_bit(WantReplacement, &rdev->flags);
abort:
print_raid5_conf(conf);