From a6da4ef85cef0382244fc588c901e133a2ec5109 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 14 Apr 2015 10:45:22 -0500 Subject: [PATCH] md: re-add a failed disk This adds the capability of re-adding a failed disk by writing "re-add" to /sys/block/mdXX/md/dev-YYY/state. This facilitates adding disks which have encountered a temporary error such as a network disconnection/hiccup in an iSCSI device, or a SAN cable disconnection which has been restored. In such a situation, you do not need to remove and re-add the device. Writing re-add to the failed device's state would add it again to the array and perform the recovery of only the blocks which were written after the device failed. This works for generic md, and is not related to clustering. However, this patch is to ease re-add operations listed above in clustering environments. Signed-off-by: Goldwyn Rodrigues Signed-off-by: NeilBrown --- drivers/md/md.c | 57 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index ca011d1d1de7..429e95e9a942 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2375,6 +2375,36 @@ repeat: } EXPORT_SYMBOL(md_update_sb); +static int add_bound_rdev(struct md_rdev *rdev) +{ + struct mddev *mddev = rdev->mddev; + int err = 0; + + if (!mddev->pers->hot_remove_disk) { + /* If there is hot_add_disk but no hot_remove_disk + * then added disks for geometry changes, + * and should be added immediately. + */ + super_types[mddev->major_version]. + validate_super(mddev, rdev); + err = mddev->pers->hot_add_disk(mddev, rdev); + if (err) { + unbind_rdev_from_array(rdev); + export_rdev(rdev); + return err; + } + } + sysfs_notify_dirent_safe(rdev->sysfs_state); + + set_bit(MD_CHANGE_DEVS, &mddev->flags); + if (mddev->degraded) + set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + md_new_event(mddev); + md_wakeup_thread(mddev->thread); + return 0; +} + /* words written to sysfs files may, or may not, be \n terminated. * We want to accept with case. For this we use cmd_match. */ @@ -2564,6 +2594,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) clear_bit(Replacement, &rdev->flags); err = 0; } + } else if (cmd_match(buf, "re-add")) { + if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) { + clear_bit(Faulty, &rdev->flags); + err = add_bound_rdev(rdev); + } else + err = -EBUSY; } if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -5875,29 +5911,10 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) rdev->raid_disk = -1; err = bind_rdev_to_array(rdev, mddev); - if (!err && !mddev->pers->hot_remove_disk) { - /* If there is hot_add_disk but no hot_remove_disk - * then added disks for geometry changes, - * and should be added immediately. - */ - super_types[mddev->major_version]. - validate_super(mddev, rdev); - err = mddev->pers->hot_add_disk(mddev, rdev); - if (err) - unbind_rdev_from_array(rdev); - } if (err) export_rdev(rdev); else - sysfs_notify_dirent_safe(rdev->sysfs_state); - - set_bit(MD_CHANGE_DEVS, &mddev->flags); - if (mddev->degraded) - set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - if (!err) - md_new_event(mddev); - md_wakeup_thread(mddev->thread); + err = add_bound_rdev(rdev); if (mddev_is_clustered(mddev) && (info->state & (1 << MD_DISK_CLUSTER_ADD))) md_cluster_ops->add_new_disk_finish(mddev); -- 2.39.5