X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid5.c;h=9768a7d67148225ccafad99f473c1d4bc107729a;hb=bc2607f393bd4fb844c1886a02af929ca0372056;hp=719445004dd992b95792127aa42f1c3e1680abfd;hpb=34b343cff4354ab9864be83be88405fd53d928a0;p=karo-tx-linux.git diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 719445004dd9..9768a7d67148 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1658,8 +1658,10 @@ static void raid5_end_write_request(struct bio *bi, int error) return; } - if (!uptodate) - md_error(conf->mddev, conf->disks[i].rdev); + if (!uptodate) { + set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags); + set_bit(R5_WriteError, &sh->dev[i].flags); + } rdev_dec_pending(conf->disks[i].rdev, conf->mddev); @@ -1706,6 +1708,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT @@ -2231,9 +2234,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, rcu_read_lock(); rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) - /* multiple read failures in one stripe */ - md_error(conf->mddev, rdev); + atomic_inc(&rdev->nr_pending); + else + rdev = NULL; rcu_read_unlock(); + if (rdev) { + if (!rdev_set_badblocks( + rdev, + sh->sector, + STRIPE_SECTORS, 0)) + md_error(conf->mddev, rdev); + rdev_dec_pending(rdev, conf->mddev); + } } spin_lock_irq(&conf->device_lock); /* fail all writes first */ @@ -2312,6 +2324,41 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, md_wakeup_thread(conf->mddev->thread); } +static void +handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh, + struct stripe_head_state *s) +{ + int abort = 0; + int i; + + md_done_sync(conf->mddev, STRIPE_SECTORS, 0); + clear_bit(STRIPE_SYNCING, &sh->state); + s->syncing = 0; + /* There is nothing more to do for sync/check/repair. + * For recover we need to record a bad block on all + * non-sync devices, or abort the recovery + */ + if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) + return; + /* During recovery devices cannot be removed, so locking and + * refcounting of rdevs is not needed + */ + for (i = 0; i < conf->raid_disks; i++) { + mdk_rdev_t *rdev = conf->disks[i].rdev; + if (!rdev + || test_bit(Faulty, &rdev->flags) + || test_bit(In_sync, &rdev->flags)) + continue; + if (!rdev_set_badblocks(rdev, sh->sector, + STRIPE_SECTORS, 0)) + abort = 1; + } + if (abort) { + conf->recovery_disabled = conf->mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery); + } +} + /* fetch_block - checks the given member device to see if its data needs * to be read or computed to satisfy a request. * @@ -2922,6 +2969,9 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) spin_lock_irq(&conf->device_lock); for (i=disks; i--; ) { mdk_rdev_t *rdev; + sector_t first_bad; + int bad_sectors; + int is_bad = 0; dev = &sh->dev[i]; @@ -2958,21 +3008,46 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) if (dev->written) s->written++; rdev = rcu_dereference(conf->disks[i].rdev); - if (s->blocked_rdev == NULL && - rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - s->blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); + if (rdev) { + is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors); + if (s->blocked_rdev == NULL + && (test_bit(Blocked, &rdev->flags) + || is_bad < 0)) { + if (is_bad < 0) + set_bit(BlockedBadBlocks, + &rdev->flags); + s->blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); + } } clear_bit(R5_Insync, &dev->flags); if (!rdev) /* Not in-sync */; - else if (test_bit(In_sync, &rdev->flags)) + else if (is_bad) { + /* also not in-sync */ + if (!test_bit(WriteErrorSeen, &rdev->flags)) { + /* treat as in-sync, but with a read error + * which we can now try to correct + */ + set_bit(R5_Insync, &dev->flags); + set_bit(R5_ReadError, &dev->flags); + } + } else if (test_bit(In_sync, &rdev->flags)) set_bit(R5_Insync, &dev->flags); else { /* in sync if before recovery_offset */ if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) set_bit(R5_Insync, &dev->flags); } + if (test_bit(R5_WriteError, &dev->flags)) { + clear_bit(R5_Insync, &dev->flags); + if (!test_bit(Faulty, &rdev->flags)) { + s->handle_bad_blocks = 1; + atomic_inc(&rdev->nr_pending); + } else + clear_bit(R5_WriteError, &dev->flags); + } if (!test_bit(R5_Insync, &dev->flags)) { /* The ReadError flag will just be confusing now */ clear_bit(R5_ReadError, &dev->flags); @@ -3021,6 +3096,11 @@ static void handle_stripe(struct stripe_head *sh) analyse_stripe(sh, &s); + if (s.handle_bad_blocks) { + set_bit(STRIPE_HANDLE, &sh->state); + goto finish; + } + if (unlikely(s.blocked_rdev)) { if (s.syncing || s.expanding || s.expanded || s.to_write || s.written) { @@ -3046,11 +3126,8 @@ static void handle_stripe(struct stripe_head *sh) */ if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written) handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); - if (s.failed > conf->max_degraded && s.syncing) { - md_done_sync(conf->mddev, STRIPE_SECTORS, 0); - clear_bit(STRIPE_SYNCING, &sh->state); - s.syncing = 0; - } + if (s.failed > conf->max_degraded && s.syncing) + handle_failed_sync(conf, sh, &s); /* * might be able to return some write requests if the parity blocks @@ -3221,6 +3298,20 @@ finish: if (unlikely(s.blocked_rdev)) md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); + if (s.handle_bad_blocks) + for (i = disks; i--; ) { + mdk_rdev_t *rdev; + struct r5dev *dev = &sh->dev[i]; + if (test_and_clear_bit(R5_WriteError, &dev->flags)) { + /* We own a safe reference to the rdev */ + rdev = conf->disks[i].rdev; + if (!rdev_set_badblocks(rdev, sh->sector, + STRIPE_SECTORS, 0)) + md_error(conf->mddev, rdev); + rdev_dec_pending(rdev, conf->mddev); + } + } + if (s.ops_request) raid_run_ops(sh, s.ops_request); @@ -3470,6 +3561,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) rcu_read_lock(); rdev = rcu_dereference(conf->disks[dd_idx].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) { + sector_t first_bad; + int bad_sectors; + atomic_inc(&rdev->nr_pending); rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; @@ -3477,8 +3571,10 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); align_bi->bi_sector += rdev->data_offset; - if (!bio_fits_rdev(align_bi)) { - /* too big in some way */ + if (!bio_fits_rdev(align_bi) || + is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, + &first_bad, &bad_sectors)) { + /* too big in some way, or has a known bad block */ bio_put(align_bi); rdev_dec_pending(rdev, mddev); return 0; @@ -4143,6 +4239,9 @@ static void raid5d(mddev_t *mddev) release_stripe(sh); cond_resched(); + if (mddev->flags & ~(1<device_lock); } pr_debug("%d stripes handled\n", handled); @@ -4667,10 +4766,6 @@ static int run(mddev_t *mddev) * 0 for a fully functional array, 1 or 2 for a degraded array. */ list_for_each_entry(rdev, &mddev->disks, same_set) { - if (rdev->badblocks.count) { - printk(KERN_ERR "md/raid5: cannot handle bad blocks yet\n"); - goto abort; - } if (rdev->raid_disk < 0) continue; if (test_bit(In_sync, &rdev->flags)) { @@ -4951,6 +5046,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number) * isn't possible. */ if (!test_bit(Faulty, &rdev->flags) && + mddev->recovery_disabled != conf->recovery_disabled && !has_failed(conf) && number < conf->raid_disks) { err = -EBUSY; @@ -4979,8 +5075,8 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) int first = 0; int last = conf->raid_disks - 1; - if (rdev->badblocks.count) - return -EINVAL; + if (mddev->recovery_disabled == conf->recovery_disabled) + return -EBUSY; if (has_failed(conf)) /* no point adding a device */