]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
md: fix raid5 'repair' operations
authorDan Williams <dan.j.williams@intel.com>
Tue, 13 May 2008 19:10:11 +0000 (19:10 +0000)
committerGreg Kroah-Hartman <gregkh@suse.de>
Thu, 15 May 2008 14:50:08 +0000 (07:50 -0700)
commit c8894419acf5e56851de9741c5047bebd78acd1f upstream
Date: Mon, 12 May 2008 14:02:12 -0700
Subject: md: fix raid5 'repair' operations

commit bd2ab67030e9116f1e4aae1289220255412b37fd "md: close a livelock window
in handle_parity_checks5" introduced a bug in handling 'repair' operations.
After a repair operation completes we clear the state bits tracking this
operation.  However, they are cleared too early and this results in the code
deciding to re-run the parity check operation.  Since we have done the repair
in memory the second check does not find a mismatch and thus does not do a
writeback.

Test results:
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
51072
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
0

(also fix incorrect indentation)

Tested-by: George Spelvin <linux@horizon.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
drivers/md/raid5.c

index b162b839a6620921eb258252ee9ddaf1a1d12810..18a1379ed04c59746d3855fc72666982082f8361 100644 (file)
@@ -2354,8 +2354,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 
        /* complete a check operation */
        if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
-           clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
-           clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+               clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
+               clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
                if (s->failed == 0) {
                        if (sh->ops.zero_sum_result == 0)
                                /* parity is correct (on disc,
@@ -2385,16 +2385,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                        canceled_check = 1; /* STRIPE_INSYNC is not set */
        }
 
-       /* check if we can clear a parity disk reconstruct */
-       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
-               test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-
-               clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
-       }
-
        /* start a new check operation if there are no failures, the stripe is
         * not insync, and a repair is not in flight
         */
@@ -2409,6 +2399,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                }
        }
 
+       /* check if we can clear a parity disk reconstruct */
+       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
+           test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+
+               clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
+               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
+               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
+               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+       }
+
+
        /* Wait for check parity and compute block operations to complete
         * before write-back.  If a failure occurred while the check operation
         * was in flight we need to cycle this stripe through handle_stripe