Merge tag 'md-3.7-fixes' of git://neil.brown.name/md
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Nov 2012 22:11:13 +0000 (12:11 -1000)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Nov 2012 22:11:13 +0000 (12:11 -1000)
Pull md fixes from NeilBrown:
 "Several bug fixes for md in 3.7:

   - raid5 discard has problems
   - raid10 replacement devices have problems
   - bad block lock seqlock usage has problems
   - dm-raid doesn't free everything"

* tag 'md-3.7-fixes' of git://neil.brown.name/md:
  md/raid10: decrement correct pending counter when writing to replacement.
  md/raid10: close race that loses writes when replacement completes.
  md/raid5: Make sure we clear R5_Discard when discard is finished.
  md/raid5: move resolving of reconstruct_state earlier in stripe_handle.
  md/raid5: round discard alignment up to power of 2.
  md: make sure everything is freed when dm-raid stops an array.
  md: Avoid writing to an invalid address if read_seqretry returned true.
  md: Reassign the parameters if read_seqretry returned true in md_is_badblock.

drivers/md/md.c
drivers/md/raid10.c
drivers/md/raid5.c

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ab768acfb623f8bbb13870e5f65150a60ecad07..61200717687b85b3fed040f9599ffd1987b8842d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1817,10 +1817,10 @@ retry:
                        memset(bbp, 0xff, PAGE_SIZE);
 
                        for (i = 0 ; i < bb->count ; i++) {
-                               u64 internal_bb = *p++;
+                               u64 internal_bb = p[i];
                                u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
                                                | BB_LEN(internal_bb));
-                               *bbp++ = cpu_to_le64(store_bb);
+                               bbp[i] = cpu_to_le64(store_bb);
                        }
                        bb->changed = 0;
                        if (read_seqretry(&bb->lock, seq))
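
This hunk is the heart of the "Avoid writing to an invalid address" fix below: p and bbp are initialized once, before the retry: label, so when read_seqretry() forces a second pass the old post-increment cursors kept walking past the end of both pages. Indexing from the unchanged base pointers makes every pass idempotent. A minimal user-space sketch of the difference (names are illustrative stand-ins, not the kernel code):

#include <stdint.h>

#define COUNT 4

static uint64_t src[COUNT];	/* stands in for bb->page         */
static uint64_t dst[COUNT];	/* stands in for the on-disk page */

static void copy_badblocks(int retries)
{
	uint64_t *p = src;	/* set up once, BEFORE the retry loop, */
	uint64_t *bbp = dst;	/* exactly like the code in this hunk  */

	do {
		for (int i = 0; i < COUNT; i++) {
			/* Buggy form: the cursors advance on every pass,
			 * so a retry reads and writes past both arrays:
			 *
			 *	*bbp++ = *p++;
			 *
			 * Fixed form: index from the unchanged bases, so
			 * replaying the pass touches the same elements:
			 */
			bbp[i] = p[i];
		}
	} while (retries-- > 0);	/* stands in for read_seqretry() */
}

int main(void) { copy_badblocks(2); return 0; }
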
@@ -5294,7 +5294,7 @@ void md_stop_writes(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_stop_writes);
 
-void md_stop(struct mddev *mddev)
+static void __md_stop(struct mddev *mddev)
 {
        mddev->ready = 0;
        mddev->pers->stop(mddev);
@@ -5304,6 +5304,18 @@ void md_stop(struct mddev *mddev)
        mddev->pers = NULL;
        clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 }
+
+void md_stop(struct mddev *mddev)
+{
+       /* stop the array and free any attached data structures.
+        * This is called from dm-raid
+        */
+       __md_stop(mddev);
+       bitmap_destroy(mddev);
+       if (mddev->bio_set)
+               bioset_free(mddev->bio_set);
+}
+
 EXPORT_SYMBOL_GPL(md_stop);
 
 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
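
The split above gives do_md_stop() a bare __md_stop() while external users keep the exported md_stop(), which now also releases the bitmap and bio set; dm-raid never goes through do_md_stop(), so before this change those allocations leaked. A small sketch of the wrapper pattern (illustrative names, not the kernel API):

#include <stdlib.h>

struct array {
	void *bitmap;	/* stands in for mddev->bitmap  */
	void *bio_set;	/* stands in for mddev->bio_set */
};

static void __array_stop(struct array *a)
{
	(void)a;	/* quiesce only; do_md_stop()-style callers
			 * clean up their resources themselves */
}

void array_stop(struct array *a)	/* the dm-raid-style entry point */
{
	__array_stop(a);
	free(a->bitmap);		/* bitmap_destroy() in md   */
	a->bitmap = NULL;
	if (a->bio_set) {		/* bioset_free() is guarded */
		free(a->bio_set);
		a->bio_set = NULL;
	}
}
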
@@ -5364,7 +5376,7 @@ static int do_md_stop(struct mddev * mddev, int mode,
                        set_disk_ro(disk, 0);
 
                __md_stop_writes(mddev);
-               md_stop(mddev);
+               __md_stop(mddev);
                mddev->queue->merge_bvec_fn = NULL;
                mddev->queue->backing_dev_info.congested_fn = NULL;
 
@@ -7936,9 +7948,9 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
                   sector_t *first_bad, int *bad_sectors)
 {
        int hi;
-       int lo = 0;
+       int lo;
        u64 *p = bb->page;
-       int rv = 0;
+       int rv;
        sector_t target = s + sectors;
        unsigned seq;
 
@@ -7953,7 +7965,8 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
 
 retry:
        seq = read_seqbegin(&bb->lock);
-
+       lo = 0;
+       rv = 0;
        hi = bb->count;
 
        /* Binary search between lo and hi for 'target'
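
Same theme as the superblock hunk earlier: lo and rv are state derived inside the seqlock read section, so they must be reinitialized after every read_seqbegin(); otherwise a retry starts its binary search from the previous attempt's lo and can return a stale rv. A self-contained sketch of the pattern with a toy sequence counter (illustrative; the kernel uses seqlock_t and proper barriers):

#include <stdatomic.h>
#include <stdio.h>

/* Toy sequence counter: even = stable, odd = writer active. */
static _Atomic unsigned seqcount;

static unsigned read_begin(void)
{
	unsigned s;
	while ((s = atomic_load(&seqcount)) & 1)
		;				/* writer in progress: spin */
	return s;
}

static int read_retry(unsigned s)
{
	return atomic_load(&seqcount) != s;	/* a writer interleaved */
}

/* Returns 1 if target is present in the sorted table, playing the
 * role of the rv/lo pair in md_is_badblock().
 */
static int lookup(const int *table, int n, int target)
{
	unsigned seq;
	int lo, hi, rv;

	do {
		seq = read_begin();
		lo = 0;		/* the fix: reset derived state per pass */
		rv = 0;
		hi = n;
		while (lo < hi) {	/* binary search, as in md */
			int mid = lo + (hi - lo) / 2;
			if (table[mid] < target)
				lo = mid + 1;
			else
				hi = mid;
		}
		rv = (lo < n && table[lo] == target);
	} while (read_retry(seq));	/* replay the whole block on retry */

	return rv;
}

int main(void)
{
	const int t[] = { 1, 3, 5, 7 };
	printf("%d %d\n", lookup(t, 4, 5), lookup(t, 4, 6));	/* 1 0 */
	return 0;
}
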
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d1295aff41739eea048f0e43513dfba6ade4c8f1..0d5d0ff2c0f7beb47a02bd4692273b31369b2bc5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -499,7 +499,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
         */
        one_write_done(r10_bio);
        if (dec_rdev)
-               rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+               rdev_dec_pending(rdev, conf->mddev);
 }
 
 /*
@@ -1334,18 +1334,21 @@ retry_write:
                        blocked_rdev = rrdev;
                        break;
                }
+               if (rdev && (test_bit(Faulty, &rdev->flags)
+                            || test_bit(Unmerged, &rdev->flags)))
+                       rdev = NULL;
                if (rrdev && (test_bit(Faulty, &rrdev->flags)
                              || test_bit(Unmerged, &rrdev->flags)))
                        rrdev = NULL;
 
                r10_bio->devs[i].bio = NULL;
                r10_bio->devs[i].repl_bio = NULL;
-               if (!rdev || test_bit(Faulty, &rdev->flags) ||
-                   test_bit(Unmerged, &rdev->flags)) {
+
+               if (!rdev && !rrdev) {
                        set_bit(R10BIO_Degraded, &r10_bio->state);
                        continue;
                }
-               if (test_bit(WriteErrorSeen, &rdev->flags)) {
+               if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
                        sector_t first_bad;
                        sector_t dev_sector = r10_bio->devs[i].addr;
                        int bad_sectors;
@@ -1387,8 +1390,10 @@ retry_write:
                                        max_sectors = good_sectors;
                        }
                }
-               r10_bio->devs[i].bio = bio;
-               atomic_inc(&rdev->nr_pending);
+               if (rdev) {
+                       r10_bio->devs[i].bio = bio;
+                       atomic_inc(&rdev->nr_pending);
+               }
                if (rrdev) {
                        r10_bio->devs[i].repl_bio = bio;
                        atomic_inc(&rrdev->nr_pending);
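
The hunk above reworks the per-copy decision: a Faulty or Unmerged primary is now nulled out just as the replacement already was, the copy only counts as degraded when both devices are unusable, and every later use of rdev is guarded. A condensed, self-contained sketch of the resulting flow (all names are illustrative stand-ins, not kernel API):

#include <stdio.h>

struct disk { int faulty; const char *name; };

static int disk_unusable(const struct disk *d) { return d->faulty; }
static void queue_write(const struct disk *d)  { printf("write -> %s\n", d->name); }
static void mark_degraded(void)                { puts("copy degraded"); }

static void submit_copy(struct disk *rdev, struct disk *rrdev)
{
	if (rdev && disk_unusable(rdev))	/* Faulty/Unmerged primary   */
		rdev = NULL;			/* new: primary may be NULL  */
	if (rrdev && disk_unusable(rrdev))
		rrdev = NULL;

	if (!rdev && !rrdev) {			/* degraded only if BOTH gone */
		mark_degraded();
		return;
	}
	if (rdev)				/* each leg is now guarded    */
		queue_write(rdev);
	if (rrdev)				/* independently of the other */
		queue_write(rrdev);
}

int main(void)
{
	struct disk bad = { 1, "bad" }, repl = { 0, "repl" };
	submit_copy(&bad, &repl);	/* old code dereferenced the dead */
	return 0;			/* primary and lost this write    */
}
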
@@ -1444,69 +1449,71 @@ retry_write:
        for (i = 0; i < conf->copies; i++) {
                struct bio *mbio;
                int d = r10_bio->devs[i].devnum;
-               if (!r10_bio->devs[i].bio)
-                       continue;
+               if (r10_bio->devs[i].bio) {
+                       struct md_rdev *rdev = conf->mirrors[d].rdev;
+                       mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+                       md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+                                   max_sectors);
+                       r10_bio->devs[i].bio = mbio;
+
+                       mbio->bi_sector = (r10_bio->devs[i].addr+
+                                          choose_data_offset(r10_bio,
+                                                             rdev));
+                       mbio->bi_bdev = rdev->bdev;
+                       mbio->bi_end_io = raid10_end_write_request;
+                       mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
+                       mbio->bi_private = r10_bio;
 
-               mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-               md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-                           max_sectors);
-               r10_bio->devs[i].bio = mbio;
+                       atomic_inc(&r10_bio->remaining);
 
-               mbio->bi_sector = (r10_bio->devs[i].addr+
-                                  choose_data_offset(r10_bio,
-                                                     conf->mirrors[d].rdev));
-               mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
-               mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
-               mbio->bi_private = r10_bio;
+                       cb = blk_check_plugged(raid10_unplug, mddev,
+                                              sizeof(*plug));
+                       if (cb)
+                               plug = container_of(cb, struct raid10_plug_cb,
+                                                   cb);
+                       else
+                               plug = NULL;
+                       spin_lock_irqsave(&conf->device_lock, flags);
+                       if (plug) {
+                               bio_list_add(&plug->pending, mbio);
+                               plug->pending_cnt++;
+                       } else {
+                               bio_list_add(&conf->pending_bio_list, mbio);
+                               conf->pending_count++;
+                       }
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
+                       if (!plug)
+                               md_wakeup_thread(mddev->thread);
+               }
 
-               atomic_inc(&r10_bio->remaining);
+               if (r10_bio->devs[i].repl_bio) {
+                       struct md_rdev *rdev = conf->mirrors[d].replacement;
+                       if (rdev == NULL) {
+                               /* Replacement just got moved to main 'rdev' */
+                               smp_mb();
+                               rdev = conf->mirrors[d].rdev;
+                       }
+                       mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+                       md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+                                   max_sectors);
+                       r10_bio->devs[i].repl_bio = mbio;
+
+                       mbio->bi_sector = (r10_bio->devs[i].addr +
+                                          choose_data_offset(
+                                                  r10_bio, rdev));
+                       mbio->bi_bdev = rdev->bdev;
+                       mbio->bi_end_io = raid10_end_write_request;
+                       mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
+                       mbio->bi_private = r10_bio;
 
-               cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
-               if (cb)
-                       plug = container_of(cb, struct raid10_plug_cb, cb);
-               else
-                       plug = NULL;
-               spin_lock_irqsave(&conf->device_lock, flags);
-               if (plug) {
-                       bio_list_add(&plug->pending, mbio);
-                       plug->pending_cnt++;
-               } else {
+                       atomic_inc(&r10_bio->remaining);
+                       spin_lock_irqsave(&conf->device_lock, flags);
                        bio_list_add(&conf->pending_bio_list, mbio);
                        conf->pending_count++;
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
+                       if (!mddev_check_plugged(mddev))
+                               md_wakeup_thread(mddev->thread);
                }
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               if (!plug)
-                       md_wakeup_thread(mddev->thread);
-
-               if (!r10_bio->devs[i].repl_bio)
-                       continue;
-
-               mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-               md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-                           max_sectors);
-               r10_bio->devs[i].repl_bio = mbio;
-
-               /* We are actively writing to the original device
-                * so it cannot disappear, so the replacement cannot
-                * become NULL here
-                */
-               mbio->bi_sector = (r10_bio->devs[i].addr +
-                                  choose_data_offset(
-                                          r10_bio,
-                                          conf->mirrors[d].replacement));
-               mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
-               mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
-               mbio->bi_private = r10_bio;
-
-               atomic_inc(&r10_bio->remaining);
-               spin_lock_irqsave(&conf->device_lock, flags);
-               bio_list_add(&conf->pending_bio_list, mbio);
-               conf->pending_count++;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               if (!mddev_check_plugged(mddev))
-                       md_wakeup_thread(mddev->thread);
        }
 
        /* Don't remove the bias on 'remaining' (one_write_done) until
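
The rewrite above also closes the promotion race: by the time the replacement's clone is built, the replacement may already have been promoted into the main rdev slot, so the pointer is re-read instead of trusting the earlier snapshot (the deleted comment's claim that it "cannot become NULL here" was wrong). A simplified sketch of that re-read (illustrative types; the kernel orders it with smp_mb()):

struct disk;				/* opaque here */

struct mirror {
	struct disk *rdev;		/* primary slot          */
	struct disk *replacement;	/* cleared when promoted */
};

static struct disk *replacement_target(struct mirror *m)
{
	struct disk *d = m->replacement;

	if (d == NULL) {
		/* Replacement was just promoted to the main slot; the
		 * kernel issues smp_mb() here so this re-read is ordered
		 * against the writer's updates of the two pointers.
		 */
		d = m->rdev;
	}
	return d;	/* non-NULL: a reference is held on one of the two */
}
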
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c5439dce0295078ecf82094af5474a649284ce61..a4502686e7a8763fb5aeded988f026b2a9dd1c99 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2774,10 +2774,12 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                        dev = &sh->dev[i];
                        if (!test_bit(R5_LOCKED, &dev->flags) &&
                            (test_bit(R5_UPTODATE, &dev->flags) ||
-                            test_and_clear_bit(R5_Discard, &dev->flags))) {
+                            test_bit(R5_Discard, &dev->flags))) {
                                /* We can return any write requests */
                                struct bio *wbi, *wbi2;
                                pr_debug("Return write for disc %d\n", i);
+                               if (test_and_clear_bit(R5_Discard, &dev->flags))
+                                       clear_bit(R5_UPTODATE, &dev->flags);
                                wbi = dev->written;
                                dev->written = NULL;
                                while (wbi && wbi->bi_sector <
@@ -2795,7 +2797,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                         !test_bit(STRIPE_DEGRADED, &sh->state),
                                                0);
                        }
-               }
+               } else if (test_bit(R5_Discard, &sh->dev[i].flags))
+                       clear_bit(R5_Discard, &sh->dev[i].flags);
 
        if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
                if (atomic_dec_and_test(&conf->pending_full_writes))
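
The guard now uses a side-effect-free test_bit(): with the old ||, a discard stripe that was also R5_UPTODATE short-circuited past test_and_clear_bit() and leaked R5_Discard. When the bit is finally consumed, R5_UPTODATE is dropped with it so a discarded block is not later mistaken for valid cached data, and the new else branch mops up stray bits. A minimal sketch of the short-circuit hazard (plain ints instead of the kernel's atomic bitops):

#include <stdio.h>

static int uptodate = 1, discard = 1;	/* both set after a discard */

int main(void)
{
	/* Old shape: with '||' short-circuiting, the clear never ran
	 * when 'uptodate' was already set, so 'discard' leaked:
	 *
	 *	if (uptodate || test_and_clear(&discard)) { ... }
	 *
	 * New shape: test without side effects, then clear both flags
	 * once the branch is actually taken.
	 */
	if (uptodate || discard) {
		if (discard) {
			discard = 0;	/* consume the discard...       */
			uptodate = 0;	/* ...and drop UPTODATE so the  */
		}			/* block isn't treated as valid */
		puts("write returned, flags consistent");
	}
	printf("uptodate=%d discard=%d\n", uptodate, discard);
	return 0;
}
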
@@ -3490,40 +3493,6 @@ static void handle_stripe(struct stripe_head *sh)
                        handle_failed_sync(conf, sh, &s);
        }
 
-       /*
-        * might be able to return some write requests if the parity blocks
-        * are safe, or on a failed drive
-        */
-       pdev = &sh->dev[sh->pd_idx];
-       s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
-               || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
-       qdev = &sh->dev[sh->qd_idx];
-       s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
-               || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
-               || conf->level < 6;
-
-       if (s.written &&
-           (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
-                            && !test_bit(R5_LOCKED, &pdev->flags)
-                            && (test_bit(R5_UPTODATE, &pdev->flags) ||
-                                test_bit(R5_Discard, &pdev->flags))))) &&
-           (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
-                            && !test_bit(R5_LOCKED, &qdev->flags)
-                            && (test_bit(R5_UPTODATE, &qdev->flags) ||
-                                test_bit(R5_Discard, &qdev->flags))))))
-               handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
-
-       /* Now we might consider reading some blocks, either to check/generate
-        * parity, or to satisfy requests
-        * or to load a block that is being partially written.
-        */
-       if (s.to_read || s.non_overwrite
-           || (conf->level == 6 && s.to_write && s.failed)
-           || (s.syncing && (s.uptodate + s.compute < disks))
-           || s.replacing
-           || s.expanding)
-               handle_stripe_fill(sh, &s, disks);
-
        /* Now we check to see if any write operations have recently
         * completed
         */
@@ -3561,6 +3530,40 @@ static void handle_stripe(struct stripe_head *sh)
                        s.dec_preread_active = 1;
        }
 
+       /*
+        * might be able to return some write requests if the parity blocks
+        * are safe, or on a failed drive
+        */
+       pdev = &sh->dev[sh->pd_idx];
+       s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+               || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+       qdev = &sh->dev[sh->qd_idx];
+       s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+               || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+               || conf->level < 6;
+
+       if (s.written &&
+           (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+                            && !test_bit(R5_LOCKED, &pdev->flags)
+                            && (test_bit(R5_UPTODATE, &pdev->flags) ||
+                                test_bit(R5_Discard, &pdev->flags))))) &&
+           (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+                            && !test_bit(R5_LOCKED, &qdev->flags)
+                            && (test_bit(R5_UPTODATE, &qdev->flags) ||
+                                test_bit(R5_Discard, &qdev->flags))))))
+               handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
+
+       /* Now we might consider reading some blocks, either to check/generate
+        * parity, or to satisfy requests
+        * or to load a block that is being partially written.
+        */
+       if (s.to_read || s.non_overwrite
+           || (conf->level == 6 && s.to_write && s.failed)
+           || (s.syncing && (s.uptodate + s.compute < disks))
+           || s.replacing
+           || s.expanding)
+               handle_stripe_fill(sh, &s, disks);
+
        /* Now to consider new write requests and what else, if anything
         * should be read.  We do not handle new writes when:
         * 1/ A 'write' operation (copy+xor) is already in flight.
@@ -5529,6 +5532,10 @@ static int run(struct mddev *mddev)
                 * discard data disk but write parity disk
                 */
                stripe = stripe * PAGE_SIZE;
+               /* Round up to power of 2, as discard handling
+                * currently assumes that */
+               while ((stripe-1) & stripe)
+                       stripe = (stripe | (stripe-1)) + 1;
                mddev->queue->limits.discard_alignment = stripe;
                mddev->queue->limits.discard_granularity = stripe;
                /*
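
The added loop rounds stripe up to the next power of two: (stripe - 1) & stripe stays non-zero exactly while stripe has more than one bit set, and stripe | (stripe - 1) fills every bit below the lowest set bit so the + 1 carries it away; repeating this eventually leaves a single bit, the next power of two at or above the starting value. A standalone version with a quick check:

#include <assert.h>
#include <stdio.h>

/* The same round-up-to-power-of-2 loop as the hunk above, standalone. */
static unsigned long roundup_pow2(unsigned long x)
{
	while ((x - 1) & x)		/* non-zero while x has >1 bit set */
		x = (x | (x - 1)) + 1;	/* fill low bits, then carry up    */
	return x;
}

int main(void)
{
	assert(roundup_pow2(1536UL * 1024) == 2048UL * 1024);	/* 1.5 MiB -> 2 MiB     */
	assert(roundup_pow2(4096) == 4096);			/* powers of 2 unchanged */
	puts("ok");
	return 0;
}
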