Merge tag 'md-3.10-fixes' of git://neil.brown.name/md
author    Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 13 Jun 2013 17:13:29 +0000 (10:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 13 Jun 2013 17:13:29 +0000 (10:13 -0700)
Pull md bugfixes from Neil Brown:
 "A few bugfixes for md

  Some tagged for -stable"

* tag 'md-3.10-fixes' of git://neil.brown.name/md:
  md/raid1,5,10: Disable WRITE SAME until a recovery strategy is in place
  md/raid1,raid10: use freeze_array in place of raise_barrier in various places.
  md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it.
  md: md_stop_writes() should always freeze recovery.

drivers/md/md.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 681d1099a2d58936864b3b63610a31f38a908219..9b82377a833bd6572b628c79426ca153781fd712 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5268,8 +5268,8 @@ static void md_clean(struct mddev *mddev)
 
 static void __md_stop_writes(struct mddev *mddev)
 {
+       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        if (mddev->sync_thread) {
-               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                md_reap_sync_thread(mddev);
        }
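
The hunk above moves the MD_RECOVERY_FROZEN set_bit() out of the
sync_thread check, so recovery is frozen even when no sync thread is
currently running; without that, a new resync/recovery could still be
started after writes were stopped. A minimal standalone sketch of the
resulting ordering (the struct and helpers are simplified stand-ins,
not the kernel's):

	#include <stdbool.h>

	/* Hypothetical, simplified model of __md_stop_writes(). */
	struct mddev_model {
		bool sync_thread_running;
		bool recovery_frozen;
	};

	static void md_stop_writes_model(struct mddev_model *mddev)
	{
		/* Always freeze first: this is what prevents a *new*
		 * recovery from being started once writes are stopped. */
		mddev->recovery_frozen = true;

		if (mddev->sync_thread_running) {
			/* Interrupt and reap the thread already running. */
			mddev->sync_thread_running = false;
		}
	}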
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 55951182af73680d3b7f40d32cac1302062dbe74..6e17f8181c4b923eb4044838d5bfc9dbb6d625fd 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error)
 
                r1_bio->bios[mirror] = NULL;
                to_put = bio;
-               set_bit(R1BIO_Uptodate, &r1_bio->state);
+               /*
+                * Do not set R1BIO_Uptodate if the current device is
+                * rebuilding or Faulty. This is because we cannot use
+                * such a device for properly reading the data back (we
+                * could potentially use it if the current write fell
+                * before rdev->recovery_offset, but for simplicity we
+                * don't check that here).
+                */
+               if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
+                   !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
+                       set_bit(R1BIO_Uptodate, &r1_bio->state);
 
                /* Maybe we can clear some bad blocks. */
                if (is_badblock(conf->mirrors[mirror].rdev,
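
For illustration, the new success condition can be restated as a tiny
standalone helper (the bool parameters stand in for the In_sync and
Faulty rdev flag tests; this is a sketch, not kernel code):

	#include <stdbool.h>

	/* A completed mirror write may count toward R1BIO_Uptodate only
	 * if that mirror could later serve reads of the data: it must be
	 * In_sync (not rebuilding) and not Faulty. */
	static bool write_counts_as_uptodate(bool in_sync, bool faulty)
	{
		return in_sync && !faulty;
	}

With this rule a write acknowledged only by a rebuilding or failed
mirror no longer reports success, which is exactly the "at least one
non-Faulty and non-rebuilding drive" condition in the shortlog above.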
@@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf)
        wake_up(&conf->wait_barrier);
 }
 
-static void freeze_array(struct r1conf *conf)
+static void freeze_array(struct r1conf *conf, int extra)
 {
        /* stop syncio and normal IO and wait for everything to
         * go quiet.
         * We increment barrier and nr_waiting, and then
-        * wait until nr_pending match nr_queued+1
+        * wait until nr_pending matches nr_queued+extra
         * This is called in the context of one normal IO request
         * that has failed. Thus any sync request that might be pending
         * will be blocked by nr_pending, and we need to wait for
         * pending IO requests to complete or be queued for re-try.
-        * Thus the number queued (nr_queued) plus this request (1)
+        * Thus the number queued (nr_queued) plus this request (extra)
         * must match the number of pending IOs (nr_pending) before
         * we continue.
         */
@@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf)
        conf->barrier++;
        conf->nr_waiting++;
        wait_event_lock_irq_cmd(conf->wait_barrier,
-                               conf->nr_pending == conf->nr_queued+1,
+                               conf->nr_pending == conf->nr_queued+extra,
                                conf->resync_lock,
                                flush_pending_writes(conf));
        spin_unlock_irq(&conf->resync_lock);
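
The accounting described in the comment can be modeled outside the
kernel; a minimal sketch of the condition the wait loop evaluates
(struct conf_model is hypothetical, its fields mirror r1conf):

	#include <stdbool.h>

	/* Simplified model of freeze_array()'s wait condition. */
	struct conf_model {
		int nr_pending;	/* I/O requests currently in flight */
		int nr_queued;	/* failed requests parked for retry */
	};

	/* The array is quiescent once every in-flight request is either
	 * parked on the retry list or is one of the caller's own 'extra'
	 * requests: extra == 1 when freezing from the context of a failed
	 * request, extra == 0 when the caller holds no request at all. */
	static bool array_quiescent(const struct conf_model *conf, int extra)
	{
		return conf->nr_pending == conf->nr_queued + extra;
	}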
@@ -1544,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 * we wait for all outstanding requests to complete.
                 */
                synchronize_sched();
-               raise_barrier(conf);
-               lower_barrier(conf);
+               freeze_array(conf, 0);
+               unfreeze_array(conf);
                clear_bit(Unmerged, &rdev->flags);
        }
        md_integrity_add_rdev(rdev, mddev);
@@ -1595,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                         */
                        struct md_rdev *repl =
                                conf->mirrors[conf->raid_disks + number].rdev;
-                       raise_barrier(conf);
+                       freeze_array(conf, 0);
                        clear_bit(Replacement, &repl->flags);
                        p->rdev = repl;
                        conf->mirrors[conf->raid_disks + number].rdev = NULL;
-                       lower_barrier(conf);
+                       unfreeze_array(conf);
                        clear_bit(WantReplacement, &rdev->flags);
                } else
                        clear_bit(WantReplacement, &rdev->flags);
@@ -2195,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
         * frozen
         */
        if (mddev->ro == 0) {
-               freeze_array(conf);
+               freeze_array(conf, 1);
                fix_read_error(conf, r1_bio->read_disk,
                               r1_bio->sector, r1_bio->sectors);
                unfreeze_array(conf);
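
The two freeze_array() call patterns in this patch differ only in the
extra argument; reusing struct conf_model from the sketch above (the
*_model helpers are hypothetical stand-ins for the kernel functions,
which block on the quiescence condition rather than return at once):

	static void freeze_array_model(struct conf_model *conf, int extra)
	{
		/* kernel: raise the barrier, then wait for
		 * conf->nr_pending == conf->nr_queued + extra */
	}

	static void unfreeze_array_model(struct conf_model *conf)
	{
		/* kernel: drop the barrier and wake waiters */
	}

	static void quiesce_for_disk_swap(struct conf_model *conf)
	{
		freeze_array_model(conf, 0);	/* caller owns no request */
		/* ...swap rdev pointers while fully quiescent... */
		unfreeze_array_model(conf);
	}

	static void quiesce_for_read_fixup(struct conf_model *conf)
	{
		freeze_array_model(conf, 1);	/* the failed read is pending */
		/* ...fix_read_error() would run here... */
		unfreeze_array_model(conf);
	}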
@@ -2780,8 +2790,8 @@ static int run(struct mddev *mddev)
                return PTR_ERR(conf);
 
        if (mddev->queue)
-               blk_queue_max_write_same_sectors(mddev->queue,
-                                                mddev->chunk_sectors);
+               blk_queue_max_write_same_sectors(mddev->queue, 0);
+
        rdev_for_each(rdev, mddev) {
                if (!mddev->gendisk)
                        continue;
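
Passing 0 as the limit is how a request queue opts out of WRITE SAME
entirely: a zero limit advertises no WRITE SAME support, so such
requests are rejected rather than sent down to md. The one-line change,
before and after:

	/* before: advertise WRITE SAME up to one chunk */
	blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);

	/* after: disable WRITE SAME until a recovery strategy exists
	 * (raid10 and raid5 below get the same treatment) */
	blk_queue_max_write_same_sectors(mddev->queue, 0);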
@@ -2963,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev)
                return -ENOMEM;
        }
 
-       raise_barrier(conf);
+       freeze_array(conf, 0);
 
        /* ok, everything is stopped */
        oldpool = conf->r1bio_pool;
@@ -2994,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev)
        conf->raid_disks = mddev->raid_disks = raid_disks;
        mddev->delta_disks = 0;
 
-       lower_barrier(conf);
+       unfreeze_array(conf);
 
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 59d4daa5f4c7a32c245ef954f24650fe75084117..6ddae2501b9ae0fb2eb7119a4bf5467d9b6a90b2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
                sector_t first_bad;
                int bad_sectors;
 
-               set_bit(R10BIO_Uptodate, &r10_bio->state);
+               /*
+                * Do not set R10BIO_Uptodate if the current device is
+                * rebuilding or Faulty. This is because we cannot use
+                * such a device for properly reading the data back (we
+                * could potentially use it if the current write fell
+                * before rdev->recovery_offset, but for simplicity we
+                * don't check that here).
+                */
+               if (test_bit(In_sync, &rdev->flags) &&
+                   !test_bit(Faulty, &rdev->flags))
+                       set_bit(R10BIO_Uptodate, &r10_bio->state);
 
                /* Maybe we can clear some bad blocks. */
                if (is_badblock(rdev,
@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)
        wake_up(&conf->wait_barrier);
 }
 
-static void freeze_array(struct r10conf *conf)
+static void freeze_array(struct r10conf *conf, int extra)
 {
        /* stop syncio and normal IO and wait for everything to
         * go quiet.
         * We increment barrier and nr_waiting, and then
-        * wait until nr_pending match nr_queued+1
+        * wait until nr_pending matches nr_queued+extra
         * This is called in the context of one normal IO request
         * that has failed. Thus any sync request that might be pending
         * will be blocked by nr_pending, and we need to wait for
         * pending IO requests to complete or be queued for re-try.
-        * Thus the number queued (nr_queued) plus this request (1)
+        * Thus the number queued (nr_queued) plus this request (extra)
         * must match the number of pending IOs (nr_pending) before
         * we continue.
         */
@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)
        conf->barrier++;
        conf->nr_waiting++;
        wait_event_lock_irq_cmd(conf->wait_barrier,
-                               conf->nr_pending == conf->nr_queued+1,
+                               conf->nr_pending == conf->nr_queued+extra,
                                conf->resync_lock,
                                flush_pending_writes(conf));
 
@@ -1837,8 +1847,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 * we wait for all outstanding requests to complete.
                 */
                synchronize_sched();
-               raise_barrier(conf, 0);
-               lower_barrier(conf);
+               freeze_array(conf, 0);
+               unfreeze_array(conf);
                clear_bit(Unmerged, &rdev->flags);
        }
        md_integrity_add_rdev(rdev, mddev);
@@ -2612,7 +2622,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
        r10_bio->devs[slot].bio = NULL;
 
        if (mddev->ro == 0) {
-               freeze_array(conf);
+               freeze_array(conf, 1);
                fix_read_error(conf, mddev, r10_bio);
                unfreeze_array(conf);
        } else
@@ -3609,8 +3619,7 @@ static int run(struct mddev *mddev)
        if (mddev->queue) {
                blk_queue_max_discard_sectors(mddev->queue,
                                              mddev->chunk_sectors);
-               blk_queue_max_write_same_sectors(mddev->queue,
-                                                mddev->chunk_sectors);
+               blk_queue_max_write_same_sectors(mddev->queue, 0);
                blk_queue_io_min(mddev->queue, chunk_size);
                if (conf->geo.raid_disks % conf->geo.near_copies)
                        blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 753f318c89841ead61f11e568a61337583933638..05e4a105b9c706bb91bf4490ef2ce5f724601477 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5466,7 +5466,7 @@ static int run(struct mddev *mddev)
                if (mddev->major_version == 0 &&
                    mddev->minor_version > 90)
                        rdev->recovery_offset = reshape_offset;
-                       
+
                if (rdev->recovery_offset < reshape_offset) {
                        /* We need to check old and new layout */
                        if (!only_parity(rdev->raid_disk,
@@ -5589,6 +5589,8 @@ static int run(struct mddev *mddev)
                 */
                mddev->queue->limits.discard_zeroes_data = 0;
 
+               blk_queue_max_write_same_sectors(mddev->queue, 0);
+
                rdev_for_each(rdev, mddev) {
                        disk_stack_limits(mddev->gendisk, rdev->bdev,
                                          rdev->data_offset << 9);