x86_64: fix incorrect comments

[mv-sheeva.git] / drivers / md / raid5.c
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 062df846fd6212ed64ed771870f567bde0d19fc3..3bbc6d647044c6b6d782427a1e3e35a146bc0751 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -948,7 +948,6 @@ static int grow_stripes(raid5_conf_t *conf, int num)
         return 0;
  }
  
-#ifdef CONFIG_MD_RAID5_RESHAPE
  static int resize_stripes(raid5_conf_t *conf, int newsize)
  {
         /* Make all the stripes able to hold 'newsize' devices.
@@ -1073,7 +1072,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
         conf->pool_size = newsize;
         return err;
  }
-#endif
  
  static int drop_one_stripe(raid5_conf_t *conf)
  {
@@ -3639,10 +3637,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
  
         retry:
                 previous = 0;
+               disks = conf->raid_disks;
                 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
-               if (likely(conf->reshape_progress == MaxSector))
-                       disks = conf->raid_disks;
-               else {
+               if (unlikely(conf->reshape_progress != MaxSector)) {
                         /* spinlock is needed as reshape_progress may be
                          * 64bit on a 32bit platform, and so it might be
                          * possible to see a half-updated value
@@ -3652,7 +3649,6 @@ static int make_request(struct request_queue *q, struct bio * bi)
                          * to check again.
                          */
                         spin_lock_irq(&conf->device_lock);
-                       disks = conf->raid_disks;
                         if (mddev->delta_disks < 0
                             ? logical_sector < conf->reshape_progress
                             : logical_sector >= conf->reshape_progress) {
@@ -3681,7 +3677,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
                 sh = get_active_stripe(conf, new_sector, previous,
                                        (bi->bi_rw&RWA_MASK));
                 if (sh) {
-                       if (unlikely(conf->reshape_progress != MaxSector)) {
+                       if (unlikely(previous)) {
                                 /* expansion might have moved on while waiting for a
                                  * stripe, so we must do the range check again.
                                  * Expansion could still move past after this
@@ -3692,10 +3688,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
                                  */
                                 int must_retry = 0;
                                 spin_lock_irq(&conf->device_lock);
-                               if ((mddev->delta_disks < 0
-                                    ? logical_sector >= conf->reshape_progress
-                                    : logical_sector < conf->reshape_progress)
-                                   && previous)
+                               if (mddev->delta_disks < 0
+                                   ? logical_sector >= conf->reshape_progress
+                                   : logical_sector < conf->reshape_progress)
                                         /* mismatch, need to try again */
                                         must_retry = 1;
                                 spin_unlock_irq(&conf->device_lock);
@@ -3771,7 +3766,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
         int new_data_disks = conf->raid_disks - conf->max_degraded;
         int i;
         int dd_idx;
-       sector_t writepos, safepos, gap;
+       sector_t writepos, readpos, safepos;
         sector_t stripe_addr;
         int reshape_sectors;
         struct list_head stripes;
@@ -3811,26 +3806,46 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
          */
         writepos = conf->reshape_progress;
         sector_div(writepos, new_data_disks);
+       readpos = conf->reshape_progress;
+       sector_div(readpos, data_disks);
         safepos = conf->reshape_safe;
         sector_div(safepos, data_disks);
         if (mddev->delta_disks < 0) {
                 writepos -= reshape_sectors;
+               readpos += reshape_sectors;
                 safepos += reshape_sectors;
-               gap = conf->reshape_safe - conf->reshape_progress;
         } else {
                 writepos += reshape_sectors;
+               readpos -= reshape_sectors;
                 safepos -= reshape_sectors;
-               gap = conf->reshape_progress - conf->reshape_safe;
         }
  
+       /* 'writepos' is the most advanced device address we might write.
+        * 'readpos' is the least advanced device address we might read.
+        * 'safepos' is the least address recorded in the metadata as having
+        *     been reshaped.
+        * If 'readpos' is behind 'writepos', then there is no way that we can
+        * ensure safety in the face of a crash - that must be done by userspace
+        * making a backup of the data.  So in that case there is no particular
+        * rush to update metadata.
+        * Otherwise if 'safepos' is behind 'writepos', then we really need to
+        * update the metadata to advance 'safepos' to match 'readpos' so that
+        * we can be safe in the event of a crash.
+        * So we insist on updating metadata if safepos is behind writepos and
+        * readpos is beyond writepos.
+        * In any case, update the metadata every 10 seconds.
+        * Maybe that number should be configurable, but I'm not sure it is
+        * worth it.... maybe it could be a multiple of safemode_delay???
+        */
         if ((mddev->delta_disks < 0
-            ? writepos < safepos
-            : writepos > safepos) ||
-           gap > (new_data_disks)*3000*2 /*3Meg*/) {
+            ? (safepos > writepos && readpos < writepos)
+            : (safepos < writepos && readpos > writepos)) ||
+           time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
                 /* Cannot proceed until we've updated the superblock... */
                 wait_event(conf->wait_for_overlap,
                            atomic_read(&conf->reshape_stripes)==0);
                 mddev->reshape_position = conf->reshape_progress;
+               conf->reshape_checkpoint = jiffies;
                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
                 md_wakeup_thread(mddev->thread);
                 wait_event(mddev->sb_wait, mddev->flags == 0 ||
@@ -3928,6 +3943,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                 wait_event(conf->wait_for_overlap,
                            atomic_read(&conf->reshape_stripes) == 0);
                 mddev->reshape_position = conf->reshape_progress;
+               conf->reshape_checkpoint = jiffies;
                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
                 md_wakeup_thread(mddev->thread);
                 wait_event(mddev->sb_wait,
@@ -4822,7 +4838,6 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
         return 0;
  }
  
-#ifdef CONFIG_MD_RAID5_RESHAPE
  static int raid5_check_reshape(mddev_t *mddev)
  {
         raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -4963,11 +4978,11 @@ static int raid5_start_reshape(mddev_t *mddev)
                 spin_unlock_irq(&conf->device_lock);
                 return -EAGAIN;
         }
+       conf->reshape_checkpoint = jiffies;
         md_wakeup_thread(mddev->sync_thread);
         md_new_event(mddev);
         return 0;
  }
-#endif
  
  /* This is called from the reshape thread and should make any
   * changes needed in 'conf'
@@ -4981,6 +4996,7 @@ static void end_reshape(raid5_conf_t *conf)
                 conf->previous_raid_disks = conf->raid_disks;
                 conf->reshape_progress = MaxSector;
                 spin_unlock_irq(&conf->device_lock);
+               wake_up(&conf->wait_for_overlap);
  
                 /* read-ahead size must cover two whole stripes, which is
                  * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
@@ -5289,11 +5305,9 @@ static struct mdk_personality raid6_personality =
         .sync_request   = sync_request,
         .resize         = raid5_resize,
         .size           = raid5_size,
-#ifdef CONFIG_MD_RAID5_RESHAPE
         .check_reshape  = raid5_check_reshape,
         .start_reshape  = raid5_start_reshape,
         .finish_reshape = raid5_finish_reshape,
-#endif
         .quiesce        = raid5_quiesce,
         .takeover       = raid6_takeover,
         .reconfig       = raid6_reconfig,
@@ -5314,11 +5328,9 @@ static struct mdk_personality raid5_personality =
         .sync_request   = sync_request,
         .resize         = raid5_resize,
         .size           = raid5_size,
-#ifdef CONFIG_MD_RAID5_RESHAPE
         .check_reshape  = raid5_check_reshape,
         .start_reshape  = raid5_start_reshape,
         .finish_reshape = raid5_finish_reshape,
-#endif
         .quiesce        = raid5_quiesce,
         .takeover       = raid5_takeover,
         .reconfig       = raid5_reconfig,
@@ -5340,11 +5352,9 @@ static struct mdk_personality raid4_personality =
         .sync_request   = sync_request,
         .resize         = raid5_resize,
         .size           = raid5_size,
-#ifdef CONFIG_MD_RAID5_RESHAPE
         .check_reshape  = raid5_check_reshape,
         .start_reshape  = raid5_start_reshape,
         .finish_reshape = raid5_finish_reshape,
-#endif
         .quiesce        = raid5_quiesce,
  };