git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/md/raid10.c
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
[karo-tx-linux.git] / drivers / md / raid10.c
index 6ddae2501b9ae0fb2eb7119a4bf5467d9b6a90b2..cd066b63bdafea61ae1175428f2c261575cfd4d1 100644 (file)
@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
 
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
-static int enough(struct r10conf *conf, int ignore);
+static int _enough(struct r10conf *conf, int previous, int ignore);
 static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                                int *skipped);
 static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
                 * than fail the last device.  Here we redefine
                 * "uptodate" to mean "Don't want to retry"
                 */
-               unsigned long flags;
-               spin_lock_irqsave(&conf->device_lock, flags);
-               if (!enough(conf, rdev->raid_disk))
+               if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
+                            rdev->raid_disk))
                        uptodate = 1;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
        }
        if (uptodate) {
                raid_end_bio_io(r10_bio);
@@ -1632,37 +1630,58 @@ static void status(struct seq_file *seq, struct mddev *mddev)
  * Don't consider the device numbered 'ignore'
  * as we might be about to remove it.
  */
-static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
+static int _enough(struct r10conf *conf, int previous, int ignore)
 {
        int first = 0;
+       int has_enough = 0;
+       int disks, ncopies;
+       if (previous) {
+               disks = conf->prev.raid_disks;
+               ncopies = conf->prev.near_copies;
+       } else {
+               disks = conf->geo.raid_disks;
+               ncopies = conf->geo.near_copies;
+       }
 
+       rcu_read_lock();
        do {
                int n = conf->copies;
                int cnt = 0;
                int this = first;
                while (n--) {
-                       if (conf->mirrors[this].rdev &&
-                           this != ignore)
+                       struct md_rdev *rdev;
+                       if (this != ignore &&
+                           (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
+                           test_bit(In_sync, &rdev->flags))
                                cnt++;
-                       this = (this+1) % geo->raid_disks;
+                       this = (this+1) % disks;
                }
                if (cnt == 0)
-                       return 0;
-               first = (first + geo->near_copies) % geo->raid_disks;
+                       goto out;
+               first = (first + ncopies) % disks;
        } while (first != 0);
-       return 1;
+       has_enough = 1;
+out:
+       rcu_read_unlock();
+       return has_enough;
 }
 
 static int enough(struct r10conf *conf, int ignore)
 {
-       return _enough(conf, &conf->geo, ignore) &&
-               _enough(conf, &conf->prev, ignore);
+       /* when calling 'enough', both 'prev' and 'geo' must
+        * be stable.
+        * This is ensured if ->reconfig_mutex or ->device_lock
+        * is held.
+        */
+       return _enough(conf, 0, ignore) &&
+               _enough(conf, 1, ignore);
 }
 
 static void error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r10conf *conf = mddev->private;
+       unsigned long flags;
 
        /*
         * If it is not operational, then we have already marked it as dead
@@ -1670,18 +1689,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
         * next level up know.
         * else mark the drive as failed
         */
+       spin_lock_irqsave(&conf->device_lock, flags);
        if (test_bit(In_sync, &rdev->flags)
-           && !enough(conf, rdev->raid_disk))
+           && !enough(conf, rdev->raid_disk)) {
                /*
                 * Don't fail the drive, just return an IO error.
                 */
+               spin_unlock_irqrestore(&conf->device_lock, flags);
                return;
+       }
        if (test_and_clear_bit(In_sync, &rdev->flags)) {
-               unsigned long flags;
-               spin_lock_irqsave(&conf->device_lock, flags);
                mddev->degraded++;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               /*
+                       /*
                 * if recovery is running, make sure it aborts.
                 */
                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -1689,6 +1708,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
        set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
+       spin_unlock_irqrestore(&conf->device_lock, flags);
        printk(KERN_ALERT
               "md/raid10:%s: Disk failure on %s, disabling device.\n"
               "md/raid10:%s: Operation continuing on %d devices.\n",
@@ -1791,7 +1811,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 * very different from resync
                 */
                return -EBUSY;
-       if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
+       if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
                return -EINVAL;
 
        if (rdev->raid_disk >= 0)
@@ -1819,15 +1839,17 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                        set_bit(Replacement, &rdev->flags);
                        rdev->raid_disk = mirror;
                        err = 0;
-                       disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                         rdev->data_offset << 9);
+                       if (mddev->gendisk)
+                               disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                                 rdev->data_offset << 9);
                        conf->fullsync = 1;
                        rcu_assign_pointer(p->replacement, rdev);
                        break;
                }
 
-               disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                 rdev->data_offset << 9);
+               if (mddev->gendisk)
+                       disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                         rdev->data_offset << 9);
 
                p->head_position = 0;
                p->recovery_disabled = mddev->recovery_disabled - 1;
@@ -2909,14 +2931,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
         */
        if (mddev->bitmap == NULL &&
            mddev->recovery_cp == MaxSector &&
+           mddev->reshape_position == MaxSector &&
+           !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+           !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
            conf->fullsync == 0) {
                *skipped = 1;
-               max_sector = mddev->dev_sectors;
-               if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
-                   test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-                       max_sector = mddev->resync_max_sectors;
-               return max_sector - sector_nr;
+               return mddev->dev_sectors - sector_nr;
        }
 
  skipped:
@@ -3532,7 +3553,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 
        /* FIXME calc properly */
        conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
-                                                           max(0,mddev->delta_disks)),
+                                                           max(0,-mddev->delta_disks)),
                                GFP_KERNEL);
        if (!conf->mirrors)
                goto out;
@@ -3691,7 +3712,7 @@ static int run(struct mddev *mddev)
                    conf->geo.far_offset == 0)
                        goto out_free_conf;
                if (conf->prev.far_copies != 1 &&
-                   conf->geo.far_offset == 0)
+                   conf->prev.far_offset == 0)
                        goto out_free_conf;
        }