diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index bc52776c69ccaddb9151547e2b886c2031953152..ee8eb283650d6ccbe3a5a6e3d6a39ba347d6c35c 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -71,7 +71,10 @@ struct dm_snapshot {
        /* List of snapshots per Origin */
        struct list_head list;
 
-       /* You can't use a snapshot if this is 0 (e.g. if full) */
+       /*
+        * You can't use a snapshot if this is 0 (e.g. if full).
+        * A snapshot-merge target never clears this.
+        */
        int valid;
 
        /* Origin writes don't trigger exceptions until this is set */
@@ -107,6 +110,21 @@ struct dm_snapshot {
        spinlock_t tracked_chunk_lock;
        struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
 
+       /*
+        * The merge operation failed if this flag is set.
+        * Failure modes are handled as follows:
+        * - I/O error reading the header
+        *      => don't load the target; abort.
+        * - Header does not have "valid" flag set
+        *      => use the origin; forget about the snapshot.
+        * - I/O error when reading exceptions
+        *      => don't load the target; abort.
+        *         (We can't use the intermediate origin state.)
+        * - I/O error while merging
+        *      => stop merging; set merge_failed; process I/O normally.
+        */
+       int merge_failed;
+
        /* Wait for events based on state_bits */
        unsigned long state_bits;
 
@@ -270,6 +288,10 @@ struct origin {
 static struct list_head *_origins;
 static struct rw_semaphore _origins_lock;
 
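+/*
+ * Count of pending exceptions that have completed, with a wait queue
+ * for anyone waiting on it.  Each time any pending exception completes,
+ * the count is bumped and all waiters are woken so they can recheck
+ * whether they may proceed.
+ */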
+static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
+static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
+static uint64_t _pending_exceptions_done_count;
+
 static int init_origin_hash(void)
 {
        int i;
@@ -847,14 +869,39 @@ out:
        return r;
 }
 
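+/*
+ * origin_write_extent() is needed by snapshot_merge_next_chunks() but is
+ * defined below, with the rest of the origin code.
+ */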
+static int origin_write_extent(struct dm_snapshot *merging_snap,
+                              sector_t sector, unsigned size);
+
 static void merge_callback(int read_err, unsigned long write_err,
                           void *context);
 
+static uint64_t read_pending_exceptions_done_count(void)
+{
+       uint64_t pending_exceptions_done;
+
+       spin_lock(&_pending_exceptions_done_spinlock);
+       pending_exceptions_done = _pending_exceptions_done_count;
+       spin_unlock(&_pending_exceptions_done_spinlock);
+
+       return pending_exceptions_done;
+}
+
+static void increment_pending_exceptions_done_count(void)
+{
+       spin_lock(&_pending_exceptions_done_spinlock);
+       _pending_exceptions_done_count++;
+       spin_unlock(&_pending_exceptions_done_spinlock);
+
+       wake_up_all(&_pending_exceptions_done);
+}
+
 static void snapshot_merge_next_chunks(struct dm_snapshot *s)
 {
-       int r;
+       int i, linear_chunks;
        chunk_t old_chunk, new_chunk;
        struct dm_io_region src, dest;
+       sector_t io_size;
+       uint64_t previous_count;
 
        BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
        if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
@@ -868,31 +915,63 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
                goto shut;
        }
 
-       r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
-       if (r <= 0) {
-               if (r < 0)
+       linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
+                                                     &new_chunk);
+       if (linear_chunks <= 0) {
+               if (linear_chunks < 0) {
                        DMERR("Read error in exception store: "
                              "shutting down merge");
+                       down_write(&s->lock);
+                       s->merge_failed = 1;
+                       up_write(&s->lock);
+               }
                goto shut;
        }
 
-       /* TODO: use larger I/O size once we verify that kcopyd handles it */
+       /*
+        * prepare_merge() reported the last chunk of a linear region of
+        * chunks; adjust old_chunk and new_chunk to point at its start.
+        */
+       old_chunk = old_chunk + 1 - linear_chunks;
+       new_chunk = new_chunk + 1 - linear_chunks;
+
+       /*
+        * Use one (potentially large) I/O to copy all 'linear_chunks'
+        * from the exception store to the origin
+        */
+       io_size = linear_chunks * s->store->chunk_size;
 
        dest.bdev = s->origin->bdev;
        dest.sector = chunk_to_sector(s->store, old_chunk);
-       dest.count = min((sector_t)s->store->chunk_size,
-                        get_dev_size(dest.bdev) - dest.sector);
+       dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
 
        src.bdev = s->cow->bdev;
        src.sector = chunk_to_sector(s->store, new_chunk);
        src.count = dest.count;
 
+       /*
+        * Reallocate any exceptions needed in other snapshots then
+        * wait for the pending exceptions to complete.
+        * Each time any pending exception (globally on the system)
+        * completes we are woken and repeat the process to find out
+        * if we can proceed.  While this may not seem a particularly
+        * efficient algorithm, it is not expected to have any
+        * significant impact on performance.
+        */
+       previous_count = read_pending_exceptions_done_count();
+       while (origin_write_extent(s, dest.sector, io_size)) {
+               wait_event(_pending_exceptions_done,
+                          (read_pending_exceptions_done_count() !=
+                           previous_count));
+               /* Retry after the wait, until all exceptions are done. */
+               previous_count = read_pending_exceptions_done_count();
+       }
+
        down_write(&s->lock);
        s->first_merging_chunk = old_chunk;
-       s->num_merging_chunks = 1;
+       s->num_merging_chunks = linear_chunks;
        up_write(&s->lock);
 
-       __check_for_conflicting_io(s, old_chunk);
+       /* Wait until writes to all 'linear_chunks' drain */
+       for (i = 0; i < linear_chunks; i++)
+               __check_for_conflicting_io(s, old_chunk + i);
 
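+       /*
+        * Kick off one copy of the whole linear region; merge_callback()
+        * takes over when kcopyd finishes.
+        */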
        dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
        return;
@@ -931,6 +1010,7 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
 
 shut:
        down_write(&s->lock);
+       s->merge_failed = 1;
        b = __release_queued_bios_after_merge(s);
        up_write(&s->lock);
        error_bios(b);
@@ -1033,6 +1113,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        INIT_LIST_HEAD(&s->list);
        spin_lock_init(&s->pe_lock);
        s->state_bits = 0;
+       s->merge_failed = 0;
        s->first_merging_chunk = 0;
        s->num_merging_chunks = 0;
        bio_list_init(&s->bios_queued_during_merge);
@@ -1372,6 +1453,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
        origin_bios = bio_list_get(&pe->origin_bios);
        free_pending_exception(pe);
 
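+       /* Wake any merge process waiting for this exception to complete */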
+       increment_pending_exceptions_done_count();
+
        up_write(&s->lock);
 
        /* Submit any pending write bios */
@@ -1616,11 +1699,9 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 
        down_write(&s->lock);
 
-       /* Full snapshots are not usable */
-       if (!s->valid) {
-               r = -EIO;
-               goto out_unlock;
-       }
+       /* Full merging snapshots are redirected to the origin */
+       if (!s->valid)
+               goto redirect_to_origin;
 
        /* If the block is already remapped - use that */
        e = dm_lookup_exception(&s->complete, chunk);
@@ -1643,6 +1724,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
                goto out_unlock;
        }
 
+redirect_to_origin:
        bio->bi_bdev = s->origin->bdev;
 
        if (bio_rw(bio) == WRITE) {
@@ -1776,6 +1858,8 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 
                if (!snap->valid)
                        DMEMIT("Invalid");
+               else if (snap->merge_failed)
+                       DMEMIT("Merge failed");
                else {
                        if (snap->store->type->usage) {
                                sector_t total_sectors, sectors_allocated,
@@ -1962,6 +2046,41 @@ static int do_origin(struct dm_dev *origin, struct bio *bio)
        return r;
 }
 
+/*
+ * Trigger exceptions in all non-merging snapshots.
+ *
+ * The chunk size of the merging snapshot may be larger than the chunk
+ * size of some other snapshot so we may need to reallocate multiple
+ * chunks in other snapshots.
+ *
+ * We scan all the overlapping exceptions in the other snapshots.
+ * Returns 1 if anything was reallocated and must be waited for,
+ * otherwise returns 0.
+ *
+ * size must be a multiple of merging_snap's chunk_size.
+ */
+static int origin_write_extent(struct dm_snapshot *merging_snap,
+                              sector_t sector, unsigned size)
+{
+       int must_wait = 0;
+       sector_t n;
+       struct origin *o;
+
+       /*
+        * The origin's __minimum_chunk_size() got stored in split_io
+        * by snapshot_merge_resume().
+        */
+       down_read(&_origins_lock);
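+       /* The merging snapshot is registered, so the lookup should not fail */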
+       o = __lookup_origin(merging_snap->origin->bdev);
+       for (n = 0; n < size; n += merging_snap->ti->split_io)
+               if (__origin_write(&o->snapshots, sector + n, NULL) ==
+                   DM_MAPIO_SUBMITTED)
+                       must_wait = 1;
+       up_read(&_origins_lock);
+
+       return must_wait;
+}
+
 /*
  * Origin: maps a linear range of a device, with hooks for snapshotting.
  */