Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 12 Aug 2010 17:16:46 +0000 (10:16 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 12 Aug 2010 17:16:46 +0000 (10:16 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (33 commits)
  dm mpath: support discard
  dm stripe: support discards
  dm: split discard requests on target boundaries
  dm stripe: optimize sector division
  dm stripe: move sector translation to a function
  dm: error return error for discards
  dm delay: support discard
  dm: zero silently drop discards
  dm: use dm_target_offset macro
  dm: factor out max_io_len_target_boundary
  dm: use common __issue_target_request for flush and discard support
  dm: linear support discard
  dm crypt: simplify crypt_ctr
  dm crypt: simplify crypt_config destruction logic
  dm: allow autoloading of dm mod
  dm: rename map_info flush_request to target_request_nr
  dm ioctl: refactor dm_table_complete
  dm snapshot: implement merge
  dm: do not initialise full request queue when bio based
  dm ioctl: make bio or request based device type immutable
  ...

20 files changed:
Documentation/devices.txt
drivers/md/dm-crypt.c
drivers/md/dm-delay.c
drivers/md/dm-exception-store.c
drivers/md/dm-exception-store.h
drivers/md/dm-ioctl.c
drivers/md/dm-linear.c
drivers/md/dm-mpath.c
drivers/md/dm-raid1.c
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap.c
drivers/md/dm-stripe.c
drivers/md/dm-table.c
drivers/md/dm-target.c
drivers/md/dm-zero.c
drivers/md/dm.c
drivers/md/dm.h
include/linux/device-mapper.h
include/linux/dm-ioctl.h
include/linux/miscdevice.h

index f2da781705b20ce3b4bf0b2f60daff546beec177..d0d1df6cb5dea8687af070c246898007d018614f 100644 (file)
@@ -445,6 +445,7 @@ Your cooperation is appreciated.
                233 = /dev/kmview       View-OS A process with a view
                234 = /dev/btrfs-control        Btrfs control device
                235 = /dev/autofs       Autofs control device
+               236 = /dev/mapper/control       Device-Mapper control device
                240-254                 Reserved for local use
                255                     Reserved for MISC_DYNAMIC_MINOR
 
index 3bdbb6115702500498548c1936c9fe19510aa414..368e8e98f7050e0fa5ddd7fa2220379681fdb04d 100644 (file)
@@ -107,11 +107,10 @@ struct crypt_config {
        struct workqueue_struct *io_queue;
        struct workqueue_struct *crypt_queue;
 
-       /*
-        * crypto related data
-        */
+       char *cipher;
+       char *cipher_mode;
+
        struct crypt_iv_operations *iv_gen_ops;
-       char *iv_mode;
        union {
                struct iv_essiv_private essiv;
                struct iv_benbi_private benbi;
@@ -135,8 +134,6 @@ struct crypt_config {
        unsigned int dmreq_start;
        struct ablkcipher_request *req;
 
-       char cipher[CRYPTO_MAX_ALG_NAME];
-       char chainmode[CRYPTO_MAX_ALG_NAME];
        struct crypto_ablkcipher *tfm;
        unsigned long flags;
        unsigned int key_size;
@@ -999,82 +996,135 @@ static int crypt_wipe_key(struct crypt_config *cc)
        return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
 }
 
-/*
- * Construct an encryption mapping:
- * <cipher> <key> <iv_offset> <dev_path> <start>
- */
-static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+static void crypt_dtr(struct dm_target *ti)
 {
-       struct crypt_config *cc;
-       struct crypto_ablkcipher *tfm;
-       char *tmp;
-       char *cipher;
-       char *chainmode;
-       char *ivmode;
-       char *ivopts;
-       unsigned int key_size;
-       unsigned long long tmpll;
+       struct crypt_config *cc = ti->private;
 
-       if (argc != 5) {
-               ti->error = "Not enough arguments";
+       ti->private = NULL;
+
+       if (!cc)
+               return;
+
+       if (cc->io_queue)
+               destroy_workqueue(cc->io_queue);
+       if (cc->crypt_queue)
+               destroy_workqueue(cc->crypt_queue);
+
+       if (cc->bs)
+               bioset_free(cc->bs);
+
+       if (cc->page_pool)
+               mempool_destroy(cc->page_pool);
+       if (cc->req_pool)
+               mempool_destroy(cc->req_pool);
+       if (cc->io_pool)
+               mempool_destroy(cc->io_pool);
+
+       if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
+               cc->iv_gen_ops->dtr(cc);
+
+       if (cc->tfm && !IS_ERR(cc->tfm))
+               crypto_free_ablkcipher(cc->tfm);
+
+       if (cc->dev)
+               dm_put_device(ti, cc->dev);
+
+       kzfree(cc->cipher);
+       kzfree(cc->cipher_mode);
+
+       /* Must zero key material before freeing */
+       kzfree(cc);
+}
+
+static int crypt_ctr_cipher(struct dm_target *ti,
+                           char *cipher_in, char *key)
+{
+       struct crypt_config *cc = ti->private;
+       char *tmp, *cipher, *chainmode, *ivmode, *ivopts;
+       char *cipher_api = NULL;
+       int ret = -EINVAL;
+
+       /* Convert to crypto api definition? */
+       if (strchr(cipher_in, '(')) {
+               ti->error = "Bad cipher specification";
                return -EINVAL;
        }
 
-       tmp = argv[0];
+       /*
+        * Legacy dm-crypt cipher specification
+        * cipher-mode-iv:ivopts
+        */
+       tmp = cipher_in;
        cipher = strsep(&tmp, "-");
+
+       cc->cipher = kstrdup(cipher, GFP_KERNEL);
+       if (!cc->cipher)
+               goto bad_mem;
+
+       if (tmp) {
+               cc->cipher_mode = kstrdup(tmp, GFP_KERNEL);
+               if (!cc->cipher_mode)
+                       goto bad_mem;
+       }
+
        chainmode = strsep(&tmp, "-");
        ivopts = strsep(&tmp, "-");
        ivmode = strsep(&ivopts, ":");
 
        if (tmp)
-               DMWARN("Unexpected additional cipher options");
-
-       key_size = strlen(argv[1]) >> 1;
-
-       cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
-       if (cc == NULL) {
-               ti->error =
-                       "Cannot allocate transparent encryption context";
-               return -ENOMEM;
-       }
+               DMWARN("Ignoring unexpected additional cipher options");
 
-       /* Compatibility mode for old dm-crypt cipher strings */
-       if (!chainmode || (strcmp(chainmode, "plain") == 0 && !ivmode)) {
+       /* Compatibility mode for old dm-crypt mappings */
+       if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) {
+               kfree(cc->cipher_mode);
+               cc->cipher_mode = kstrdup("cbc-plain", GFP_KERNEL);
                chainmode = "cbc";
                ivmode = "plain";
        }
 
        if (strcmp(chainmode, "ecb") && !ivmode) {
-               ti->error = "This chaining mode requires an IV mechanism";
-               goto bad_cipher;
+               ti->error = "IV mechanism required";
+               return -EINVAL;
        }
 
-       if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)",
-                    chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) {
-               ti->error = "Chain mode + cipher name is too long";
-               goto bad_cipher;
+       cipher_api = kmalloc(CRYPTO_MAX_ALG_NAME, GFP_KERNEL);
+       if (!cipher_api)
+               goto bad_mem;
+
+       ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME,
+                      "%s(%s)", chainmode, cipher);
+       if (ret < 0) {
+               kfree(cipher_api);
+               goto bad_mem;
        }
 
-       tfm = crypto_alloc_ablkcipher(cc->cipher, 0, 0);
-       if (IS_ERR(tfm)) {
+       /* Allocate cipher */
+       cc->tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0);
+       if (IS_ERR(cc->tfm)) {
+               ret = PTR_ERR(cc->tfm);
                ti->error = "Error allocating crypto tfm";
-               goto bad_cipher;
+               goto bad;
        }
 
-       strcpy(cc->cipher, cipher);
-       strcpy(cc->chainmode, chainmode);
-       cc->tfm = tfm;
-
-       if (crypt_set_key(cc, argv[1]) < 0) {
+       /* Initialize and set key */
+       ret = crypt_set_key(cc, key);
+       if (ret < 0) {
                ti->error = "Error decoding and setting key";
-               goto bad_ivmode;
+               goto bad;
        }
 
-       /*
-        * Choose ivmode. Valid modes: "plain", "essiv:<esshash>", "benbi".
-        * See comments at iv code
-        */
+       /* Initialize IV */
+       cc->iv_size = crypto_ablkcipher_ivsize(cc->tfm);
+       if (cc->iv_size)
+               /* at least a 64 bit sector number should fit in our buffer */
+               cc->iv_size = max(cc->iv_size,
+                                 (unsigned int)(sizeof(u64) / sizeof(u8)));
+       else if (ivmode) {
+               DMWARN("Selected cipher does not support IVs");
+               ivmode = NULL;
+       }
 
+       /* Choose ivmode, see comments at iv code. */
        if (ivmode == NULL)
                cc->iv_gen_ops = NULL;
        else if (strcmp(ivmode, "plain") == 0)
@@ -1088,159 +1138,138 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        else if (strcmp(ivmode, "null") == 0)
                cc->iv_gen_ops = &crypt_iv_null_ops;
        else {
+               ret = -EINVAL;
                ti->error = "Invalid IV mode";
-               goto bad_ivmode;
+               goto bad;
        }
 
-       if (cc->iv_gen_ops && cc->iv_gen_ops->ctr &&
-           cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
-               goto bad_ivmode;
-
-       if (cc->iv_gen_ops && cc->iv_gen_ops->init &&
-           cc->iv_gen_ops->init(cc) < 0) {
-               ti->error = "Error initialising IV";
-               goto bad_slab_pool;
+       /* Allocate IV */
+       if (cc->iv_gen_ops && cc->iv_gen_ops->ctr) {
+               ret = cc->iv_gen_ops->ctr(cc, ti, ivopts);
+               if (ret < 0) {
+                       ti->error = "Error creating IV";
+                       goto bad;
+               }
        }
 
-       cc->iv_size = crypto_ablkcipher_ivsize(tfm);
-       if (cc->iv_size)
-               /* at least a 64 bit sector number should fit in our buffer */
-               cc->iv_size = max(cc->iv_size,
-                                 (unsigned int)(sizeof(u64) / sizeof(u8)));
-       else {
-               if (cc->iv_gen_ops) {
-                       DMWARN("Selected cipher does not support IVs");
-                       if (cc->iv_gen_ops->dtr)
-                               cc->iv_gen_ops->dtr(cc);
-                       cc->iv_gen_ops = NULL;
+       /* Initialize IV (set keys for ESSIV etc) */
+       if (cc->iv_gen_ops && cc->iv_gen_ops->init) {
+               ret = cc->iv_gen_ops->init(cc);
+               if (ret < 0) {
+                       ti->error = "Error initialising IV";
+                       goto bad;
                }
        }
 
+       ret = 0;
+bad:
+       kfree(cipher_api);
+       return ret;
+
+bad_mem:
+       ti->error = "Cannot allocate cipher strings";
+       return -ENOMEM;
+}
+
+/*
+ * Construct an encryption mapping:
+ * <cipher> <key> <iv_offset> <dev_path> <start>
+ */
+static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+       struct crypt_config *cc;
+       unsigned int key_size;
+       unsigned long long tmpll;
+       int ret;
+
+       if (argc != 5) {
+               ti->error = "Not enough arguments";
+               return -EINVAL;
+       }
+
+       key_size = strlen(argv[1]) >> 1;
+
+       cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
+       if (!cc) {
+               ti->error = "Cannot allocate encryption context";
+               return -ENOMEM;
+       }
+
+       ti->private = cc;
+       ret = crypt_ctr_cipher(ti, argv[0], argv[1]);
+       if (ret < 0)
+               goto bad;
+
+       ret = -ENOMEM;
        cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
        if (!cc->io_pool) {
                ti->error = "Cannot allocate crypt io mempool";
-               goto bad_slab_pool;
+               goto bad;
        }
 
        cc->dmreq_start = sizeof(struct ablkcipher_request);
-       cc->dmreq_start += crypto_ablkcipher_reqsize(tfm);
+       cc->dmreq_start += crypto_ablkcipher_reqsize(cc->tfm);
        cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment());
-       cc->dmreq_start += crypto_ablkcipher_alignmask(tfm) &
+       cc->dmreq_start += crypto_ablkcipher_alignmask(cc->tfm) &
                           ~(crypto_tfm_ctx_alignment() - 1);
 
        cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
                        sizeof(struct dm_crypt_request) + cc->iv_size);
        if (!cc->req_pool) {
                ti->error = "Cannot allocate crypt request mempool";
-               goto bad_req_pool;
+               goto bad;
        }
        cc->req = NULL;
 
        cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
        if (!cc->page_pool) {
                ti->error = "Cannot allocate page mempool";
-               goto bad_page_pool;
+               goto bad;
        }
 
        cc->bs = bioset_create(MIN_IOS, 0);
        if (!cc->bs) {
                ti->error = "Cannot allocate crypt bioset";
-               goto bad_bs;
+               goto bad;
        }
 
+       ret = -EINVAL;
        if (sscanf(argv[2], "%llu", &tmpll) != 1) {
                ti->error = "Invalid iv_offset sector";
-               goto bad_device;
+               goto bad;
        }
        cc->iv_offset = tmpll;
 
-       if (sscanf(argv[4], "%llu", &tmpll) != 1) {
-               ti->error = "Invalid device sector";
-               goto bad_device;
-       }
-       cc->start = tmpll;
-
        if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) {
                ti->error = "Device lookup failed";
-               goto bad_device;
+               goto bad;
        }
 
-       if (ivmode && cc->iv_gen_ops) {
-               if (ivopts)
-                       *(ivopts - 1) = ':';
-               cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
-               if (!cc->iv_mode) {
-                       ti->error = "Error kmallocing iv_mode string";
-                       goto bad_ivmode_string;
-               }
-               strcpy(cc->iv_mode, ivmode);
-       } else
-               cc->iv_mode = NULL;
+       if (sscanf(argv[4], "%llu", &tmpll) != 1) {
+               ti->error = "Invalid device sector";
+               goto bad;
+       }
+       cc->start = tmpll;
 
+       ret = -ENOMEM;
        cc->io_queue = create_singlethread_workqueue("kcryptd_io");
        if (!cc->io_queue) {
                ti->error = "Couldn't create kcryptd io queue";
-               goto bad_io_queue;
+               goto bad;
        }
 
        cc->crypt_queue = create_singlethread_workqueue("kcryptd");
        if (!cc->crypt_queue) {
                ti->error = "Couldn't create kcryptd queue";
-               goto bad_crypt_queue;
+               goto bad;
        }
 
        ti->num_flush_requests = 1;
-       ti->private = cc;
        return 0;
 
-bad_crypt_queue:
-       destroy_workqueue(cc->io_queue);
-bad_io_queue:
-       kfree(cc->iv_mode);
-bad_ivmode_string:
-       dm_put_device(ti, cc->dev);
-bad_device:
-       bioset_free(cc->bs);
-bad_bs:
-       mempool_destroy(cc->page_pool);
-bad_page_pool:
-       mempool_destroy(cc->req_pool);
-bad_req_pool:
-       mempool_destroy(cc->io_pool);
-bad_slab_pool:
-       if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
-               cc->iv_gen_ops->dtr(cc);
-bad_ivmode:
-       crypto_free_ablkcipher(tfm);
-bad_cipher:
-       /* Must zero key material before freeing */
-       kzfree(cc);
-       return -EINVAL;
-}
-
-static void crypt_dtr(struct dm_target *ti)
-{
-       struct crypt_config *cc = (struct crypt_config *) ti->private;
-
-       destroy_workqueue(cc->io_queue);
-       destroy_workqueue(cc->crypt_queue);
-
-       if (cc->req)
-               mempool_free(cc->req, cc->req_pool);
-
-       bioset_free(cc->bs);
-       mempool_destroy(cc->page_pool);
-       mempool_destroy(cc->req_pool);
-       mempool_destroy(cc->io_pool);
-
-       kfree(cc->iv_mode);
-       if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
-               cc->iv_gen_ops->dtr(cc);
-       crypto_free_ablkcipher(cc->tfm);
-       dm_put_device(ti, cc->dev);
-
-       /* Must zero key material before freeing */
-       kzfree(cc);
+bad:
+       crypt_dtr(ti);
+       return ret;
 }
 
 static int crypt_map(struct dm_target *ti, struct bio *bio,
@@ -1255,7 +1284,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
                return DM_MAPIO_REMAPPED;
        }
 
-       io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin);
+       io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector));
 
        if (bio_data_dir(io->base_bio) == READ)
                kcryptd_queue_io(io);
@@ -1268,7 +1297,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
 static int crypt_status(struct dm_target *ti, status_type_t type,
                        char *result, unsigned int maxlen)
 {
-       struct crypt_config *cc = (struct crypt_config *) ti->private;
+       struct crypt_config *cc = ti->private;
        unsigned int sz = 0;
 
        switch (type) {
@@ -1277,11 +1306,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
                break;
 
        case STATUSTYPE_TABLE:
-               if (cc->iv_mode)
-                       DMEMIT("%s-%s-%s ", cc->cipher, cc->chainmode,
-                              cc->iv_mode);
+               if (cc->cipher_mode)
+                       DMEMIT("%s-%s ", cc->cipher, cc->cipher_mode);
                else
-                       DMEMIT("%s-%s ", cc->cipher, cc->chainmode);
+                       DMEMIT("%s ", cc->cipher);
 
                if (cc->key_size > 0) {
                        if ((maxlen - sz) < ((cc->key_size << 1) + 1))
@@ -1378,7 +1406,7 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
                return max_size;
 
        bvm->bi_bdev = cc->dev->bdev;
-       bvm->bi_sector = cc->start + bvm->bi_sector - ti->begin;
+       bvm->bi_sector = cc->start + dm_target_offset(ti, bvm->bi_sector);
 
        return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
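The crypt_ctr_cipher() helper factored out above keeps dm-crypt's legacy <cipher>-<mode>-<iv>:<ivopts> table syntax while handing the crypto layer the "<mode>(<cipher>)" form. A minimal user-space sketch of that parsing, using the same strsep()/snprintf() sequence as the hunk above; the sample string, buffer size and output are illustrative only and not part of the patch:

    /* build with e.g.: gcc -o cipherparse cipherparse.c */
    #include <stdio.h>
    #include <string.h>

    #define MAX_ALG_NAME 64                          /* stand-in for CRYPTO_MAX_ALG_NAME */

    int main(void)
    {
            char spec[] = "aes-cbc-essiv:sha256";    /* <cipher>-<mode>-<iv>:<ivopts> */
            char api[MAX_ALG_NAME];
            char *tmp = spec, *cipher, *chainmode, *ivmode, *ivopts;

            cipher    = strsep(&tmp, "-");           /* "aes" */
            chainmode = strsep(&tmp, "-");           /* "cbc" */
            ivopts    = strsep(&tmp, "-");           /* "essiv:sha256" */
            ivmode    = strsep(&ivopts, ":");        /* "essiv"; ivopts is now "sha256" */

            /* compatibility: a bare "aes" is treated as "aes-cbc-plain" */
            if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) {
                    chainmode = "cbc";
                    ivmode = "plain";
            }

            snprintf(api, sizeof(api), "%s(%s)", chainmode, cipher);
            printf("crypto api: %s  iv mode: %s  iv opts: %s\n",
                   api, ivmode ? ivmode : "(none)", ivopts ? ivopts : "(none)");
            return 0;
    }

For "aes-cbc-essiv:sha256" this prints "cbc(aes)", "essiv" and "sha256", matching the names the constructor passes to crypto_alloc_ablkcipher() and the IV code.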
index 852052880d7a3b5574b3a718325678d201033506..baa11912cc94049bf966f9e62d3451fefd954971 100644 (file)
@@ -198,6 +198,7 @@ out:
        atomic_set(&dc->may_delay, 1);
 
        ti->num_flush_requests = 1;
+       ti->num_discard_requests = 1;
        ti->private = dc;
        return 0;
 
@@ -281,14 +282,13 @@ static int delay_map(struct dm_target *ti, struct bio *bio,
                bio->bi_bdev = dc->dev_write->bdev;
                if (bio_sectors(bio))
                        bio->bi_sector = dc->start_write +
-                                        (bio->bi_sector - ti->begin);
+                                        dm_target_offset(ti, bio->bi_sector);
 
                return delay_bio(dc, dc->write_delay, bio);
        }
 
        bio->bi_bdev = dc->dev_read->bdev;
-       bio->bi_sector = dc->start_read +
-                        (bio->bi_sector - ti->begin);
+       bio->bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_sector);
 
        return delay_bio(dc, dc->read_delay, bio);
 }
index 2b7907b6dd094611c8cc9f31fbead6d7b553e1d7..0bdb201c2c2af04ceea2905db5f56c10fe1091df 100644 (file)
@@ -173,7 +173,9 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
 
        /* Validate the chunk size against the device block size */
        if (chunk_size %
-           (bdev_logical_block_size(dm_snap_cow(store->snap)->bdev) >> 9)) {
+           (bdev_logical_block_size(dm_snap_cow(store->snap)->bdev) >> 9) ||
+           chunk_size %
+           (bdev_logical_block_size(dm_snap_origin(store->snap)->bdev) >> 9)) {
                *error = "Chunk size is not a multiple of device blocksize";
                return -EINVAL;
        }
index e8dfa06af3ba531b6cca02842effd91cc083eb1c..0b2536247cf55a3215223b8b0c72ff29a629b87a 100644 (file)
@@ -126,8 +126,9 @@ struct dm_exception_store {
 };
 
 /*
- * Obtain the cow device used by a given snapshot.
+ * Obtain the origin or cow device used by a given snapshot.
  */
+struct dm_dev *dm_snap_origin(struct dm_snapshot *snap);
 struct dm_dev *dm_snap_cow(struct dm_snapshot *snap);
 
 /*
index d7500e1c26f24dc878bff7bfde6c16d1ac872245..3e39193e5036e709665e08345ba242d83662c606 100644 (file)
@@ -249,55 +249,66 @@ static void __hash_remove(struct hash_cell *hc)
 
 static void dm_hash_remove_all(int keep_open_devices)
 {
-       int i, dev_skipped, dev_removed;
+       int i, dev_skipped;
        struct hash_cell *hc;
-       struct list_head *tmp, *n;
+       struct mapped_device *md;
+
+retry:
+       dev_skipped = 0;
 
        down_write(&_hash_lock);
 
-retry:
-       dev_skipped = dev_removed = 0;
        for (i = 0; i < NUM_BUCKETS; i++) {
-               list_for_each_safe (tmp, n, _name_buckets + i) {
-                       hc = list_entry(tmp, struct hash_cell, name_list);
+               list_for_each_entry(hc, _name_buckets + i, name_list) {
+                       md = hc->md;
+                       dm_get(md);
 
-                       if (keep_open_devices &&
-                           dm_lock_for_deletion(hc->md)) {
+                       if (keep_open_devices && dm_lock_for_deletion(md)) {
+                               dm_put(md);
                                dev_skipped++;
                                continue;
                        }
+
                        __hash_remove(hc);
-                       dev_removed = 1;
-               }
-       }
 
-       /*
-        * Some mapped devices may be using other mapped devices, so if any
-        * still exist, repeat until we make no further progress.
-        */
-       if (dev_skipped) {
-               if (dev_removed)
-                       goto retry;
+                       up_write(&_hash_lock);
 
-               DMWARN("remove_all left %d open device(s)", dev_skipped);
+                       dm_put(md);
+                       if (likely(keep_open_devices))
+                               dm_destroy(md);
+                       else
+                               dm_destroy_immediate(md);
+
+                       /*
+                        * Some mapped devices may be using other mapped
+                        * devices, so repeat until we make no further
+                        * progress.  If a new mapped device is created
+                        * here it will also get removed.
+                        */
+                       goto retry;
+               }
        }
 
        up_write(&_hash_lock);
+
+       if (dev_skipped)
+               DMWARN("remove_all left %d open device(s)", dev_skipped);
 }
 
-static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
-                         const char *new)
+static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
+                                           const char *new)
 {
        char *new_name, *old_name;
        struct hash_cell *hc;
        struct dm_table *table;
+       struct mapped_device *md;
 
        /*
         * duplicate new.
         */
        new_name = kstrdup(new, GFP_KERNEL);
        if (!new_name)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        down_write(&_hash_lock);
 
@@ -306,24 +317,24 @@ static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
         */
        hc = __get_name_cell(new);
        if (hc) {
-               DMWARN("asked to rename to an already existing name %s -> %s",
-                      old, new);
+               DMWARN("asked to rename to an already-existing name %s -> %s",
+                      param->name, new);
                dm_put(hc->md);
                up_write(&_hash_lock);
                kfree(new_name);
-               return -EBUSY;
+               return ERR_PTR(-EBUSY);
        }
 
        /*
         * Is there such a device as 'old' ?
         */
-       hc = __get_name_cell(old);
+       hc = __get_name_cell(param->name);
        if (!hc) {
-               DMWARN("asked to rename a non existent device %s -> %s",
-                      old, new);
+               DMWARN("asked to rename a non-existent device %s -> %s",
+                      param->name, new);
                up_write(&_hash_lock);
                kfree(new_name);
-               return -ENXIO;
+               return ERR_PTR(-ENXIO);
        }
 
        /*
@@ -345,13 +356,14 @@ static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
                dm_table_put(table);
        }
 
-       if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie))
-               *flags |= DM_UEVENT_GENERATED_FLAG;
+       if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr))
+               param->flags |= DM_UEVENT_GENERATED_FLAG;
 
-       dm_put(hc->md);
+       md = hc->md;
        up_write(&_hash_lock);
        kfree(old_name);
-       return 0;
+
+       return md;
 }
 
 /*-----------------------------------------------------------------
@@ -573,7 +585,7 @@ static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md,
  * Fills in a dm_ioctl structure, ready for sending back to
  * userland.
  */
-static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
+static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 {
        struct gendisk *disk = dm_disk(md);
        struct dm_table *table;
@@ -617,8 +629,6 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
                        dm_table_put(table);
                }
        }
-
-       return 0;
 }
 
 static int dev_create(struct dm_ioctl *param, size_t param_size)
@@ -640,15 +650,17 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
        r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
        if (r) {
                dm_put(md);
+               dm_destroy(md);
                return r;
        }
 
        param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
 
-       r = __dev_status(md, param);
+       __dev_status(md, param);
+
        dm_put(md);
 
-       return r;
+       return 0;
 }
 
 /*
@@ -742,6 +754,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
                param->flags |= DM_UEVENT_GENERATED_FLAG;
 
        dm_put(md);
+       dm_destroy(md);
        return 0;
 }
 
@@ -762,6 +775,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 {
        int r;
        char *new_name = (char *) param + param->data_start;
+       struct mapped_device *md;
 
        if (new_name < param->data ||
            invalid_str(new_name, (void *) param + param_size) ||
@@ -774,10 +788,14 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
        if (r)
                return r;
 
-       param->data_size = 0;
+       md = dm_hash_rename(param, new_name);
+       if (IS_ERR(md))
+               return PTR_ERR(md);
+
+       __dev_status(md, param);
+       dm_put(md);
 
-       return dm_hash_rename(param->event_nr, &param->flags, param->name,
-                             new_name);
+       return 0;
 }
 
 static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -818,8 +836,6 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
        geometry.start = indata[3];
 
        r = dm_set_geometry(md, &geometry);
-       if (!r)
-               r = __dev_status(md, param);
 
        param->data_size = 0;
 
@@ -843,13 +859,17 @@ static int do_suspend(struct dm_ioctl *param)
        if (param->flags & DM_NOFLUSH_FLAG)
                suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
 
-       if (!dm_suspended_md(md))
+       if (!dm_suspended_md(md)) {
                r = dm_suspend(md, suspend_flags);
+               if (r)
+                       goto out;
+       }
 
-       if (!r)
-               r = __dev_status(md, param);
+       __dev_status(md, param);
 
+out:
        dm_put(md);
+
        return r;
 }
 
@@ -911,7 +931,7 @@ static int do_resume(struct dm_ioctl *param)
                dm_table_destroy(old_map);
 
        if (!r)
-               r = __dev_status(md, param);
+               __dev_status(md, param);
 
        dm_put(md);
        return r;
@@ -935,16 +955,16 @@ static int dev_suspend(struct dm_ioctl *param, size_t param_size)
  */
 static int dev_status(struct dm_ioctl *param, size_t param_size)
 {
-       int r;
        struct mapped_device *md;
 
        md = find_device(param);
        if (!md)
                return -ENXIO;
 
-       r = __dev_status(md, param);
+       __dev_status(md, param);
        dm_put(md);
-       return r;
+
+       return 0;
 }
 
 /*
@@ -1019,7 +1039,7 @@ static void retrieve_status(struct dm_table *table,
  */
 static int dev_wait(struct dm_ioctl *param, size_t param_size)
 {
-       int r;
+       int r = 0;
        struct mapped_device *md;
        struct dm_table *table;
 
@@ -1040,9 +1060,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
         * changed to trigger the event, so we may as well tell
         * him and save an ioctl.
         */
-       r = __dev_status(md, param);
-       if (r)
-               goto out;
+       __dev_status(md, param);
 
        table = dm_get_live_or_inactive_table(md, param);
        if (table) {
@@ -1050,8 +1068,9 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
                dm_table_put(table);
        }
 
- out:
+out:
        dm_put(md);
+
        return r;
 }
 
@@ -1112,28 +1131,9 @@ static int populate_table(struct dm_table *table,
                next = spec->next;
        }
 
-       r = dm_table_set_type(table);
-       if (r) {
-               DMWARN("unable to set table type");
-               return r;
-       }
-
        return dm_table_complete(table);
 }
 
-static int table_prealloc_integrity(struct dm_table *t,
-                                   struct mapped_device *md)
-{
-       struct list_head *devices = dm_table_get_devices(t);
-       struct dm_dev_internal *dd;
-
-       list_for_each_entry(dd, devices, list)
-               if (bdev_get_integrity(dd->dm_dev.bdev))
-                       return blk_integrity_register(dm_disk(md), NULL);
-
-       return 0;
-}
-
 static int table_load(struct dm_ioctl *param, size_t param_size)
 {
        int r;
@@ -1155,21 +1155,30 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
                goto out;
        }
 
-       r = table_prealloc_integrity(t, md);
-       if (r) {
-               DMERR("%s: could not register integrity profile.",
-                     dm_device_name(md));
+       /* Protect md->type and md->queue against concurrent table loads. */
+       dm_lock_md_type(md);
+       if (dm_get_md_type(md) == DM_TYPE_NONE)
+               /* Initial table load: acquire type of table. */
+               dm_set_md_type(md, dm_table_get_type(t));
+       else if (dm_get_md_type(md) != dm_table_get_type(t)) {
+               DMWARN("can't change device type after initial table load.");
                dm_table_destroy(t);
+               dm_unlock_md_type(md);
+               r = -EINVAL;
                goto out;
        }
 
-       r = dm_table_alloc_md_mempools(t);
+       /* setup md->queue to reflect md's type (may block) */
+       r = dm_setup_md_queue(md);
        if (r) {
-               DMWARN("unable to allocate mempools for this table");
+               DMWARN("unable to set up device queue for new table.");
                dm_table_destroy(t);
+               dm_unlock_md_type(md);
                goto out;
        }
+       dm_unlock_md_type(md);
 
+       /* stage inactive table */
        down_write(&_hash_lock);
        hc = dm_get_mdptr(md);
        if (!hc || hc->md != md) {
@@ -1186,7 +1195,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
        up_write(&_hash_lock);
 
        param->flags |= DM_INACTIVE_PRESENT_FLAG;
-       r = __dev_status(md, param);
+       __dev_status(md, param);
 
 out:
        dm_put(md);
@@ -1196,7 +1205,6 @@ out:
 
 static int table_clear(struct dm_ioctl *param, size_t param_size)
 {
-       int r;
        struct hash_cell *hc;
        struct mapped_device *md;
 
@@ -1216,11 +1224,12 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 
        param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
 
-       r = __dev_status(hc->md, param);
+       __dev_status(hc->md, param);
        md = hc->md;
        up_write(&_hash_lock);
        dm_put(md);
-       return r;
+
+       return 0;
 }
 
 /*
@@ -1265,7 +1274,6 @@ static void retrieve_deps(struct dm_table *table,
 
 static int table_deps(struct dm_ioctl *param, size_t param_size)
 {
-       int r = 0;
        struct mapped_device *md;
        struct dm_table *table;
 
@@ -1273,9 +1281,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
        if (!md)
                return -ENXIO;
 
-       r = __dev_status(md, param);
-       if (r)
-               goto out;
+       __dev_status(md, param);
 
        table = dm_get_live_or_inactive_table(md, param);
        if (table) {
@@ -1283,9 +1289,9 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
                dm_table_put(table);
        }
 
- out:
        dm_put(md);
-       return r;
+
+       return 0;
 }
 
 /*
@@ -1294,7 +1300,6 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
  */
 static int table_status(struct dm_ioctl *param, size_t param_size)
 {
-       int r;
        struct mapped_device *md;
        struct dm_table *table;
 
@@ -1302,9 +1307,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
        if (!md)
                return -ENXIO;
 
-       r = __dev_status(md, param);
-       if (r)
-               goto out;
+       __dev_status(md, param);
 
        table = dm_get_live_or_inactive_table(md, param);
        if (table) {
@@ -1312,9 +1315,9 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
                dm_table_put(table);
        }
 
-out:
        dm_put(md);
-       return r;
+
+       return 0;
 }
 
 /*
@@ -1333,10 +1336,6 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
        if (!md)
                return -ENXIO;
 
-       r = __dev_status(md, param);
-       if (r)
-               goto out;
-
        if (tmsg < (struct dm_target_msg *) param->data ||
            invalid_str(tmsg->message, (void *) param + param_size)) {
                DMWARN("Invalid target message parameters.");
@@ -1593,18 +1592,22 @@ static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u)
 #endif
 
 static const struct file_operations _ctl_fops = {
+       .open = nonseekable_open,
        .unlocked_ioctl  = dm_ctl_ioctl,
        .compat_ioctl = dm_compat_ctl_ioctl,
        .owner   = THIS_MODULE,
 };
 
 static struct miscdevice _dm_misc = {
-       .minor          = MISC_DYNAMIC_MINOR,
+       .minor          = MAPPER_CTRL_MINOR,
        .name           = DM_NAME,
-       .nodename       = "mapper/control",
+       .nodename       = DM_DIR "/" DM_CONTROL_NODE,
        .fops           = &_ctl_fops
 };
 
+MODULE_ALIAS_MISCDEV(MAPPER_CTRL_MINOR);
+MODULE_ALIAS("devname:" DM_DIR "/" DM_CONTROL_NODE);
+
 /*
  * Create misc character device and link to DM_DIR/control.
  */
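The switch from MISC_DYNAMIC_MINOR to a fixed MAPPER_CTRL_MINOR plus the two MODULE_ALIAS lines is what makes "dm: allow autoloading of dm mod" work: devtmpfs/udev can create the control node before the module is loaded, and the first open() requests the module via the devname alias. A hedged, generic sketch of the same pattern for an imaginary driver; every name and the minor number below are made up for illustration and are not dm code:

    #include <linux/module.h>
    #include <linux/fs.h>
    #include <linux/miscdevice.h>

    #define EXAMPLE_CTRL_MINOR 242            /* made-up fixed minor for illustration */

    static const struct file_operations example_ctl_fops = {
            .owner = THIS_MODULE,
            .open  = nonseekable_open,
    };

    static struct miscdevice example_ctl_dev = {
            .minor    = EXAMPLE_CTRL_MINOR,
            .name     = "example-control",
            .nodename = "example/control",
            .fops     = &example_ctl_fops,
    };

    /*
     * A fixed minor plus these aliases lets devtmpfs create
     * /dev/example/control up front and lets the kernel autoload this
     * module when that node is first opened.
     */
    MODULE_ALIAS_MISCDEV(EXAMPLE_CTRL_MINOR);
    MODULE_ALIAS("devname:example/control");

    static int __init example_ctl_init(void)
    {
            return misc_register(&example_ctl_dev);
    }

    static void __exit example_ctl_exit(void)
    {
            misc_deregister(&example_ctl_dev);
    }

    module_init(example_ctl_init);
    module_exit(example_ctl_exit);
    MODULE_LICENSE("GPL");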
index 9200dbf2391a7934c11314fe3e91ce90275852f4..3921e3bb43c15a107c90e4769fa682b1041d9a7a 100644 (file)
@@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
 
        ti->num_flush_requests = 1;
+       ti->num_discard_requests = 1;
        ti->private = lc;
        return 0;
 
@@ -73,7 +74,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
 {
        struct linear_c *lc = ti->private;
 
-       return lc->start + (bi_sector - ti->begin);
+       return lc->start + dm_target_offset(ti, bi_sector);
 }
 
 static void linear_map_bio(struct dm_target *ti, struct bio *bio)
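dm_target_offset(ti, sector), introduced by "dm: use dm_target_offset macro", is presumably just the open-coded (sector - ti->begin) subtraction these hunks replace. A tiny stand-alone illustration of how the linear target remaps a sector with it; the simplified macro and the numbers are made up for this sketch:

    #include <stdio.h>

    typedef unsigned long long sector_t;

    /* simplified stand-in for the kernel macro, which subtracts ti->begin */
    #define dm_target_offset(begin, sector) ((sector) - (begin))

    int main(void)
    {
            sector_t ti_begin  = 2048;   /* target's start within the dm device */
            sector_t lc_start  = 8192;   /* target's start on the backing device */
            sector_t bi_sector = 3072;   /* incoming bio sector on the dm device */

            sector_t mapped = lc_start + dm_target_offset(ti_begin, bi_sector);

            printf("dm sector %llu -> backing sector %llu\n", bi_sector, mapped);
            return 0;
    }

Here the bio at dm sector 3072 lands at backing sector 9216, exactly what linear_map_sector() computes.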
index 826bce7343b3697e5abdfd4c62e760250f6ecf3a..487ecda90ad48f769643d03cf862cdff5ee8c76e 100644 (file)
@@ -706,6 +706,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 
                if (as->argc < nr_params) {
                        ti->error = "not enough path parameters";
+                       r = -EINVAL;
                        goto bad;
                }
 
@@ -892,6 +893,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
        }
 
        ti->num_flush_requests = 1;
+       ti->num_discard_requests = 1;
 
        return 0;
 
@@ -1271,6 +1273,15 @@ static int do_end_io(struct multipath *m, struct request *clone,
        if (error == -EOPNOTSUPP)
                return error;
 
+       if (clone->cmd_flags & REQ_DISCARD)
+               /*
+                * Pass all discard request failures up.
+                * FIXME: only fail_path if the discard failed due to a
+                * transport problem.  This requires precise understanding
+                * of the underlying failure (e.g. the SCSI sense).
+                */
+               return error;
+
        if (mpio->pgpath)
                fail_path(mpio->pgpath);
 
index 74136262d6542cbd599519645b6aaac3c6f1db74..7c081bcbc3cf31f141ea1e7c1c7bdd03f7742582 100644 (file)
@@ -445,7 +445,7 @@ static sector_t map_sector(struct mirror *m, struct bio *bio)
 {
        if (unlikely(!bio->bi_size))
                return 0;
-       return m->offset + (bio->bi_sector - m->ms->ti->begin);
+       return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector);
 }
 
 static void map_bio(struct mirror *m, struct bio *bio)
index c097d8a4823d65f7e3408c9f116c235b19cec735..cc2bdb83f9ad685c7e87211f2e84b0b3a7aed871 100644 (file)
@@ -266,7 +266,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
  */
 static chunk_t area_location(struct pstore *ps, chunk_t area)
 {
-       return 1 + ((ps->exceptions_per_area + 1) * area);
+       return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
 }
 
 /*
@@ -780,8 +780,8 @@ static int persistent_commit_merge(struct dm_exception_store *store,
         * ps->current_area does not get reduced by prepare_merge() until
         * after commit_merge() has removed the nr_merged previous exceptions.
         */
-       ps->next_free = (area_location(ps, ps->current_area) - 1) +
-                       (ps->current_committed + 1) + NUM_SNAPSHOT_HDR_CHUNKS;
+       ps->next_free = area_location(ps, ps->current_area) +
+                       ps->current_committed + 1;
 
        return 0;
 }
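Assuming NUM_SNAPSHOT_HDR_CHUNKS is 1 (the value the removed literal suggests), the two hunks are arithmetically equivalent: with exceptions_per_area = 127, current_area = 2 and current_committed = 5, the new area_location() returns 1 + 128 * 2 = 257, so next_free = 257 + 5 + 1 = 263, exactly what the old expression (257 - 1) + (5 + 1) + 1 produced. The rewrite just keeps the header-chunk offset in one place instead of adding and subtracting it at the call site.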
index 54853773510c41216c263c5096164559df7782b8..5974d3094d979297cde5267b0cbaa6abbd0481cd 100644 (file)
@@ -148,6 +148,12 @@ struct dm_snapshot {
 #define RUNNING_MERGE          0
 #define SHUTDOWN_MERGE         1
 
+struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
+{
+       return s->origin;
+}
+EXPORT_SYMBOL(dm_snap_origin);
+
 struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
 {
        return s->cow;
@@ -1065,10 +1071,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                origin_mode = FMODE_WRITE;
        }
 
-       origin_path = argv[0];
-       argv++;
-       argc--;
-
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s) {
                ti->error = "Cannot allocate snapshot context private "
@@ -1077,6 +1079,16 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad;
        }
 
+       origin_path = argv[0];
+       argv++;
+       argc--;
+
+       r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
+       if (r) {
+               ti->error = "Cannot get origin device";
+               goto bad_origin;
+       }
+
        cow_path = argv[0];
        argv++;
        argc--;
@@ -1097,12 +1109,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        argv += args_used;
        argc -= args_used;
 
-       r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
-       if (r) {
-               ti->error = "Cannot get origin device";
-               goto bad_origin;
-       }
-
        s->ti = ti;
        s->valid = 1;
        s->active = 0;
@@ -1212,15 +1218,15 @@ bad_kcopyd:
        dm_exception_table_exit(&s->complete, exception_cache);
 
 bad_hash_tables:
-       dm_put_device(ti, s->origin);
-
-bad_origin:
        dm_exception_store_destroy(s->store);
 
 bad_store:
        dm_put_device(ti, s->cow);
 
 bad_cow:
+       dm_put_device(ti, s->origin);
+
+bad_origin:
        kfree(s);
 
 bad:
@@ -1314,12 +1320,12 @@ static void snapshot_dtr(struct dm_target *ti)
 
        mempool_destroy(s->pending_pool);
 
-       dm_put_device(ti, s->origin);
-
        dm_exception_store_destroy(s->store);
 
        dm_put_device(ti, s->cow);
 
+       dm_put_device(ti, s->origin);
+
        kfree(s);
 }
 
@@ -1686,7 +1692,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
        chunk_t chunk;
 
        if (unlikely(bio_empty_barrier(bio))) {
-               if (!map_context->flush_request)
+               if (!map_context->target_request_nr)
                        bio->bi_bdev = s->origin->bdev;
                else
                        bio->bi_bdev = s->cow->bdev;
@@ -1899,8 +1905,14 @@ static int snapshot_iterate_devices(struct dm_target *ti,
                                    iterate_devices_callout_fn fn, void *data)
 {
        struct dm_snapshot *snap = ti->private;
+       int r;
+
+       r = fn(ti, snap->origin, 0, ti->len, data);
+
+       if (!r)
+               r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data);
 
-       return fn(ti, snap->origin, 0, ti->len, data);
+       return r;
 }
 
 
@@ -2159,6 +2171,21 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
        return 0;
 }
 
+static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
+                       struct bio_vec *biovec, int max_size)
+{
+       struct dm_dev *dev = ti->private;
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+
+       if (!q->merge_bvec_fn)
+               return max_size;
+
+       bvm->bi_bdev = dev->bdev;
+       bvm->bi_sector = bvm->bi_sector;
+
+       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
+}
+
 static int origin_iterate_devices(struct dm_target *ti,
                                  iterate_devices_callout_fn fn, void *data)
 {
@@ -2176,6 +2203,7 @@ static struct target_type origin_target = {
        .map     = origin_map,
        .resume  = origin_resume,
        .status  = origin_status,
+       .merge   = origin_merge,
        .iterate_devices = origin_iterate_devices,
 };
 
index d6e28d732b4d7de33e339ab32145634b6b170034..c297f6da91ea3cb1b05e4f35af21c0760ab25456 100644 (file)
@@ -25,6 +25,8 @@ struct stripe {
 
 struct stripe_c {
        uint32_t stripes;
+       int stripes_shift;
+       sector_t stripes_mask;
 
        /* The size of this target / num. stripes */
        sector_t stripe_width;
@@ -162,16 +164,22 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
        /* Set pointer to dm target; used in trigger_event */
        sc->ti = ti;
-
        sc->stripes = stripes;
        sc->stripe_width = width;
+
+       if (stripes & (stripes - 1))
+               sc->stripes_shift = -1;
+       else {
+               sc->stripes_shift = ffs(stripes) - 1;
+               sc->stripes_mask = ((sector_t) stripes) - 1;
+       }
+
        ti->split_io = chunk_size;
        ti->num_flush_requests = stripes;
+       ti->num_discard_requests = stripes;
 
+       sc->chunk_shift = ffs(chunk_size) - 1;
        sc->chunk_mask = ((sector_t) chunk_size) - 1;
-       for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
-               chunk_size >>= 1;
-       sc->chunk_shift--;
 
        /*
         * Get the stripe destinations.
@@ -207,26 +215,79 @@ static void stripe_dtr(struct dm_target *ti)
        kfree(sc);
 }
 
+static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
+                             uint32_t *stripe, sector_t *result)
+{
+       sector_t offset = dm_target_offset(sc->ti, sector);
+       sector_t chunk = offset >> sc->chunk_shift;
+
+       if (sc->stripes_shift < 0)
+               *stripe = sector_div(chunk, sc->stripes);
+       else {
+               *stripe = chunk & sc->stripes_mask;
+               chunk >>= sc->stripes_shift;
+       }
+
+       *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask);
+}
+
+static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
+                                   uint32_t target_stripe, sector_t *result)
+{
+       uint32_t stripe;
+
+       stripe_map_sector(sc, sector, &stripe, result);
+       if (stripe == target_stripe)
+               return;
+       *result &= ~sc->chunk_mask;                     /* round down */
+       if (target_stripe < stripe)
+               *result += sc->chunk_mask + 1;          /* next chunk */
+}
+
+static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
+                             uint32_t target_stripe)
+{
+       sector_t begin, end;
+
+       stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
+       stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
+                               target_stripe, &end);
+       if (begin < end) {
+               bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
+               bio->bi_sector = begin + sc->stripe[target_stripe].physical_start;
+               bio->bi_size = to_bytes(end - begin);
+               return DM_MAPIO_REMAPPED;
+       } else {
+               /* The range doesn't map to the target stripe */
+               bio_endio(bio, 0);
+               return DM_MAPIO_SUBMITTED;
+       }
+}
+
 static int stripe_map(struct dm_target *ti, struct bio *bio,
                      union map_info *map_context)
 {
-       struct stripe_c *sc = (struct stripe_c *) ti->private;
-       sector_t offset, chunk;
+       struct stripe_c *sc = ti->private;
        uint32_t stripe;
+       unsigned target_request_nr;
 
        if (unlikely(bio_empty_barrier(bio))) {
-               BUG_ON(map_context->flush_request >= sc->stripes);
-               bio->bi_bdev = sc->stripe[map_context->flush_request].dev->bdev;
+               target_request_nr = map_context->target_request_nr;
+               BUG_ON(target_request_nr >= sc->stripes);
+               bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
                return DM_MAPIO_REMAPPED;
        }
+       if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+               target_request_nr = map_context->target_request_nr;
+               BUG_ON(target_request_nr >= sc->stripes);
+               return stripe_map_discard(sc, bio, target_request_nr);
+       }
 
-       offset = bio->bi_sector - ti->begin;
-       chunk = offset >> sc->chunk_shift;
-       stripe = sector_div(chunk, sc->stripes);
+       stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
 
+       bio->bi_sector += sc->stripe[stripe].physical_start;
        bio->bi_bdev = sc->stripe[stripe].dev->bdev;
-       bio->bi_sector = sc->stripe[stripe].physical_start +
-           (chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
+
        return DM_MAPIO_REMAPPED;
 }
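The new stripes_shift/stripes_mask fields let stripe_map_sector() skip sector_div() whenever the stripe count is a power of two ("dm stripe: optimize sector division"). A small user-space sketch of that fast path, with made-up sample values; the kernel code uses sector_div() on the slow path because 64-bit division is not available directly on 32-bit builds:

    #include <stdio.h>
    #include <stdint.h>
    #include <strings.h>                               /* ffs() */

    int main(void)
    {
            uint32_t stripes = 4;                      /* power of two */
            uint64_t chunk   = 1234567;                /* chunk index across all stripes */
            uint32_t stripe;

            if (stripes & (stripes - 1)) {
                    /* not a power of two: pay for a real 64-bit modulo/division */
                    stripe = chunk % stripes;
                    chunk  = chunk / stripes;
            } else {
                    uint64_t mask  = stripes - 1;
                    int      shift = ffs(stripes) - 1; /* log2(stripes) */

                    stripe  = chunk & mask;            /* chunk % stripes */
                    chunk >>= shift;                   /* chunk / stripes */
            }

            printf("stripe %u, chunk %llu within that stripe\n",
                   stripe, (unsigned long long)chunk);
            return 0;
    }

With stripes = 4, chunk 1234567 maps to stripe 3, chunk 308641 — the mask-and-shift result matches the modulo/division it replaces.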
 
index 9924ea23032d6f7418f15f1f8b4198b5c5dedd88..f9fc07d7a4b91f9db6aeb688cb5346bf604d1ac4 100644 (file)
@@ -54,6 +54,8 @@ struct dm_table {
        sector_t *highs;
        struct dm_target *targets;
 
+       unsigned discards_supported:1;
+
        /*
         * Indicates the rw permissions for the new logical
         * device.  This should be a combination of FMODE_READ
@@ -203,6 +205,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 
        INIT_LIST_HEAD(&t->devices);
        atomic_set(&t->holders, 0);
+       t->discards_supported = 1;
 
        if (!num_targets)
                num_targets = KEYS_PER_NODE;
@@ -245,7 +248,7 @@ void dm_table_destroy(struct dm_table *t)
                msleep(1);
        smp_mb();
 
-       /* free the indexes (see dm_table_complete) */
+       /* free the indexes */
        if (t->depth >= 2)
                vfree(t->index[t->depth - 2]);
 
@@ -770,6 +773,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
        t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
+       if (!tgt->num_discard_requests)
+               t->discards_supported = 0;
+
        return 0;
 
  bad:
@@ -778,7 +784,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
        return r;
 }
 
-int dm_table_set_type(struct dm_table *t)
+static int dm_table_set_type(struct dm_table *t)
 {
        unsigned i;
        unsigned bio_based = 0, request_based = 0;
@@ -900,7 +906,7 @@ static int setup_indexes(struct dm_table *t)
 /*
  * Builds the btree to index the map.
  */
-int dm_table_complete(struct dm_table *t)
+static int dm_table_build_index(struct dm_table *t)
 {
        int r = 0;
        unsigned int leaf_nodes;
@@ -919,6 +925,55 @@ int dm_table_complete(struct dm_table *t)
        return r;
 }
 
+/*
+ * Register the mapped device for blk_integrity support if
+ * the underlying devices support it.
+ */
+static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md)
+{
+       struct list_head *devices = dm_table_get_devices(t);
+       struct dm_dev_internal *dd;
+
+       list_for_each_entry(dd, devices, list)
+               if (bdev_get_integrity(dd->dm_dev.bdev))
+                       return blk_integrity_register(dm_disk(md), NULL);
+
+       return 0;
+}
+
+/*
+ * Prepares the table for use by building the indices,
+ * setting the type, and allocating mempools.
+ */
+int dm_table_complete(struct dm_table *t)
+{
+       int r;
+
+       r = dm_table_set_type(t);
+       if (r) {
+               DMERR("unable to set table type");
+               return r;
+       }
+
+       r = dm_table_build_index(t);
+       if (r) {
+               DMERR("unable to build btrees");
+               return r;
+       }
+
+       r = dm_table_prealloc_integrity(t, t->md);
+       if (r) {
+               DMERR("could not register integrity profile.");
+               return r;
+       }
+
+       r = dm_table_alloc_md_mempools(t);
+       if (r)
+               DMERR("unable to allocate mempools");
+
+       return r;
+}
+
 static DEFINE_MUTEX(_event_lock);
 void dm_table_event_callback(struct dm_table *t,
                             void (*fn)(void *), void *context)
@@ -1086,6 +1141,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
        else
                queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
+       if (!dm_table_supports_discards(t))
+               queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+       else
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+
        dm_table_set_integrity(t);
 
        /*
@@ -1232,6 +1292,39 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
        return t->md;
 }
 
+static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
+                                 sector_t start, sector_t len, void *data)
+{
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+
+       return q && blk_queue_discard(q);
+}
+
+bool dm_table_supports_discards(struct dm_table *t)
+{
+       struct dm_target *ti;
+       unsigned i = 0;
+
+       if (!t->discards_supported)
+               return 0;
+
+       /*
+        * Ensure that at least one underlying device supports discards.
+        * t->devices includes internal dm devices such as mirror logs
+        * so we need to use iterate_devices here, which targets
+        * supporting discard must provide.
+        */
+       while (i < dm_table_get_num_targets(t)) {
+               ti = dm_table_get_target(t, i++);
+
+               if (ti->type->iterate_devices &&
+                   ti->type->iterate_devices(ti, device_discard_capable, NULL))
+                       return 1;
+       }
+
+       return 0;
+}
+
 EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
index 11dea11dc0b649e16594e071163f79647c274293..8da366cf381cdbd2f7d1b830bd7ecbe1191a5df1 100644 (file)
@@ -113,6 +113,11 @@ void dm_unregister_target(struct target_type *tt)
  */
 static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
 {
+       /*
+        * Return error for discards instead of -EOPNOTSUPP
+        */
+       tt->num_discard_requests = 1;
+
        return 0;
 }
 
index bbc97030c0c200eb35160beddb0342693c7c7250..cc2b3cb819465c4f6becb14897d998ebfb976572 100644 (file)
@@ -22,6 +22,11 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                return -EINVAL;
        }
 
+       /*
+        * Silently drop discards, avoiding -EOPNOTSUPP.
+        */
+       ti->num_discard_requests = 1;
+
        return 0;
 }
 
index a3f21dc02bd891fb84d8df00c5b22b2ad15a2e64..ac384b2a6a3396238e9f0810cbeee52d08cd7fa7 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/hdreg.h>
+#include <linux/delay.h>
 
 #include <trace/events/block.h>
 
@@ -124,6 +125,10 @@ struct mapped_device {
        unsigned long flags;
 
        struct request_queue *queue;
+       unsigned type;
+       /* Protect queue and type against concurrent access. */
+       struct mutex type_lock;
+
        struct gendisk *disk;
        char name[16];
 
@@ -638,8 +643,14 @@ static void dec_pending(struct dm_io *io, int error)
                         * There can be just one barrier request so we use
                         * a per-device variable for error reporting.
                         * Note that you can't touch the bio after end_io_acct
+                        *
+                        * We ignore -EOPNOTSUPP for empty flush reported by
+                        * underlying devices. We assume that if the device
+                        * doesn't support empty barriers, it doesn't need
+                        * cache flushing commands.
                         */
-                       if (!md->barrier_error && io_error != -EOPNOTSUPP)
+                       if (!md->barrier_error &&
+                           !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
                                md->barrier_error = io_error;
                        end_io_acct(io);
                        free_io(md, io);
@@ -1019,17 +1030,27 @@ static void end_clone_request(struct request *clone, int error)
        dm_complete_request(clone, error);
 }
 
-static sector_t max_io_len(struct mapped_device *md,
-                          sector_t sector, struct dm_target *ti)
+/*
+ * Return maximum size of I/O possible at the supplied sector up to the current
+ * target boundary.
+ */
+static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
+{
+       sector_t target_offset = dm_target_offset(ti, sector);
+
+       return ti->len - target_offset;
+}
+
+static sector_t max_io_len(sector_t sector, struct dm_target *ti)
 {
-       sector_t offset = sector - ti->begin;
-       sector_t len = ti->len - offset;
+       sector_t len = max_io_len_target_boundary(sector, ti);
 
        /*
         * Does the target need to split even further ?
         */
        if (ti->split_io) {
                sector_t boundary;
+               sector_t offset = dm_target_offset(ti, sector);
                boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
                           - offset;
                if (len > boundary)
@@ -1171,36 +1192,96 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
        return tio;
 }
 
-static void __flush_target(struct clone_info *ci, struct dm_target *ti,
-                         unsigned flush_nr)
+static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
+                                  unsigned request_nr, sector_t len)
 {
        struct dm_target_io *tio = alloc_tio(ci, ti);
        struct bio *clone;
 
-       tio->info.flush_request = flush_nr;
+       tio->info.target_request_nr = request_nr;
 
-       clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
+       /*
+        * Discard requests require the bio's inline iovecs be initialized.
+        * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
+        * and discard, so no need for concern about wasted bvec allocations.
+        */
+       clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs);
        __bio_clone(clone, ci->bio);
        clone->bi_destructor = dm_bio_destructor;
+       if (len) {
+               clone->bi_sector = ci->sector;
+               clone->bi_size = to_bytes(len);
+       }
 
        __map_bio(ti, clone, tio);
 }
 
+static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
+                                   unsigned num_requests, sector_t len)
+{
+       unsigned request_nr;
+
+       for (request_nr = 0; request_nr < num_requests; request_nr++)
+               __issue_target_request(ci, ti, request_nr, len);
+}
+
 static int __clone_and_map_empty_barrier(struct clone_info *ci)
 {
-       unsigned target_nr = 0, flush_nr;
+       unsigned target_nr = 0;
        struct dm_target *ti;
 
        while ((ti = dm_table_get_target(ci->map, target_nr++)))
-               for (flush_nr = 0; flush_nr < ti->num_flush_requests;
-                    flush_nr++)
-                       __flush_target(ci, ti, flush_nr);
+               __issue_target_requests(ci, ti, ti->num_flush_requests, 0);
 
        ci->sector_count = 0;
 
        return 0;
 }
 
+/*
+ * Perform all io with a single clone.
+ */
+static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
+{
+       struct bio *clone, *bio = ci->bio;
+       struct dm_target_io *tio;
+
+       tio = alloc_tio(ci, ti);
+       clone = clone_bio(bio, ci->sector, ci->idx,
+                         bio->bi_vcnt - ci->idx, ci->sector_count,
+                         ci->md->bs);
+       __map_bio(ti, clone, tio);
+       ci->sector_count = 0;
+}
+
+static int __clone_and_map_discard(struct clone_info *ci)
+{
+       struct dm_target *ti;
+       sector_t len;
+
+       do {
+               ti = dm_table_find_target(ci->map, ci->sector);
+               if (!dm_target_is_valid(ti))
+                       return -EIO;
+
+               /*
+                * Even though the device advertised discard support,
+                * reconfiguration might have changed that since the
+                * check was performed.
+                */
+               if (!ti->num_discard_requests)
+                       return -EOPNOTSUPP;
+
+               len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
+
+               __issue_target_requests(ci, ti, ti->num_discard_requests, len);
+
+               ci->sector += len;
+       } while (ci->sector_count -= len);
+
+       return 0;
+}
+
 static int __clone_and_map(struct clone_info *ci)
 {
        struct bio *clone, *bio = ci->bio;
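__clone_and_map_discard() walks the discard range one target at a time, clipping each chunk at the target boundary and re-checking num_discard_requests because a table reload may have withdrawn discard support. A runnable user-space sketch of just the splitting loop; struct target and find_target() are simplified stand-ins for dm_table_find_target() and the fields it consults:

#include <stdio.h>

typedef unsigned long long sector_t;

/* Invented stand-in for the few dm_target fields the loop needs. */
struct target { sector_t begin, len; };

/* Linear search standing in for dm_table_find_target(). */
static struct target *find_target(struct target *t, unsigned n, sector_t s)
{
	unsigned i;

	for (i = 0; i < n; i++)
		if (s >= t[i].begin && s < t[i].begin + t[i].len)
			return &t[i];
	return NULL;
}

/*
 * Walk a discard over [sector, sector + count), clipping each chunk at the
 * boundary of the target that contains it, as __clone_and_map_discard()
 * does with max_io_len_target_boundary().
 */
static void split_discard(struct target *t, unsigned n,
			  sector_t sector, sector_t count)
{
	sector_t len;

	do {
		struct target *ti = find_target(t, n, sector);
		sector_t to_boundary;

		if (!ti)
			return;			/* would be -EIO in dm */

		to_boundary = ti->begin + ti->len - sector;
		len = count < to_boundary ? count : to_boundary;
		printf("discard %llu+%llu -> target starting at %llu\n",
		       sector, len, ti->begin);
		sector += len;
	} while (count -= len);
}

int main(void)
{
	struct target t[] = { { 0, 1000 }, { 1000, 1000 } };

	/* a 300-sector discard at 900 is split 100 + 200 at the boundary */
	split_discard(t, 2, 900, 300);
	return 0;
}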
@@ -1211,27 +1292,21 @@ static int __clone_and_map(struct clone_info *ci)
        if (unlikely(bio_empty_barrier(bio)))
                return __clone_and_map_empty_barrier(ci);
 
+       if (unlikely(bio->bi_rw & REQ_DISCARD))
+               return __clone_and_map_discard(ci);
+
        ti = dm_table_find_target(ci->map, ci->sector);
        if (!dm_target_is_valid(ti))
                return -EIO;
 
-       max = max_io_len(ci->md, ci->sector, ti);
-
-       /*
-        * Allocate a target io object.
-        */
-       tio = alloc_tio(ci, ti);
+       max = max_io_len(ci->sector, ti);
 
        if (ci->sector_count <= max) {
                /*
                 * Optimise for the simple case where we can do all of
                 * the remaining io with a single clone.
                 */
-               clone = clone_bio(bio, ci->sector, ci->idx,
-                                 bio->bi_vcnt - ci->idx, ci->sector_count,
-                                 ci->md->bs);
-               __map_bio(ti, clone, tio);
-               ci->sector_count = 0;
+               __clone_and_map_simple(ci, ti);
 
        } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
                /*
@@ -1252,6 +1327,7 @@ static int __clone_and_map(struct clone_info *ci)
                        len += bv_len;
                }
 
+               tio = alloc_tio(ci, ti);
                clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
                                  ci->md->bs);
                __map_bio(ti, clone, tio);
@@ -1274,13 +1350,12 @@ static int __clone_and_map(struct clone_info *ci)
                                if (!dm_target_is_valid(ti))
                                        return -EIO;
 
-                               max = max_io_len(ci->md, ci->sector, ti);
-
-                               tio = alloc_tio(ci, ti);
+                               max = max_io_len(ci->sector, ti);
                        }
 
                        len = min(remaining, max);
 
+                       tio = alloc_tio(ci, ti);
                        clone = split_bvec(bio, ci->sector, ci->idx,
                                           bv->bv_offset + offset, len,
                                           ci->md->bs);
@@ -1362,7 +1437,7 @@ static int dm_merge_bvec(struct request_queue *q,
        /*
         * Find maximum amount of I/O that won't need splitting
         */
-       max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
+       max_sectors = min(max_io_len(bvm->bi_sector, ti),
                          (sector_t) BIO_MAX_SECTORS);
        max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
        if (max_size < 0)
@@ -1845,6 +1920,28 @@ static const struct block_device_operations dm_blk_dops;
 static void dm_wq_work(struct work_struct *work);
 static void dm_rq_barrier_work(struct work_struct *work);
 
+static void dm_init_md_queue(struct mapped_device *md)
+{
+       /*
+        * Request-based dm devices cannot be stacked on top of bio-based dm
+        * devices.  The type of this dm device has not been decided yet.
+        * The type is decided at the first table loading time.
+        * To prevent problematic device stacking, clear the queue flag
+        * for request stacking support until then.
+        *
+        * This queue is new, so no concurrency on the queue_flags.
+        */
+       queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+
+       md->queue->queuedata = md;
+       md->queue->backing_dev_info.congested_fn = dm_any_congested;
+       md->queue->backing_dev_info.congested_data = md;
+       blk_queue_make_request(md->queue, dm_request);
+       blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
+       md->queue->unplug_fn = dm_unplug_all;
+       blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+}
+
 /*
  * Allocate and initialise a blank device with a given minor.
  */
@@ -1870,8 +1967,10 @@ static struct mapped_device *alloc_dev(int minor)
        if (r < 0)
                goto bad_minor;
 
+       md->type = DM_TYPE_NONE;
        init_rwsem(&md->io_lock);
        mutex_init(&md->suspend_lock);
+       mutex_init(&md->type_lock);
        spin_lock_init(&md->deferred_lock);
        spin_lock_init(&md->barrier_error_lock);
        rwlock_init(&md->map_lock);
@@ -1882,33 +1981,11 @@ static struct mapped_device *alloc_dev(int minor)
        INIT_LIST_HEAD(&md->uevent_list);
        spin_lock_init(&md->uevent_lock);
 
-       md->queue = blk_init_queue(dm_request_fn, NULL);
+       md->queue = blk_alloc_queue(GFP_KERNEL);
        if (!md->queue)
                goto bad_queue;
 
-       /*
-        * Request-based dm devices cannot be stacked on top of bio-based dm
-        * devices.  The type of this dm device has not been decided yet,
-        * although we initialized the queue using blk_init_queue().
-        * The type is decided at the first table loading time.
-        * To prevent problematic device stacking, clear the queue flag
-        * for request stacking support until then.
-        *
-        * This queue is new, so no concurrency on the queue_flags.
-        */
-       queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
-       md->saved_make_request_fn = md->queue->make_request_fn;
-       md->queue->queuedata = md;
-       md->queue->backing_dev_info.congested_fn = dm_any_congested;
-       md->queue->backing_dev_info.congested_data = md;
-       blk_queue_make_request(md->queue, dm_request);
-       blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
-       md->queue->unplug_fn = dm_unplug_all;
-       blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-       blk_queue_softirq_done(md->queue, dm_softirq_done);
-       blk_queue_prep_rq(md->queue, dm_prep_fn);
-       blk_queue_lld_busy(md->queue, dm_lld_busy);
-       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
+       dm_init_md_queue(md);
 
        md->disk = alloc_disk(1);
        if (!md->disk)
@@ -2123,6 +2200,72 @@ int dm_create(int minor, struct mapped_device **result)
        return 0;
 }
 
+/*
+ * Functions to manage md->type.
+ * All are required to hold md->type_lock.
+ */
+void dm_lock_md_type(struct mapped_device *md)
+{
+       mutex_lock(&md->type_lock);
+}
+
+void dm_unlock_md_type(struct mapped_device *md)
+{
+       mutex_unlock(&md->type_lock);
+}
+
+void dm_set_md_type(struct mapped_device *md, unsigned type)
+{
+       md->type = type;
+}
+
+unsigned dm_get_md_type(struct mapped_device *md)
+{
+       return md->type;
+}
+
+/*
+ * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
+ */
+static int dm_init_request_based_queue(struct mapped_device *md)
+{
+       struct request_queue *q = NULL;
+
+       if (md->queue->elevator)
+               return 1;
+
+       /* Fully initialize the queue */
+       q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
+       if (!q)
+               return 0;
+
+       md->queue = q;
+       md->saved_make_request_fn = md->queue->make_request_fn;
+       dm_init_md_queue(md);
+       blk_queue_softirq_done(md->queue, dm_softirq_done);
+       blk_queue_prep_rq(md->queue, dm_prep_fn);
+       blk_queue_lld_busy(md->queue, dm_lld_busy);
+       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
+
+       elv_register_queue(md->queue);
+
+       return 1;
+}
+
+/*
+ * Setup the DM device's queue based on md's type
+ */
+int dm_setup_md_queue(struct mapped_device *md)
+{
+       if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) &&
+           !dm_init_request_based_queue(md)) {
+               DMWARN("Cannot initialize queue for request-based mapped device");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static struct mapped_device *dm_find_md(dev_t dev)
 {
        struct mapped_device *md;
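These helpers are intended to be driven from the ioctl table-load path rather than from dm.c itself: the device type is fixed by the first table that gets loaded, and dm_setup_md_queue() finishes queue initialisation if that table turns out to be request-based. A rough kernel-style sketch of such a caller (bind_table_type() is an invented name and the error handling is simplified):

static int bind_table_type(struct mapped_device *md, struct dm_table *t)
{
	int r = 0;

	dm_lock_md_type(md);

	if (dm_get_md_type(md) == DM_TYPE_NONE)
		/* first table load: the device type is decided here */
		dm_set_md_type(md, dm_table_get_type(t));
	else if (dm_get_md_type(md) != dm_table_get_type(t)) {
		/* the type is immutable after the first table load */
		r = -EINVAL;
		goto out;
	}

	/* for request-based devices this completes queue initialisation */
	r = dm_setup_md_queue(md);
out:
	dm_unlock_md_type(md);
	return r;
}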
@@ -2136,6 +2279,7 @@ static struct mapped_device *dm_find_md(dev_t dev)
        md = idr_find(&_minor_idr, minor);
        if (md && (md == MINOR_ALLOCED ||
                   (MINOR(disk_devt(dm_disk(md))) != minor) ||
+                  dm_deleting_md(md) ||
                   test_bit(DMF_FREEING, &md->flags))) {
                md = NULL;
                goto out;
@@ -2170,6 +2314,7 @@ void dm_set_mdptr(struct mapped_device *md, void *ptr)
 void dm_get(struct mapped_device *md)
 {
        atomic_inc(&md->holders);
+       BUG_ON(test_bit(DMF_FREEING, &md->flags));
 }
 
 const char *dm_device_name(struct mapped_device *md)
@@ -2178,27 +2323,55 @@ const char *dm_device_name(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_device_name);
 
-void dm_put(struct mapped_device *md)
+static void __dm_destroy(struct mapped_device *md, bool wait)
 {
        struct dm_table *map;
 
-       BUG_ON(test_bit(DMF_FREEING, &md->flags));
+       might_sleep();
 
-       if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
-               map = dm_get_live_table(md);
-               idr_replace(&_minor_idr, MINOR_ALLOCED,
-                           MINOR(disk_devt(dm_disk(md))));
-               set_bit(DMF_FREEING, &md->flags);
-               spin_unlock(&_minor_lock);
-               if (!dm_suspended_md(md)) {
-                       dm_table_presuspend_targets(map);
-                       dm_table_postsuspend_targets(map);
-               }
-               dm_sysfs_exit(md);
-               dm_table_put(map);
-               dm_table_destroy(__unbind(md));
-               free_dev(md);
+       spin_lock(&_minor_lock);
+       map = dm_get_live_table(md);
+       idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
+       set_bit(DMF_FREEING, &md->flags);
+       spin_unlock(&_minor_lock);
+
+       if (!dm_suspended_md(md)) {
+               dm_table_presuspend_targets(map);
+               dm_table_postsuspend_targets(map);
        }
+
+       /*
+        * Rare, but there may still be I/O requests in flight that have
+        * yet to complete.  Wait for all references to disappear.
+        * No one should increment the reference count of the mapped_device
+        * once its state becomes DMF_FREEING.
+        */
+       if (wait)
+               while (atomic_read(&md->holders))
+                       msleep(1);
+       else if (atomic_read(&md->holders))
+               DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
+                      dm_device_name(md), atomic_read(&md->holders));
+
+       dm_sysfs_exit(md);
+       dm_table_put(map);
+       dm_table_destroy(__unbind(md));
+       free_dev(md);
+}
+
+void dm_destroy(struct mapped_device *md)
+{
+       __dm_destroy(md, true);
+}
+
+void dm_destroy_immediate(struct mapped_device *md)
+{
+       __dm_destroy(md, false);
+}
+
+void dm_put(struct mapped_device *md)
+{
+       atomic_dec(&md->holders);
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
@@ -2253,7 +2426,12 @@ static void process_barrier(struct mapped_device *md, struct bio *bio)
 
        if (!bio_empty_barrier(bio)) {
                __split_and_process_bio(md, bio);
-               dm_flush(md);
+               /*
+                * If the request isn't supported, don't waste time with
+                * the second flush.
+                */
+               if (md->barrier_error != -EOPNOTSUPP)
+                       dm_flush(md);
        }
 
        if (md->barrier_error != DM_ENDIO_REQUEUE)
@@ -2310,11 +2488,11 @@ static void dm_queue_flush(struct mapped_device *md)
        queue_work(md->wq, &md->work);
 }
 
-static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
+static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
 {
        struct dm_rq_target_io *tio = clone->end_io_data;
 
-       tio->info.flush_request = flush_nr;
+       tio->info.target_request_nr = request_nr;
 }
 
 /* Issue barrier requests to targets and wait for their completion. */
@@ -2332,7 +2510,7 @@ static int dm_rq_barrier(struct mapped_device *md)
                ti = dm_table_get_target(map, i);
                for (j = 0; j < ti->num_flush_requests; j++) {
                        clone = clone_rq(md->flush_request, md, GFP_NOIO);
-                       dm_rq_set_flush_nr(clone, j);
+                       dm_rq_set_target_request_nr(clone, j);
                        atomic_inc(&md->pending[rq_data_dir(clone)]);
                        map_request(ti, clone, md);
                }
@@ -2398,13 +2576,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
                goto out;
        }
 
-       /* cannot change the device type, once a table is bound */
-       if (md->map &&
-           (dm_table_get_type(md->map) != dm_table_get_type(table))) {
-               DMWARN("can't change the device type after a table is bound");
-               goto out;
-       }
-
        map = __bind(md, table, &limits);
 
 out:
index bad1724d4869606902740410898d3dfa0e09b1a4..0c2dd5f4af7658936a2c44bfa62a6aa774e56723 100644 (file)
@@ -59,13 +59,20 @@ void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 int dm_table_any_busy_target(struct dm_table *t);
-int dm_table_set_type(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
+bool dm_table_supports_discards(struct dm_table *t);
 int dm_table_alloc_md_mempools(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
+void dm_lock_md_type(struct mapped_device *md);
+void dm_unlock_md_type(struct mapped_device *md);
+void dm_set_md_type(struct mapped_device *md, unsigned type);
+unsigned dm_get_md_type(struct mapped_device *md);
+
+int dm_setup_md_queue(struct mapped_device *md);
+
 /*
  * To check the return value from dm_table_find_target().
  */
@@ -122,6 +129,11 @@ void dm_linear_exit(void);
 int dm_stripe_init(void);
 void dm_stripe_exit(void);
 
+/*
+ * mapped_device operations
+ */
+void dm_destroy(struct mapped_device *md);
+void dm_destroy_immediate(struct mapped_device *md);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
 
index 1381cd97b4eda06c6154e6d0be04823979262b08..2970022faa632715461ab11ea8955cf3aa0fb1c0 100644 (file)
@@ -22,7 +22,7 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 union map_info {
        void *ptr;
        unsigned long long ll;
-       unsigned flush_request;
+       unsigned target_request_nr;
 };
 
 /*
@@ -174,12 +174,18 @@ struct dm_target {
         * A number of zero-length barrier requests that will be submitted
         * to the target for the purpose of flushing cache.
         *
-        * The request number will be placed in union map_info->flush_request.
+        * The request number will be placed in union map_info->target_request_nr.
         * It is a responsibility of the target driver to remap these requests
         * to the real underlying devices.
         */
        unsigned num_flush_requests;
 
+       /*
+        * The number of discard requests that will be submitted to the
+        * target.  map_info->target_request_nr is used like num_flush_requests.
+        */
+       unsigned num_discard_requests;
+
        /* target specific data */
        void *private;
 
@@ -392,6 +398,12 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
 #define dm_array_too_big(fixed, obj, num) \
        ((num) > (UINT_MAX - (fixed)) / (obj))
 
+/*
+ * Sector offset taken relative to the start of the target instead of
+ * relative to the start of the device.
+ */
+#define dm_target_offset(ti, sector) ((sector) - (ti)->begin)
+
 static inline sector_t to_sector(unsigned long n)
 {
        return (n >> SECTOR_SHIFT);
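Putting the new fields together: a bio-based target opts in to flushes and discards from its constructor and uses target_request_nr and dm_target_offset() in its map method. A hedged, kernel-style sketch of such a target (example_c, example_ctr and example_map are invented names; the dm-linear and dm-stripe changes in this series follow the same pattern):

struct example_c {
	struct dm_dev *dev;
	sector_t start;				/* offset into ->dev */
};

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	/* ...parse argv and point ti->private at a struct example_c... */
	ti->num_flush_requests = 1;		/* accept empty barriers */
	ti->num_discard_requests = 1;		/* accept discard bios */
	return 0;
}

static int example_map(struct dm_target *ti, struct bio *bio,
		       union map_info *map_context)
{
	struct example_c *ec = ti->private;

	bio->bi_bdev = ec->dev->bdev;

	if (unlikely(bio_empty_barrier(bio))) {
		unsigned request_nr = map_context->target_request_nr;

		/*
		 * One clone arrives per num_flush_requests; a target with
		 * several underlying devices would use request_nr to pick
		 * which one this flush is for.  Here there is only one.
		 */
		(void)request_nr;
		return DM_MAPIO_REMAPPED;
	}

	/* data and discard bios: translate to the underlying device */
	bio->bi_sector = ec->start + dm_target_offset(ti, bio->bi_sector);

	return DM_MAPIO_REMAPPED;
}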
index 2c445e11379026041cb795844711379546da2a20..49eab360d5d487395c7abefd700ffe294913495c 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 
 #define DM_DIR "mapper"                /* Slashes not supported */
+#define DM_CONTROL_NODE "control"
 #define DM_MAX_TYPE_NAME 16
 #define DM_NAME_LEN 128
 #define DM_UUID_LEN 129
@@ -266,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR       4
-#define DM_VERSION_MINOR       17
+#define DM_VERSION_MINOR       18
 #define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2010-03-05)"
+#define DM_VERSION_EXTRA       "-ioctl (2010-06-29)"
 
 /* Status bits */
 #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
index f6c9b7dcb9fdd77ccf7410e07fb5d89960d26453..bafffc737903971934037fc334f7d4158d9eda48 100644 (file)
@@ -38,6 +38,7 @@
 #define KVM_MINOR              232
 #define BTRFS_MINOR            234
 #define AUTOFS_MINOR           235
+#define MAPPER_CTRL_MINOR      236
 #define MISC_DYNAMIC_MINOR     255
 
 struct device;
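The fixed minor pairs with the new DM_CONTROL_NODE above: the ioctl side of device-mapper is expected to register /dev/mapper/control as a misc device and advertise module aliases so that opening the node can autoload dm-mod. A hedged sketch of roughly what that registration looks like; _ctl_fops and _dm_misc are assumed names and the file_operations body is elided:

static const struct file_operations _ctl_fops = {
	.owner		= THIS_MODULE,
	/* ...unlocked_ioctl, compat_ioctl, open, release... */
};

static struct miscdevice _dm_misc = {
	.minor		= MAPPER_CTRL_MINOR,
	.name		= "device-mapper",
	.nodename	= DM_DIR "/" DM_CONTROL_NODE,
	.fops		= &_ctl_fops,
};

/* udev/devtmpfs can then autoload dm-mod by minor or by node name */
MODULE_ALIAS_MISCDEV(MAPPER_CTRL_MINOR);
MODULE_ALIAS("devname:" DM_DIR "/" DM_CONTROL_NODE);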