]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/md/dm-thin.c
Merge tag 'pci-v3.16-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[karo-tx-linux.git] / drivers / md / dm-thin.c
index 53728be84dee35ac8dfabbf48087919841049f1a..242ac2ea5f295c0bf2ad2db85fdd86812936851d 100644 (file)
@@ -27,6 +27,9 @@
 #define MAPPING_POOL_SIZE 1024
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
+#define NO_SPACE_TIMEOUT_SECS 60
+
+static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;
 
 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
                "A percentage of time allocated for copy on write");
@@ -175,6 +178,7 @@ struct pool {
        struct workqueue_struct *wq;
        struct work_struct worker;
        struct delayed_work waker;
+       struct delayed_work no_space_timeout;
 
        unsigned long last_commit_jiffies;
        unsigned ref_count;
@@ -232,6 +236,13 @@ struct thin_c {
        struct bio_list deferred_bio_list;
        struct bio_list retry_on_resume_list;
        struct rb_root sort_bio_list; /* sorted list of deferred bios */
+
+       /*
+        * Ensures the thin is not destroyed until the worker has finished
+        * iterating the active_thins list.
+        */
+       atomic_t refcount;
+       struct completion can_destroy;
 };
 
 /*----------------------------------------------------------------*/
@@ -928,7 +939,7 @@ static int commit(struct pool *pool)
 {
        int r;
 
-       if (get_pool_mode(pool) != PM_WRITE)
+       if (get_pool_mode(pool) >= PM_READ_ONLY)
                return -EINVAL;
 
        r = dm_pool_commit_metadata(pool->pmd);
@@ -1486,6 +1497,45 @@ static void process_thin_deferred_bios(struct thin_c *tc)
        blk_finish_plug(&plug);
 }
 
+static void thin_get(struct thin_c *tc);
+static void thin_put(struct thin_c *tc);
+
+/*
+ * We can't hold rcu_read_lock() around code that can block.  So we
+ * find a thin with the rcu lock held; bump a refcount; then drop
+ * the lock.
+ */
+static struct thin_c *get_first_thin(struct pool *pool)
+{
+       struct thin_c *tc = NULL;
+
+       rcu_read_lock();
+       if (!list_empty(&pool->active_thins)) {
+               tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+               thin_get(tc);
+       }
+       rcu_read_unlock();
+
+       return tc;
+}
+
+static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
+{
+       struct thin_c *old_tc = tc;
+
+       rcu_read_lock();
+       list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
+               thin_get(tc);
+               thin_put(old_tc);
+               rcu_read_unlock();
+               return tc;
+       }
+       thin_put(old_tc);
+       rcu_read_unlock();
+
+       return NULL;
+}
+
 static void process_deferred_bios(struct pool *pool)
 {
        unsigned long flags;
@@ -1493,10 +1543,11 @@ static void process_deferred_bios(struct pool *pool)
        struct bio_list bios;
        struct thin_c *tc;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(tc, &pool->active_thins, list)
+       tc = get_first_thin(pool);
+       while (tc) {
                process_thin_deferred_bios(tc);
-       rcu_read_unlock();
+               tc = get_next_thin(pool, tc);
+       }
 
        /*
         * If there are any deferred flush bios, we must commit
@@ -1543,6 +1594,20 @@ static void do_waker(struct work_struct *ws)
        queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
+/*
+ * We're holding onto IO to allow userland time to react.  After the
+ * timeout either the pool will have been resized (and thus back in
+ * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ */
+static void do_no_space_timeout(struct work_struct *ws)
+{
+       struct pool *pool = container_of(to_delayed_work(ws), struct pool,
+                                        no_space_timeout);
+
+       if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
+               set_pool_mode(pool, PM_READ_ONLY);
+}
+
 /*----------------------------------------------------------------*/
 
 struct noflush_work {
@@ -1578,7 +1643,7 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 {
        struct noflush_work w;
 
-       INIT_WORK(&w.worker, fn);
+       INIT_WORK_ONSTACK(&w.worker, fn);
        w.tc = tc;
        atomic_set(&w.complete, 0);
        init_waitqueue_head(&w.wait);
@@ -1607,6 +1672,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
        struct pool_c *pt = pool->ti->private;
        bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
        enum pool_mode old_mode = get_pool_mode(pool);
+       unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ;
 
        /*
         * Never allow the pool to transition to PM_WRITE mode if user
@@ -1668,6 +1734,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
                pool->process_discard = process_discard;
                pool->process_prepared_mapping = process_prepared_mapping;
                pool->process_prepared_discard = process_prepared_discard_passdown;
+
+               if (!pool->pf.error_if_no_space && no_space_timeout)
+                       queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
                break;
 
        case PM_WRITE:
@@ -2053,6 +2122,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
        INIT_WORK(&pool->worker, do_worker);
        INIT_DELAYED_WORK(&pool->waker, do_waker);
+       INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
        spin_lock_init(&pool->lock);
        bio_list_init(&pool->deferred_flush_bios);
        INIT_LIST_HEAD(&pool->prepared_mappings);
@@ -2615,6 +2685,7 @@ static void pool_postsuspend(struct dm_target *ti)
        struct pool *pool = pt->pool;
 
        cancel_delayed_work(&pool->waker);
+       cancel_delayed_work(&pool->no_space_timeout);
        flush_workqueue(pool->wq);
        (void) commit(pool);
 }
@@ -3061,11 +3132,25 @@ static struct target_type pool_target = {
 /*----------------------------------------------------------------
  * Thin target methods
  *--------------------------------------------------------------*/
+static void thin_get(struct thin_c *tc)
+{
+       atomic_inc(&tc->refcount);
+}
+
+static void thin_put(struct thin_c *tc)
+{
+       if (atomic_dec_and_test(&tc->refcount))
+               complete(&tc->can_destroy);
+}
+
 static void thin_dtr(struct dm_target *ti)
 {
        struct thin_c *tc = ti->private;
        unsigned long flags;
 
+       thin_put(tc);
+       wait_for_completion(&tc->can_destroy);
+
        spin_lock_irqsave(&tc->pool->lock, flags);
        list_del_rcu(&tc->list);
        spin_unlock_irqrestore(&tc->pool->lock, flags);
@@ -3101,6 +3186,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
        struct thin_c *tc;
        struct dm_dev *pool_dev, *origin_dev;
        struct mapped_device *pool_md;
+       unsigned long flags;
 
        mutex_lock(&dm_thin_pool_table.mutex);
 
@@ -3191,9 +3277,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
        mutex_unlock(&dm_thin_pool_table.mutex);
 
-       spin_lock(&tc->pool->lock);
+       atomic_set(&tc->refcount, 1);
+       init_completion(&tc->can_destroy);
+
+       spin_lock_irqsave(&tc->pool->lock, flags);
        list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
-       spin_unlock(&tc->pool->lock);
+       spin_unlock_irqrestore(&tc->pool->lock, flags);
        /*
         * This synchronize_rcu() call is needed here otherwise we risk a
         * wake_worker() call finding no bios to process (because the newly
@@ -3422,6 +3511,9 @@ static void dm_thin_exit(void)
 module_init(dm_thin_init);
 module_exit(dm_thin_exit);
 
+module_param_named(no_space_timeout, no_space_timeout_secs, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
+
 MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");