Merge tag 'pci-v3.16-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git...

[karo-tx-linux.git] / drivers / md / dm-thin.c
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c

index 53728be84dee35ac8dfabbf48087919841049f1a..242ac2ea5f295c0bf2ad2db85fdd86812936851d 100644 (file)
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -27,6 +27,9 @@
  #define MAPPING_POOL_SIZE 1024
  #define PRISON_CELLS 1024
  #define COMMIT_PERIOD HZ
+#define NO_SPACE_TIMEOUT_SECS 60
+
+static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;
  
  DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
                 "A percentage of time allocated for copy on write");
@@ -175,6 +178,7 @@ struct pool {
         struct workqueue_struct *wq;
         struct work_struct worker;
         struct delayed_work waker;
+       struct delayed_work no_space_timeout;
  
         unsigned long last_commit_jiffies;
         unsigned ref_count;
@@ -232,6 +236,13 @@ struct thin_c {
         struct bio_list deferred_bio_list;
         struct bio_list retry_on_resume_list;
         struct rb_root sort_bio_list; /* sorted list of deferred bios */
+
+       /*
+        * Ensures the thin is not destroyed until the worker has finished
+        * iterating the active_thins list.
+        */
+       atomic_t refcount;
+       struct completion can_destroy;
  };
  
  /*----------------------------------------------------------------*/
@@ -928,7 +939,7 @@ static int commit(struct pool *pool)
  {
         int r;
  
-       if (get_pool_mode(pool) != PM_WRITE)
+       if (get_pool_mode(pool) >= PM_READ_ONLY)
                 return -EINVAL;
  
         r = dm_pool_commit_metadata(pool->pmd);
@@ -1486,6 +1497,45 @@ static void process_thin_deferred_bios(struct thin_c *tc)
         blk_finish_plug(&plug);
  }
  
+static void thin_get(struct thin_c *tc);
+static void thin_put(struct thin_c *tc);
+
+/*
+ * We can't hold rcu_read_lock() around code that can block.  So we
+ * find a thin with the rcu lock held; bump a refcount; then drop
+ * the lock.
+ */
+static struct thin_c *get_first_thin(struct pool *pool)
+{
+       struct thin_c *tc = NULL;
+
+       rcu_read_lock();
+       if (!list_empty(&pool->active_thins)) {
+               tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+               thin_get(tc);
+       }
+       rcu_read_unlock();
+
+       return tc;
+}
+
+static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
+{
+       struct thin_c *old_tc = tc;
+
+       rcu_read_lock();
+       list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
+               thin_get(tc);
+               thin_put(old_tc);
+               rcu_read_unlock();
+               return tc;
+       }
+       thin_put(old_tc);
+       rcu_read_unlock();
+
+       return NULL;
+}
+
  static void process_deferred_bios(struct pool *pool)
  {
         unsigned long flags;
@@ -1493,10 +1543,11 @@ static void process_deferred_bios(struct pool *pool)
         struct bio_list bios;
         struct thin_c *tc;
  
-       rcu_read_lock();
-       list_for_each_entry_rcu(tc, &pool->active_thins, list)
+       tc = get_first_thin(pool);
+       while (tc) {
                 process_thin_deferred_bios(tc);
-       rcu_read_unlock();
+               tc = get_next_thin(pool, tc);
+       }
  
         /*
          * If there are any deferred flush bios, we must commit
@@ -1543,6 +1594,20 @@ static void do_waker(struct work_struct *ws)
         queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
  }
  
+/*
+ * We're holding onto IO to allow userland time to react.  After the
+ * timeout either the pool will have been resized (and thus back in
+ * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ */
+static void do_no_space_timeout(struct work_struct *ws)
+{
+       struct pool *pool = container_of(to_delayed_work(ws), struct pool,
+                                        no_space_timeout);
+
+       if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
+               set_pool_mode(pool, PM_READ_ONLY);
+}
+
  /*----------------------------------------------------------------*/
  
  struct noflush_work {
@@ -1578,7 +1643,7 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
  {
         struct noflush_work w;
  
-       INIT_WORK(&w.worker, fn);
+       INIT_WORK_ONSTACK(&w.worker, fn);
         w.tc = tc;
         atomic_set(&w.complete, 0);
         init_waitqueue_head(&w.wait);
@@ -1607,6 +1672,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
         struct pool_c *pt = pool->ti->private;
         bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
         enum pool_mode old_mode = get_pool_mode(pool);
+       unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ;
  
         /*
          * Never allow the pool to transition to PM_WRITE mode if user
@@ -1668,6 +1734,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
                 pool->process_discard = process_discard;
                 pool->process_prepared_mapping = process_prepared_mapping;
                 pool->process_prepared_discard = process_prepared_discard_passdown;
+
+               if (!pool->pf.error_if_no_space && no_space_timeout)
+                       queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
                 break;
  
         case PM_WRITE:
@@ -2053,6 +2122,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
  
         INIT_WORK(&pool->worker, do_worker);
         INIT_DELAYED_WORK(&pool->waker, do_waker);
+       INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
         spin_lock_init(&pool->lock);
         bio_list_init(&pool->deferred_flush_bios);
         INIT_LIST_HEAD(&pool->prepared_mappings);
@@ -2615,6 +2685,7 @@ static void pool_postsuspend(struct dm_target *ti)
         struct pool *pool = pt->pool;
  
         cancel_delayed_work(&pool->waker);
+       cancel_delayed_work(&pool->no_space_timeout);
         flush_workqueue(pool->wq);
         (void) commit(pool);
  }
@@ -3061,11 +3132,25 @@ static struct target_type pool_target = {
  /*----------------------------------------------------------------
   * Thin target methods
   *--------------------------------------------------------------*/
+static void thin_get(struct thin_c *tc)
+{
+       atomic_inc(&tc->refcount);
+}
+
+static void thin_put(struct thin_c *tc)
+{
+       if (atomic_dec_and_test(&tc->refcount))
+               complete(&tc->can_destroy);
+}
+
  static void thin_dtr(struct dm_target *ti)
  {
         struct thin_c *tc = ti->private;
         unsigned long flags;
  
+       thin_put(tc);
+       wait_for_completion(&tc->can_destroy);
+
         spin_lock_irqsave(&tc->pool->lock, flags);
         list_del_rcu(&tc->list);
         spin_unlock_irqrestore(&tc->pool->lock, flags);
@@ -3101,6 +3186,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
         struct thin_c *tc;
         struct dm_dev *pool_dev, *origin_dev;
         struct mapped_device *pool_md;
+       unsigned long flags;
  
         mutex_lock(&dm_thin_pool_table.mutex);
  
@@ -3191,9 +3277,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
  
         mutex_unlock(&dm_thin_pool_table.mutex);
  
-       spin_lock(&tc->pool->lock);
+       atomic_set(&tc->refcount, 1);
+       init_completion(&tc->can_destroy);
+
+       spin_lock_irqsave(&tc->pool->lock, flags);
         list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
-       spin_unlock(&tc->pool->lock);
+       spin_unlock_irqrestore(&tc->pool->lock, flags);
         /*
          * This synchronize_rcu() call is needed here otherwise we risk a
          * wake_worker() call finding no bios to process (because the newly
@@ -3422,6 +3511,9 @@ static void dm_thin_exit(void)
  module_init(dm_thin_init);
  module_exit(dm_thin_exit);
  
+module_param_named(no_space_timeout, no_space_timeout_secs, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
+
  MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
  MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
  MODULE_LICENSE("GPL");