diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index d4fadbe8ac90d732d6eca14b735eeb22a3e90f12..7d6197c5849381e8d8f0f0e6fbef91f332f4e108 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -18,9 +18,7 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_log_priv.h"
 #include "xfs_mount.h"
 #include "xfs_error.h"
 #include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
 #include "xfs_discard.h"
 
-/*
- * Perform initial CIL structure initialisation.
- */
-int
-xlog_cil_init(
-       struct log      *log)
-{
-       struct xfs_cil  *cil;
-       struct xfs_cil_ctx *ctx;
-
-       cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
-       if (!cil)
-               return ENOMEM;
-
-       ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
-       if (!ctx) {
-               kmem_free(cil);
-               return ENOMEM;
-       }
-
-       INIT_LIST_HEAD(&cil->xc_cil);
-       INIT_LIST_HEAD(&cil->xc_committing);
-       spin_lock_init(&cil->xc_cil_lock);
-       init_rwsem(&cil->xc_ctx_lock);
-       init_waitqueue_head(&cil->xc_commit_wait);
-
-       INIT_LIST_HEAD(&ctx->committing);
-       INIT_LIST_HEAD(&ctx->busy_extents);
-       ctx->sequence = 1;
-       ctx->cil = cil;
-       cil->xc_ctx = ctx;
-       cil->xc_current_sequence = ctx->sequence;
-
-       cil->xc_log = log;
-       log->l_cilp = cil;
-       return 0;
-}
-
-void
-xlog_cil_destroy(
-       struct log      *log)
-{
-       if (log->l_cilp->xc_ctx) {
-               if (log->l_cilp->xc_ctx->ticket)
-                       xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
-               kmem_free(log->l_cilp->xc_ctx);
-       }
-
-       ASSERT(list_empty(&log->l_cilp->xc_cil));
-       kmem_free(log->l_cilp);
-}
-
 /*
  * Allocate a new ticket. Failing to get a new ticket makes it really hard to
  * recover, so we don't allow failure here. Also, we allocate in a context that
@@ -390,8 +337,8 @@ xlog_cil_committed(
        xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
                                        ctx->start_lsn, abort);
 
-       xfs_alloc_busy_sort(&ctx->busy_extents);
-       xfs_alloc_busy_clear(mp, &ctx->busy_extents,
+       xfs_extent_busy_sort(&ctx->busy_extents);
+       xfs_extent_busy_clear(mp, &ctx->busy_extents,
                             (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
 
        spin_lock(&ctx->cil->xc_cil_lock);
@@ -404,7 +351,7 @@ xlog_cil_committed(
                ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
 
                xfs_discard_extents(mp, &ctx->busy_extents);
-               xfs_alloc_busy_clear(mp, &ctx->busy_extents, false);
+               xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
        }
 
        kmem_free(ctx);
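
The two xfs_extent_busy_clear() call sites above encode a two-phase teardown: the first pass clears busy extents straight away but, when the filesystem is mounted with -o discard and the checkpoint did not abort, leaves discard candidates on the list; only after xfs_discard_extents() has processed them does the second call, with the flag forced to false, drop them for good. A minimal userspace sketch of that control flow, with illustrative names rather than the kernel API:

#include <stdbool.h>
#include <stdio.h>

static int busy_remaining = 3;          /* stand-in for the busy-extent list */

static void clear_busy(bool defer_discards)
{
        if (defer_discards) {
                printf("cleared non-discard extents, %d deferred\n",
                       busy_remaining);
        } else {
                printf("cleared remaining %d extents\n", busy_remaining);
                busy_remaining = 0;
        }
}

int main(void)
{
        bool mount_discard = true;      /* XFS_MOUNT_DISCARD analogue */
        bool aborted = false;           /* was the checkpoint aborted? */

        clear_busy(mount_discard && !aborted);
        if (busy_remaining) {           /* list not empty: discards pending */
                printf("issuing discards\n");   /* xfs_discard_extents() */
                clear_busy(false);
        }
        return 0;
}
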
@@ -426,8 +373,7 @@ xlog_cil_committed(
  */
 STATIC int
 xlog_cil_push(
-       struct log              *log,
-       xfs_lsn_t               push_seq)
+       struct log              *log)
 {
        struct xfs_cil          *cil = log->l_cilp;
        struct xfs_log_vec      *lv;
@@ -443,39 +389,36 @@ xlog_cil_push(
        struct xfs_log_iovec    lhdr;
        struct xfs_log_vec      lvhdr = { NULL };
        xfs_lsn_t               commit_lsn;
+       xfs_lsn_t               push_seq;
 
        if (!cil)
                return 0;
 
-       ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
-
        new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
        new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
-       /*
-        * Lock out transaction commit, but don't block for background pushes
-        * unless we are well over the CIL space limit. See the definition of
-        * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
-        * used here.
-        */
-       if (!down_write_trylock(&cil->xc_ctx_lock)) {
-               if (!push_seq &&
-                   cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
-                       goto out_free_ticket;
-               down_write(&cil->xc_ctx_lock);
-       }
+       down_write(&cil->xc_ctx_lock);
        ctx = cil->xc_ctx;
 
-       /* check if we've anything to push */
-       if (list_empty(&cil->xc_cil))
-               goto out_skip;
+       spin_lock(&cil->xc_cil_lock);
+       push_seq = cil->xc_push_seq;
+       ASSERT(push_seq <= ctx->sequence);
 
-       /* check for spurious background flush */
-       if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+       /*
+        * Check if we've anything to push. If there is nothing, we don't
+        * move on to a new sequence number, so we must remain able to push
+        * this sequence again later.
+        */
+       if (list_empty(&cil->xc_cil)) {
+               cil->xc_push_seq = 0;
+               spin_unlock(&cil->xc_cil_lock);
                goto out_skip;
+       }
+       spin_unlock(&cil->xc_cil_lock);
 
        /* check for a previously pushed sequence */
-       if (push_seq && push_seq < cil->xc_ctx->sequence)
+       if (push_seq < cil->xc_ctx->sequence)
                goto out_skip;
 
        /*
@@ -629,7 +572,6 @@ restart:
 
 out_skip:
        up_write(&cil->xc_ctx_lock);
-out_free_ticket:
        xfs_log_ticket_put(new_ctx->ticket);
        kmem_free(new_ctx);
        return 0;
@@ -641,6 +583,82 @@ out_abort:
        return XFS_ERROR(EIO);
 }
 
+static void
+xlog_cil_push_work(
+       struct work_struct      *work)
+{
+       struct xfs_cil          *cil = container_of(work, struct xfs_cil,
+                                                       xc_push_work);
+       xlog_cil_push(cil->xc_log);
+}
+
+/*
+ * We need to push the CIL every so often so that we don't cache more than we
+ * can fit in the log. The hard limit is that a checkpoint can't be more than
+ * half the log (the current checkpoint is not allowed to overwrite the
+ * previous checkpoint), but commit latency and memory usage limit it to a
+ * smaller size in most cases.
+ */
+static void
+xlog_cil_push_background(
+       struct log      *log)
+{
+       struct xfs_cil  *cil = log->l_cilp;
+
+       /*
+        * The CIL won't be empty because we are called while holding the
+        * context lock, so whatever we added to the CIL will still be there.
+        */
+       ASSERT(!list_empty(&cil->xc_cil));
+
+       /*
+        * don't do a background push if we haven't used up all the
+        * space available yet.
+        */
+       if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+               return;
+
+       spin_lock(&cil->xc_cil_lock);
+       if (cil->xc_push_seq < cil->xc_current_sequence) {
+               cil->xc_push_seq = cil->xc_current_sequence;
+               queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
+       }
+       spin_unlock(&cil->xc_cil_lock);
+}
+
+static void
+xlog_cil_push_foreground(
+       struct log      *log,
+       xfs_lsn_t       push_seq)
+{
+       struct xfs_cil  *cil = log->l_cilp;
+
+       if (!cil)
+               return;
+
+       ASSERT(push_seq && push_seq <= cil->xc_current_sequence);
+
+       /* start on any pending background push to minimise wait time on it */
+       flush_work(&cil->xc_push_work);
+
+       /*
+        * If the CIL is empty or we've already pushed this sequence, there
+        * is no work we need to do.
+        */
+       spin_lock(&cil->xc_cil_lock);
+       if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
+               spin_unlock(&cil->xc_cil_lock);
+               return;
+       }
+
+       cil->xc_push_seq = push_seq;
+       spin_unlock(&cil->xc_cil_lock);
+
+       /* do the push now */
+       xlog_cil_push(log);
+}
+
 /*
  * Commit a transaction with the given vector to the Committed Item List.
  *
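
Taken together, the three additions above form a single-flight flush: xc_push_seq records the highest sequence any caller has claimed, the worker recovers its xfs_cil from the embedded work_struct via container_of(), and both the background and foreground paths fall through when someone has already claimed the sequence they care about. Below is a compressed userspace sketch of that claim protocol, using a direct call plus pthread mutex in place of the kernel workqueue; every name in it is illustrative, not part of the patch.

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for the CIL fields this patch touches. */
struct cil {
        pthread_mutex_t lock;       /* plays the role of xc_cil_lock */
        unsigned long   cur_seq;    /* xc_current_sequence */
        unsigned long   push_seq;   /* xc_push_seq: highest claimed target */
        unsigned long   space_used;
        unsigned long   space_limit;
};

static void do_push(struct cil *cil)
{
        printf("pushing up to sequence %lu\n", cil->push_seq);
}

/* Background path: claim at most one push per sequence. */
static void push_background(struct cil *cil)
{
        int queued = 0;

        if (cil->space_used < cil->space_limit)
                return;                        /* not full enough yet */

        pthread_mutex_lock(&cil->lock);
        if (cil->push_seq < cil->cur_seq) {
                cil->push_seq = cil->cur_seq;  /* claim this sequence */
                queued = 1;                    /* real code: queue_work() */
        }
        pthread_mutex_unlock(&cil->lock);

        if (queued)
                do_push(cil);   /* real code: runs later in the worker */
}

/* Foreground path: push a specific sequence synchronously. */
static void push_foreground(struct cil *cil, unsigned long seq)
{
        /* real code first waits for a queued worker via flush_work() */
        pthread_mutex_lock(&cil->lock);
        if (seq <= cil->push_seq) {            /* already claimed */
                pthread_mutex_unlock(&cil->lock);
                return;
        }
        cil->push_seq = seq;
        pthread_mutex_unlock(&cil->lock);
        do_push(cil);
}

int main(void)
{
        struct cil cil = { PTHREAD_MUTEX_INITIALIZER, 1, 0, 80, 64 };

        push_background(&cil);    /* claims sequence 1 and pushes */
        push_background(&cil);    /* no-op: sequence 1 already claimed */
        push_foreground(&cil, 1); /* no-op for the same reason */
        return 0;
}
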
@@ -667,7 +685,6 @@ xfs_log_commit_cil(
 {
        struct log              *log = mp->m_log;
        int                     log_flags = 0;
-       int                     push = 0;
        struct xfs_log_vec      *log_vector;
 
        if (flags & XFS_TRANS_RELEASE_LOG_RES)
@@ -719,21 +736,9 @@ xfs_log_commit_cil(
         */
        xfs_trans_free_items(tp, *commit_lsn, 0);
 
-       /* check for background commit before unlock */
-       if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
-               push = 1;
+       xlog_cil_push_background(log);
 
        up_read(&log->l_cilp->xc_ctx_lock);
-
-       /*
-        * We need to push CIL every so often so we don't cache more than we
-        * can fit in the log. The limit really is that a checkpoint can't be
-        * more than half the log (the current checkpoint is not allowed to
-        * overwrite the previous checkpoint), but commit latency and memory
-        * usage limit this to a smaller size in most cases.
-        */
-       if (push)
-               xlog_cil_push(log, 0);
        return 0;
 }
 
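
One subtlety worth calling out: xlog_cil_push_background() runs while the committing transaction still holds the read side of xc_ctx_lock, which is what makes its ASSERT(!list_empty(&cil->xc_cil)) safe. It also means the queued worker cannot start the checkpoint until every committer has dropped the read lock, because xlog_cil_push() now takes down_write() unconditionally. A small pthreads sketch of that reader/writer split (names illustrative):

#include <pthread.h>
#include <stdio.h>

/* Many committers insert under the read lock; the pusher takes the
 * write lock to swap in a new context. Illustrative sketch only. */
static pthread_rwlock_t ctx_lock = PTHREAD_RWLOCK_INITIALIZER;

static void commit_one(int id)
{
        pthread_rwlock_rdlock(&ctx_lock);   /* down_read(&xc_ctx_lock) */
        printf("txn %d inserted into CIL\n", id);
        /* a background push queued here cannot run yet: the worker's
         * write lock blocks until all readers are gone */
        pthread_rwlock_unlock(&ctx_lock);
}

static void push(void)
{
        pthread_rwlock_wrlock(&ctx_lock);   /* down_write(&xc_ctx_lock) */
        printf("context swapped, checkpoint starts\n");
        pthread_rwlock_unlock(&ctx_lock);
}

int main(void)
{
        commit_one(1);
        commit_one(2);
        push();
        return 0;
}
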
@@ -746,9 +751,6 @@ xfs_log_commit_cil(
  *
  * We return the current commit lsn to allow the callers to determine if an
  * iclog flush is necessary following this call.
- *
- * XXX: Initially, just push the CIL unconditionally and return whatever
- * commit lsn is there. It'll be empty, so this is broken for now.
  */
 xfs_lsn_t
 xlog_cil_force_lsn(
@@ -766,8 +768,7 @@ xlog_cil_force_lsn(
         * xlog_cil_push() handles racing pushes for the same sequence,
         * so no need to deal with it here.
         */
-       if (sequence == cil->xc_current_sequence)
-               xlog_cil_push(log, sequence);
+       xlog_cil_push_foreground(log, sequence);
 
        /*
         * See if we can find a previous sequence still committing.
@@ -826,3 +827,57 @@ xfs_log_item_in_current_chkpt(
                return false;
        return true;
 }
+
+/*
+ * Perform initial CIL structure initialisation.
+ */
+int
+xlog_cil_init(
+       struct log      *log)
+{
+       struct xfs_cil  *cil;
+       struct xfs_cil_ctx *ctx;
+
+       cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
+       if (!cil)
+               return ENOMEM;
+
+       ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
+       if (!ctx) {
+               kmem_free(cil);
+               return ENOMEM;
+       }
+
+       INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
+       INIT_LIST_HEAD(&cil->xc_cil);
+       INIT_LIST_HEAD(&cil->xc_committing);
+       spin_lock_init(&cil->xc_cil_lock);
+       init_rwsem(&cil->xc_ctx_lock);
+       init_waitqueue_head(&cil->xc_commit_wait);
+
+       INIT_LIST_HEAD(&ctx->committing);
+       INIT_LIST_HEAD(&ctx->busy_extents);
+       ctx->sequence = 1;
+       ctx->cil = cil;
+       cil->xc_ctx = ctx;
+       cil->xc_current_sequence = ctx->sequence;
+
+       cil->xc_log = log;
+       log->l_cilp = cil;
+       return 0;
+}
+
+void
+xlog_cil_destroy(
+       struct log      *log)
+{
+       if (log->l_cilp->xc_ctx) {
+               if (log->l_cilp->xc_ctx->ticket)
+                       xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
+               kmem_free(log->l_cilp->xc_ctx);
+       }
+
+       ASSERT(list_empty(&log->l_cilp->xc_cil));
+       kmem_free(log->l_cilp);
+}
+
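
For reference, the relocated xlog_cil_init() follows the usual unwind-on-failure shape: a later allocation that fails releases every earlier one before returning. The same shape in standalone C, with calloc() standing in for kmem_zalloc() and the positive ENOMEM return convention kept (types illustrative):

#include <errno.h>
#include <stdlib.h>

struct ctx { int sequence; };
struct cil { struct ctx *ctx; };

/* Returns 0 on success or ENOMEM, mirroring xlog_cil_init() above. */
static int cil_init(struct cil **out)
{
        struct cil *cil = calloc(1, sizeof(*cil));
        if (!cil)
                return ENOMEM;

        struct ctx *ctx = calloc(1, sizeof(*ctx));
        if (!ctx) {
                free(cil);              /* unwind the earlier allocation */
                return ENOMEM;
        }

        ctx->sequence = 1;
        cil->ctx = ctx;
        *out = cil;
        return 0;
}

static void cil_destroy(struct cil *cil)
{
        free(cil->ctx);                 /* teardown mirrors init order */
        free(cil);
}

int main(void)
{
        struct cil *cil;

        if (cil_init(&cil))
                return 1;
        cil_destroy(cil);
        return 0;
}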