]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/xfs/xfs_log_cil.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
[karo-tx-linux.git] / fs / xfs / xfs_log_cil.c
index 02b9cf3f8252baeade5d4e99b3e88853a7b50b98..cfe97973ba36d1d586c3704b536aebce2e391af1 100644 (file)
@@ -80,6 +80,83 @@ xlog_cil_init_post_recovery(
                                                                log->l_curr_block);
 }
 
+STATIC int
+xlog_cil_lv_item_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_vec      *lv)
+{
+       int     index;
+       char    *ptr;
+
+       /* format new vectors into array */
+       lip->li_ops->iop_format(lip, lv->lv_iovecp);
+
+       /* copy data into existing array */
+       ptr = lv->lv_buf;
+       for (index = 0; index < lv->lv_niovecs; index++) {
+               struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
+
+               memcpy(ptr, vec->i_addr, vec->i_len);
+               vec->i_addr = ptr;
+               ptr += vec->i_len;
+       }
+
+       /*
+        * some size calculations for log vectors over-estimate, so the caller
+        * doesn't know the amount of space actually used by the item. Return
+        * the byte count to the caller so they can check and store it
+        * appropriately.
+        */
+       return ptr - lv->lv_buf;
+}
+
+/*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+       struct xlog             *log,
+       struct xfs_log_vec      *lv,
+       struct xfs_log_vec      *old_lv,
+       int                     *diff_len,
+       int                     *diff_iovecs)
+{
+       /* Account for the new LV being passed in */
+       if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+               *diff_len += lv->lv_buf_len;
+               *diff_iovecs += lv->lv_niovecs;
+       }
+
+       /*
+        * If there is no old LV, this is the first time we've seen the item in
+        * this CIL context and so we need to pin it. If we are replacing the
+        * old_lv, then remove the space it accounts for and free it.
+        */
+       if (!old_lv)
+               lv->lv_item->li_ops->iop_pin(lv->lv_item);
+       else if (old_lv != lv) {
+               ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+
+               *diff_len -= old_lv->lv_buf_len;
+               *diff_iovecs -= old_lv->lv_niovecs;
+               kmem_free(old_lv);
+       }
+
+       /* attach new log vector to log item */
+       lv->lv_item->li_lv = lv;
+
+       /*
+        * If this is the first time the item is being committed to the
+        * CIL, store the sequence number on the log item so we can
+        * tell in future commits whether this is the first checkpoint
+        * the item is being committed into.
+        */
+       if (!lv->lv_item->li_seq)
+               lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
 /*
  * Format log item into a flat buffers
  *
@@ -106,35 +183,39 @@ xlog_cil_init_post_recovery(
  * format the regions into the iclog as though they are being formatted
  * directly out of the objects themselves.
  */
-static struct xfs_log_vec *
-xlog_cil_prepare_log_vecs(
-       struct xfs_trans        *tp)
+static void
+xlog_cil_insert_format_items(
+       struct xlog             *log,
+       struct xfs_trans        *tp,
+       int                     *diff_len,
+       int                     *diff_iovecs)
 {
        struct xfs_log_item_desc *lidp;
-       struct xfs_log_vec      *lv = NULL;
-       struct xfs_log_vec      *ret_lv = NULL;
 
 
        /* Bail out if we didn't find a log item.  */
        if (list_empty(&tp->t_items)) {
                ASSERT(0);
-               return NULL;
+               return;
        }
 
        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-               struct xfs_log_vec *new_lv;
-               void    *ptr;
-               int     index;
-               int     len = 0;
-               uint    niovecs;
+               struct xfs_log_item *lip = lidp->lid_item;
+               struct xfs_log_vec *lv;
+               struct xfs_log_vec *old_lv;
+               int     niovecs = 0;
+               int     nbytes = 0;
+               int     buf_size;
                bool    ordered = false;
 
                /* Skip items which aren't dirty in this transaction. */
                if (!(lidp->lid_flags & XFS_LID_DIRTY))
                        continue;
 
+               /* get number of vecs and size of data to be stored */
+               lip->li_ops->iop_size(lip, &niovecs, &nbytes);
+
                /* Skip items that do not have any vectors for writing */
-               niovecs = IOP_SIZE(lidp->lid_item);
                if (!niovecs)
                        continue;
 
@@ -146,109 +227,63 @@ xlog_cil_prepare_log_vecs(
                if (niovecs == XFS_LOG_VEC_ORDERED) {
                        ordered = true;
                        niovecs = 0;
+                       nbytes = 0;
                }
 
-               new_lv = kmem_zalloc(sizeof(*new_lv) +
-                               niovecs * sizeof(struct xfs_log_iovec),
-                               KM_SLEEP|KM_NOFS);
-
-               new_lv->lv_item = lidp->lid_item;
-               new_lv->lv_niovecs = niovecs;
-               if (ordered) {
-                       /* track as an ordered logvec */
-                       new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
-                       goto next;
-               }
-
-               /* The allocated iovec region lies beyond the log vector. */
-               new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
+               /* grab the old item if it exists for reservation accounting */
+               old_lv = lip->li_lv;
 
-               /* build the vector array and calculate it's length */
-               IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
-               for (index = 0; index < new_lv->lv_niovecs; index++)
-                       len += new_lv->lv_iovecp[index].i_len;
+               /* calc buffer size */
+               buf_size = sizeof(struct xfs_log_vec) + nbytes +
+                               niovecs * sizeof(struct xfs_log_iovec);
 
-               new_lv->lv_buf_len = len;
-               new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
-                               KM_SLEEP|KM_NOFS);
-               ptr = new_lv->lv_buf;
+               /* compare to existing item size */
+               if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
+                       /* same or smaller, optimise common overwrite case */
+                       lv = lip->li_lv;
+                       lv->lv_next = NULL;
 
-               for (index = 0; index < new_lv->lv_niovecs; index++) {
-                       struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
+                       if (ordered)
+                               goto insert;
 
-                       memcpy(ptr, vec->i_addr, vec->i_len);
-                       vec->i_addr = ptr;
-                       ptr += vec->i_len;
-               }
-               ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
-
-next:
-               if (!ret_lv)
-                       ret_lv = new_lv;
-               else
-                       lv->lv_next = new_lv;
-               lv = new_lv;
-       }
-
-       return ret_lv;
-}
-
-/*
- * Prepare the log item for insertion into the CIL. Calculate the difference in
- * log space and vectors it will consume, and if it is a new item pin it as
- * well.
- */
-STATIC void
-xfs_cil_prepare_item(
-       struct xlog             *log,
-       struct xfs_log_vec      *lv,
-       int                     *len,
-       int                     *diff_iovecs)
-{
-       struct xfs_log_vec      *old = lv->lv_item->li_lv;
+                       /*
+                        * set the item up as though it is a new insertion so
+                        * that the space reservation accounting is correct.
+                        */
+                       *diff_iovecs -= lv->lv_niovecs;
+                       *diff_len -= lv->lv_buf_len;
 
-       if (old) {
-               /* existing lv on log item, space used is a delta */
-               ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) ||
-                       old->lv_buf_len == XFS_LOG_VEC_ORDERED);
+                       /* Ensure the lv is set up according to ->iop_size */
+                       lv->lv_niovecs = niovecs;
+                       lv->lv_buf = (char *)lv + buf_size - nbytes;
 
-               /*
-                * If the new item is ordered, keep the old one that is already
-                * tracking dirty or ordered regions
-                */
-               if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
-                       ASSERT(!lv->lv_buf);
-                       kmem_free(lv);
-                       return;
+                       lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
+                       goto insert;
                }
 
-               *len += lv->lv_buf_len - old->lv_buf_len;
-               *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
-               kmem_free(old->lv_buf);
-               kmem_free(old);
-       } else {
-               /* new lv, must pin the log item */
-               ASSERT(!lv->lv_item->li_lv);
-
-               if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
-                       *len += lv->lv_buf_len;
-                       *diff_iovecs += lv->lv_niovecs;
+               /* allocate new data chunk */
+               lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+               lv->lv_item = lip;
+               lv->lv_size = buf_size;
+               lv->lv_niovecs = niovecs;
+               if (ordered) {
+                       /* track as an ordered logvec */
+                       ASSERT(lip->li_lv == NULL);
+                       lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+                       goto insert;
                }
-               IOP_PIN(lv->lv_item);
 
-       }
+               /* The allocated iovec region lies beyond the log vector. */
+               lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
 
-       /* attach new log vector to log item */
-       lv->lv_item->li_lv = lv;
+               /* The allocated data region lies beyond the iovec region */
+               lv->lv_buf = (char *)lv + buf_size - nbytes;
 
-       /*
-        * If this is the first time the item is being committed to the
-        * CIL, store the sequence number on the log item so we can
-        * tell in future commits whether this is the first checkpoint
-        * the item is being committed into.
-        */
-       if (!lv->lv_item->li_seq)
-               lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+               lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
+insert:
+               ASSERT(lv->lv_buf_len <= nbytes);
+               xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
+       }
 }
 
 /*
@@ -261,53 +296,47 @@ xfs_cil_prepare_item(
 static void
 xlog_cil_insert_items(
        struct xlog             *log,
-       struct xfs_log_vec      *log_vector,
-       struct xlog_ticket      *ticket)
+       struct xfs_trans        *tp)
 {
        struct xfs_cil          *cil = log->l_cilp;
        struct xfs_cil_ctx      *ctx = cil->xc_ctx;
-       struct xfs_log_vec      *lv;
+       struct xfs_log_item_desc *lidp;
        int                     len = 0;
        int                     diff_iovecs = 0;
        int                     iclog_space;
 
-       ASSERT(log_vector);
+       ASSERT(tp);
 
        /*
-        * Do all the accounting aggregation and switching of log vectors
-        * around in a separate loop to the insertion of items into the CIL.
-        * Then we can do a separate loop to update the CIL within a single
-        * lock/unlock pair. This reduces the number of round trips on the CIL
-        * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
-        * hold time for the transaction commit.
-        *
-        * If this is the first time the item is being placed into the CIL in
-        * this context, pin it so it can't be written to disk until the CIL is
-        * flushed to the iclog and the iclog written to disk.
-        *
         * We can do this safely because the context can't checkpoint until we
         * are done so it doesn't matter exactly how we update the CIL.
         */
+       xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
+
+       /*
+        * Now (re-)position everything modified at the tail of the CIL.
+        * We do this here so we only need to take the CIL lock once during
+        * the transaction commit.
+        */
        spin_lock(&cil->xc_cil_lock);
-       for (lv = log_vector; lv; ) {
-               struct xfs_log_vec *next = lv->lv_next;
+       list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+               struct xfs_log_item     *lip = lidp->lid_item;
 
-               ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil));
-               lv->lv_next = NULL;
+               /* Skip items which aren't dirty in this transaction. */
+               if (!(lidp->lid_flags & XFS_LID_DIRTY))
+                       continue;
 
-               /*
-                * xfs_cil_prepare_item() may free the lv, so move the item on
-                * the CIL first.
-                */
-               list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
-               xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
-               lv = next;
+               list_move_tail(&lip->li_cil, &cil->xc_cil);
        }
 
        /* account for space used by new iovec headers  */
        len += diff_iovecs * sizeof(xlog_op_header_t);
        ctx->nvecs += diff_iovecs;
 
+       /* attach the transaction to the CIL if it has any busy extents */
+       if (!list_empty(&tp->t_busy))
+               list_splice_init(&tp->t_busy, &ctx->busy_extents);
+
        /*
         * Now transfer enough transaction reservation to the context ticket
         * for the checkpoint. The context ticket is special - the unit
@@ -316,10 +345,8 @@ xlog_cil_insert_items(
         * during the transaction commit.
         */
        if (ctx->ticket->t_curr_res == 0) {
-               /* first commit in checkpoint, steal the header reservation */
-               ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
                ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
-               ticket->t_curr_res -= ctx->ticket->t_unit_res;
+               tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
        }
 
        /* do we need space for more log record headers? */
@@ -333,10 +360,10 @@ xlog_cil_insert_items(
                hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
                ctx->ticket->t_unit_res += hdrs;
                ctx->ticket->t_curr_res += hdrs;
-               ticket->t_curr_res -= hdrs;
-               ASSERT(ticket->t_curr_res >= len);
+               tp->t_ticket->t_curr_res -= hdrs;
+               ASSERT(tp->t_ticket->t_curr_res >= len);
        }
-       ticket->t_curr_res -= len;
+       tp->t_ticket->t_curr_res -= len;
        ctx->space_used += len;
 
        spin_unlock(&cil->xc_cil_lock);
@@ -350,7 +377,6 @@ xlog_cil_free_logvec(
 
        for (lv = log_vector; lv; ) {
                struct xfs_log_vec *next = lv->lv_next;
-               kmem_free(lv->lv_buf);
                kmem_free(lv);
                lv = next;
        }
@@ -376,9 +402,9 @@ xlog_cil_committed(
        xfs_extent_busy_clear(mp, &ctx->busy_extents,
                             (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
 
-       spin_lock(&ctx->cil->xc_cil_lock);
+       spin_lock(&ctx->cil->xc_push_lock);
        list_del(&ctx->committing);
-       spin_unlock(&ctx->cil->xc_cil_lock);
+       spin_unlock(&ctx->cil->xc_push_lock);
 
        xlog_cil_free_logvec(ctx->lv_chain);
 
@@ -433,7 +459,7 @@ xlog_cil_push(
        down_write(&cil->xc_ctx_lock);
        ctx = cil->xc_ctx;
 
-       spin_lock(&cil->xc_cil_lock);
+       spin_lock(&cil->xc_push_lock);
        push_seq = cil->xc_push_seq;
        ASSERT(push_seq <= ctx->sequence);
 
@@ -444,10 +470,10 @@ xlog_cil_push(
         */
        if (list_empty(&cil->xc_cil)) {
                cil->xc_push_seq = 0;
-               spin_unlock(&cil->xc_cil_lock);
+               spin_unlock(&cil->xc_push_lock);
                goto out_skip;
        }
-       spin_unlock(&cil->xc_cil_lock);
+       spin_unlock(&cil->xc_push_lock);
 
 
        /* check for a previously pushed sequence */
@@ -515,9 +541,9 @@ xlog_cil_push(
         * that higher sequences will wait for us to write out a commit record
         * before they do.
         */
-       spin_lock(&cil->xc_cil_lock);
+       spin_lock(&cil->xc_push_lock);
        list_add(&ctx->committing, &cil->xc_committing);
-       spin_unlock(&cil->xc_cil_lock);
+       spin_unlock(&cil->xc_push_lock);
        up_write(&cil->xc_ctx_lock);
 
        /*
@@ -552,7 +578,7 @@ xlog_cil_push(
         * order the commit records so replay will get them in the right order.
         */
 restart:
-       spin_lock(&cil->xc_cil_lock);
+       spin_lock(&cil->xc_push_lock);
        list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
                /*
                 * Higher sequences will wait for this one so skip them.
@@ -565,11 +591,11 @@ restart:
                         * It is still being pushed! Wait for the push to
                         * complete, then start again from the beginning.
                         */
-                       xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
+                       xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
                        goto restart;
                }
        }
-       spin_unlock(&cil->xc_cil_lock);
+       spin_unlock(&cil->xc_push_lock);
 
        /* xfs_log_done always frees the ticket on error. */
        commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
@@ -588,10 +614,10 @@ restart:
         * callbacks to the iclog we can assign the commit LSN to the context
         * and wake up anyone who is waiting for the commit to complete.
         */
-       spin_lock(&cil->xc_cil_lock);
+       spin_lock(&cil->xc_push_lock);
        ctx->commit_lsn = commit_lsn;
        wake_up_all(&cil->xc_commit_wait);
-       spin_unlock(&cil->xc_cil_lock);
+       spin_unlock(&cil->xc_push_lock);
 
        /* release the hounds! */
        return xfs_log_release_iclog(log->l_mp, commit_iclog);
@@ -644,12 +670,12 @@ xlog_cil_push_background(
        if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
                return;
 
-       spin_lock(&cil->xc_cil_lock);
+       spin_lock(&cil->xc_push_lock);
        if (cil->xc_push_seq < cil->xc_current_sequence) {
                cil->xc_push_seq = cil->xc_current_sequence;
                queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
        }
-       spin_unlock(&cil->xc_cil_lock);
+       spin_unlock(&cil->xc_push_lock);
 
 }
 
@@ -672,14 +698,14 @@ xlog_cil_push_foreground(
         * If the CIL is empty or we've already pushed the sequence then
         * there's no work we need to do.
         */
-       spin_lock(&cil->xc_cil_lock);
+       spin_lock(&cil->xc_push_lock);
        if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
-               spin_unlock(&cil->xc_cil_lock);
+               spin_unlock(&cil->xc_push_lock);
                return;
        }
 
        cil->xc_push_seq = push_seq;
-       spin_unlock(&cil->xc_cil_lock);
+       spin_unlock(&cil->xc_push_lock);
 
        /* do the push now */
        xlog_cil_push(log);
@@ -706,43 +732,25 @@ xfs_log_commit_cil(
        int                     flags)
 {
        struct xlog             *log = mp->m_log;
+       struct xfs_cil          *cil = log->l_cilp;
        int                     log_flags = 0;
-       struct xfs_log_vec      *log_vector;
 
        if (flags & XFS_TRANS_RELEASE_LOG_RES)
                log_flags = XFS_LOG_REL_PERM_RESERV;
 
-       /*
-        * Do all the hard work of formatting items (including memory
-        * allocation) outside the CIL context lock. This prevents stalling CIL
-        * pushes when we are low on memory and a transaction commit spends a
-        * lot of time in memory reclaim.
-        */
-       log_vector = xlog_cil_prepare_log_vecs(tp);
-       if (!log_vector)
-               return ENOMEM;
-
        /* lock out background commit */
-       down_read(&log->l_cilp->xc_ctx_lock);
-       if (commit_lsn)
-               *commit_lsn = log->l_cilp->xc_ctx->sequence;
+       down_read(&cil->xc_ctx_lock);
 
-       /* xlog_cil_insert_items() destroys log_vector list */
-       xlog_cil_insert_items(log, log_vector, tp->t_ticket);
+       xlog_cil_insert_items(log, tp);
 
        /* check we didn't blow the reservation */
        if (tp->t_ticket->t_curr_res < 0)
-               xlog_print_tic_res(log->l_mp, tp->t_ticket);
+               xlog_print_tic_res(mp, tp->t_ticket);
 
-       /* attach the transaction to the CIL if it has any busy extents */
-       if (!list_empty(&tp->t_busy)) {
-               spin_lock(&log->l_cilp->xc_cil_lock);
-               list_splice_init(&tp->t_busy,
-                                       &log->l_cilp->xc_ctx->busy_extents);
-               spin_unlock(&log->l_cilp->xc_cil_lock);
-       }
+       tp->t_commit_lsn = cil->xc_ctx->sequence;
+       if (commit_lsn)
+               *commit_lsn = tp->t_commit_lsn;
 
-       tp->t_commit_lsn = *commit_lsn;
        xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
        xfs_trans_unreserve_and_mod_sb(tp);
 
@@ -757,11 +765,11 @@ xfs_log_commit_cil(
         * the log items. This affects (at least) processing of stale buffers,
         * inodes and EFIs.
         */
-       xfs_trans_free_items(tp, *commit_lsn, 0);
+       xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
 
        xlog_cil_push_background(log);
 
-       up_read(&log->l_cilp->xc_ctx_lock);
+       up_read(&cil->xc_ctx_lock);
        return 0;
 }
 
@@ -800,7 +808,7 @@ xlog_cil_force_lsn(
         * on commits for those as well.
         */
 restart:
-       spin_lock(&cil->xc_cil_lock);
+       spin_lock(&cil->xc_push_lock);
        list_for_each_entry(ctx, &cil->xc_committing, committing) {
                if (ctx->sequence > sequence)
                        continue;
@@ -809,7 +817,7 @@ restart:
                         * It is still being pushed! Wait for the push to
                         * complete, then start again from the beginning.
                         */
-                       xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
+                       xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
                        goto restart;
                }
                if (ctx->sequence != sequence)
@@ -817,7 +825,7 @@ restart:
                /* found it! */
                commit_lsn = ctx->commit_lsn;
        }
-       spin_unlock(&cil->xc_cil_lock);
+       spin_unlock(&cil->xc_push_lock);
        return commit_lsn;
 }
 
@@ -875,6 +883,7 @@ xlog_cil_init(
        INIT_LIST_HEAD(&cil->xc_cil);
        INIT_LIST_HEAD(&cil->xc_committing);
        spin_lock_init(&cil->xc_cil_lock);
+       spin_lock_init(&cil->xc_push_lock);
        init_rwsem(&cil->xc_ctx_lock);
        init_waitqueue_head(&cil->xc_commit_wait);