Merge branch 'xfs-buf-iosubmit' into for-next

author Dave Chinner <david@fromorbit.com>

Wed, 1 Oct 2014 23:11:14 +0000 (09:11 +1000)

committer Dave Chinner <david@fromorbit.com>

Wed, 1 Oct 2014 23:11:14 +0000 (09:11 +1000)
author Dave Chinner <david@fromorbit.com>
Wed, 1 Oct 2014 23:11:14 +0000 (09:11 +1000)
committer Dave Chinner <david@fromorbit.com>
Wed, 1 Oct 2014 23:11:14 +0000 (09:11 +1000)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

index 4bffffe038a1a8ac329b9dc19e5a885878270088..eff34218f405a0d7473ccfa35221b1f9969f168d 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2209,6 +2209,10 @@ xfs_agf_verify(
               be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
                 return false;
  
+       if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
+           be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
+               return false;
+
         /*
          * during growfs operations, the perag is not fully initialised,
          * so we can't use it for any useful checking. growfs ensures we can't
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index de2d26d328446e65698dca0ff9f4fe2317c3f04e..79c981984dcad0b60e2405cd2ba718405b8bd7ab 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5403,40 +5403,238 @@ error0:
         return error;
  }
  
+/*
+ * Determine whether an extent shift can be accomplished by a merge with the
+ * extent that precedes the target hole of the shift.
+ */
+STATIC bool
+xfs_bmse_can_merge(
+       struct xfs_bmbt_irec    *left,  /* preceding extent */
+       struct xfs_bmbt_irec    *got,   /* current extent to shift */
+       xfs_fileoff_t           shift)  /* shift fsb */
+{
+       xfs_fileoff_t           startoff;
+
+       startoff = got->br_startoff - shift;
+
+       /*
+        * The extent, once shifted, must be adjacent in-file and on-disk with
+        * the preceding extent.
+        */
+       if ((left->br_startoff + left->br_blockcount != startoff) ||
+           (left->br_startblock + left->br_blockcount != got->br_startblock) ||
+           (left->br_state != got->br_state) ||
+           (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
+               return false;
+
+       return true;
+}
+
+/*
+ * A bmap extent shift adjusts the file offset of an extent to fill a preceding
+ * hole in the file. If an extent shift would result in the extent being fully
+ * adjacent to the extent that currently precedes the hole, we can merge with
+ * the preceding extent rather than do the shift.
+ *
+ * This function assumes the caller has verified a shift-by-merge is possible
+ * with the provided extents via xfs_bmse_can_merge().
+ */
+STATIC int
+xfs_bmse_merge(
+       struct xfs_inode                *ip,
+       int                             whichfork,
+       xfs_fileoff_t                   shift,          /* shift fsb */
+       int                             current_ext,    /* idx of gotp */
+       struct xfs_bmbt_rec_host        *gotp,          /* extent to shift */
+       struct xfs_bmbt_rec_host        *leftp,         /* preceding extent */
+       struct xfs_btree_cur            *cur,
+       int                             *logflags)      /* output */
+{
+       struct xfs_ifork                *ifp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            left;
+       xfs_filblks_t                   blockcount;
+       int                             error, i;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       xfs_bmbt_get_all(gotp, &got);
+       xfs_bmbt_get_all(leftp, &left);
+       blockcount = left.br_blockcount + got.br_blockcount;
+
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(xfs_bmse_can_merge(&left, &got, shift));
+
+       /*
+        * Merge the in-core extents. Note that the host record pointers and
+        * current_ext index are invalid once the extent has been removed via
+        * xfs_iext_remove().
+        */
+       xfs_bmbt_set_blockcount(leftp, blockcount);
+       xfs_iext_remove(ip, current_ext, 1, 0);
+
+       /*
+        * Update the on-disk extent count, the btree if necessary and log the
+        * inode.
+        */
+       XFS_IFORK_NEXT_SET(ip, whichfork,
+                          XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+       *logflags |= XFS_ILOG_CORE;
+       if (!cur) {
+               *logflags |= XFS_ILOG_DEXT;
+               return 0;
+       }
+
+       /* lookup and remove the extent to merge */
+       error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
+                                  got.br_blockcount, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+       error = xfs_btree_delete(cur, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+       /* lookup and update size of the previous extent */
+       error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
+                                  left.br_blockcount, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+       left.br_blockcount = blockcount;
+
+       error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
+                               left.br_blockcount, left.br_state);
+       if (error)
+               goto out_error;
+
+       return 0;
+
+out_error:
+       return error;
+}
+
+/*
+ * Shift a single extent.
+ */
+STATIC int
+xfs_bmse_shift_one(
+       struct xfs_inode                *ip,
+       int                             whichfork,
+       xfs_fileoff_t                   offset_shift_fsb,
+       int                             *current_ext,
+       struct xfs_bmbt_rec_host        *gotp,
+       struct xfs_btree_cur            *cur,
+       int                             *logflags)
+{
+       struct xfs_ifork                *ifp;
+       xfs_fileoff_t                   startoff;
+       struct xfs_bmbt_rec_host        *leftp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            left;
+       int                             error;
+       int                             i;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       xfs_bmbt_get_all(gotp, &got);
+       startoff = got.br_startoff - offset_shift_fsb;
+
+       /* delalloc extents should be prevented by caller */
+       XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock),
+                               out_error);
+
+       /*
+        * If this is the first extent in the file, make sure there's enough
+        * room at the start of the file and jump right to the shift as there's
+        * no left extent to merge.
+        */
+       if (*current_ext == 0) {
+               if (got.br_startoff < offset_shift_fsb)
+                       return -EINVAL;
+               goto shift_extent;
+       }
+
+       /* grab the left extent and check for a large enough hole */
+       leftp = xfs_iext_get_ext(ifp, *current_ext - 1);
+       xfs_bmbt_get_all(leftp, &left);
+
+       if (startoff < left.br_startoff + left.br_blockcount)
+               return -EINVAL;
+
+       /* check whether to merge the extent or shift it down */
+       if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb))
+               goto shift_extent;
+
+       return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext,
+                             gotp, leftp, cur, logflags);
+
+shift_extent:
+       /*
+        * Increment the extent index for the next iteration, update the start
+        * offset of the in-core extent and update the btree if applicable.
+        */
+       (*current_ext)++;
+       xfs_bmbt_set_startoff(gotp, startoff);
+       *logflags |= XFS_ILOG_CORE;
+       if (!cur) {
+               *logflags |= XFS_ILOG_DEXT;
+               return 0;
+       }
+
+       error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
+                                  got.br_blockcount, &i);
+       if (error)
+               return error;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+       got.br_startoff = startoff;
+       error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
+                               got.br_blockcount, got.br_state);
+       if (error)
+               return error;
+
+       return 0;
+
+out_error:
+       return error;
+}
+
  /*
   * Shift extent records to the left to cover a hole.
   *
- * The maximum number of extents to be shifted in a single operation
- * is @num_exts, and @current_ext keeps track of the current extent
- * index we have shifted. @offset_shift_fsb is the length by which each
- * extent is shifted. If there is no hole to shift the extents
- * into, this will be considered invalid operation and we abort immediately.
+ * The maximum number of extents to be shifted in a single operation is
+ * @num_exts. @start_fsb specifies the file offset to start the shift and the
+ * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
+ * is the length by which each extent is shifted. If there is no hole to shift
+ * the extents into, this will be considered invalid operation and we abort
+ * immediately.
   */
  int
  xfs_bmap_shift_extents(
         struct xfs_trans        *tp,
         struct xfs_inode        *ip,
-       int                     *done,
         xfs_fileoff_t           start_fsb,
         xfs_fileoff_t           offset_shift_fsb,
-       xfs_extnum_t            *current_ext,
+       int                     *done,
+       xfs_fileoff_t           *next_fsb,
         xfs_fsblock_t           *firstblock,
         struct xfs_bmap_free    *flist,
         int                     num_exts)
  {
-       struct xfs_btree_cur            *cur;
+       struct xfs_btree_cur            *cur = NULL;
         struct xfs_bmbt_rec_host        *gotp;
         struct xfs_bmbt_irec            got;
-       struct xfs_bmbt_irec            left;
         struct xfs_mount                *mp = ip->i_mount;
         struct xfs_ifork                *ifp;
         xfs_extnum_t                    nexts = 0;
-       xfs_fileoff_t                   startoff;
+       xfs_extnum_t                    current_ext;
         int                             error = 0;
-       int                             i;
         int                             whichfork = XFS_DATA_FORK;
-       int                             logflags;
-       xfs_filblks_t                   blockcount = 0;
+       int                             logflags = 0;
         int                             total_extents;
  
         if (unlikely(XFS_TEST_ERROR(
@@ -5451,7 +5649,8 @@ xfs_bmap_shift_extents(
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
  
-       ASSERT(current_ext != NULL);
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  
         ifp = XFS_IFORK_PTR(ip, whichfork);
         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5461,142 +5660,62 @@ xfs_bmap_shift_extents(
                         return error;
         }
  
-       /*
-        * If *current_ext is 0, we would need to lookup the extent
-        * from where we would start shifting and store it in gotp.
-        */
-       if (!*current_ext) {
-               gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
-               /*
-                * gotp can be null in 2 cases: 1) if there are no extents
-                * or 2) start_fsb lies in a hole beyond which there are
-                * no extents. Either way, we are done.
-                */
-               if (!gotp) {
-                       *done = 1;
-                       return 0;
-               }
-       }
-
-       /* We are going to change core inode */
-       logflags = XFS_ILOG_CORE;
         if (ifp->if_flags & XFS_IFBROOT) {
                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
                 cur->bc_private.b.firstblock = *firstblock;
                 cur->bc_private.b.flist = flist;
                 cur->bc_private.b.flags = 0;
-       } else {
-               cur = NULL;
-               logflags |= XFS_ILOG_DEXT;
+       }
+
+       /*
+        * Look up the extent index for the fsb where we start shifting. We can
+        * henceforth iterate with current_ext as extent list changes are locked
+        * out via ilock.
+        *
+        * gotp can be null in 2 cases: 1) if there are no extents or 2)
+        * start_fsb lies in a hole beyond which there are no extents. Either
+        * way, we are done.
+        */
+       gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext);
+       if (!gotp) {
+               *done = 1;
+               goto del_cursor;
         }
  
         /*
          * There may be delalloc extents in the data fork before the range we
-        * are collapsing out, so we cannot
-        * use the count of real extents here. Instead we have to calculate it
-        * from the incore fork.
+        * are collapsing out, so we cannot use the count of real extents here.
+        * Instead we have to calculate it from the incore fork.
          */
         total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-       while (nexts++ < num_exts && *current_ext < total_extents) {
-
-               gotp = xfs_iext_get_ext(ifp, *current_ext);
-               xfs_bmbt_get_all(gotp, &got);
-               startoff = got.br_startoff - offset_shift_fsb;
-
-               /*
-                * Before shifting extent into hole, make sure that the hole
-                * is large enough to accomodate the shift.
-                */
-               if (*current_ext) {
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
-                                               *current_ext - 1), &left);
-
-                       if (startoff < left.br_startoff + left.br_blockcount)
-                               error = -EINVAL;
-               } else if (offset_shift_fsb > got.br_startoff) {
-                       /*
-                        * When first extent is shifted, offset_shift_fsb
-                        * should be less than the stating offset of
-                        * the first extent.
-                        */
-                       error = -EINVAL;
-               }
-
+       while (nexts++ < num_exts && current_ext < total_extents) {
+               error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
+                                       &current_ext, gotp, cur, &logflags);
                 if (error)
                         goto del_cursor;
  
-               if (cur) {
-                       error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-                                                  got.br_startblock,
-                                                  got.br_blockcount,
-                                                  &i);
-                       if (error)
-                               goto del_cursor;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-               }
-
-               /* Check if we can merge 2 adjacent extents */
-               if (*current_ext &&
-                   left.br_startoff + left.br_blockcount == startoff &&
-                   left.br_startblock + left.br_blockcount ==
-                               got.br_startblock &&
-                   left.br_state == got.br_state &&
-                   left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
-                       blockcount = left.br_blockcount +
-                               got.br_blockcount;
-                       xfs_iext_remove(ip, *current_ext, 1, 0);
-                       if (cur) {
-                               error = xfs_btree_delete(cur, &i);
-                               if (error)
-                                       goto del_cursor;
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-                       }
-                       XFS_IFORK_NEXT_SET(ip, whichfork,
-                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
-                       gotp = xfs_iext_get_ext(ifp, --*current_ext);
-                       xfs_bmbt_get_all(gotp, &got);
-
-                       /* Make cursor point to the extent we will update */
-                       if (cur) {
-                               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-                                                          got.br_startblock,
-                                                          got.br_blockcount,
-                                                          &i);
-                               if (error)
-                                       goto del_cursor;
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-                       }
-
-                       xfs_bmbt_set_blockcount(gotp, blockcount);
-                       got.br_blockcount = blockcount;
-               } else {
-                       /* We have to update the startoff */
-                       xfs_bmbt_set_startoff(gotp, startoff);
-                       got.br_startoff = startoff;
-               }
-
-               if (cur) {
-                       error = xfs_bmbt_update(cur, got.br_startoff,
-                                               got.br_startblock,
-                                               got.br_blockcount,
-                                               got.br_state);
-                       if (error)
-                               goto del_cursor;
-               }
-
-               (*current_ext)++;
+               /* update total extent count and grab the next record */
                 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+               if (current_ext >= total_extents)
+                       break;
+               gotp = xfs_iext_get_ext(ifp, current_ext);
         }
  
         /* Check if we are done */
-       if (*current_ext == total_extents)
+       if (current_ext == total_extents) {
                 *done = 1;
+       } else if (next_fsb) {
+               xfs_bmbt_get_all(gotp, &got);
+               *next_fsb = got.br_startoff;
+       }
  
  del_cursor:
         if (cur)
                 xfs_btree_del_cursor(cur,
                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
  
-       xfs_trans_log_inode(tp, ip, logflags);
+       if (logflags)
+               xfs_trans_log_inode(tp, ip, logflags);
+
         return error;
  }
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h

index b879ca56a64ccfab5b2a42502a5b50f68b85f1df..44db6db8640241c063a88641d5e10d61fe046c54 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -178,9 +178,8 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                 xfs_extnum_t num);
  uint   xfs_default_attroffset(struct xfs_inode *ip);
  int    xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-               int *done, xfs_fileoff_t start_fsb,
-               xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
-               xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
-               int num_exts);
+               xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb,
+               int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock,
+               struct xfs_bmap_free *flist, int num_exts);
  
  #endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c

index c9aee52a37e202987141ddaf1468fe7ee5b03094..7e42fdfd2f1de5f0732689de22aadcdf0c07282f 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -270,7 +270,6 @@ xfs_dir3_data_get_ftype(
  {
         __uint8_t       ftype = dep->name[dep->namelen];
  
-       ASSERT(ftype < XFS_DIR3_FT_MAX);
         if (ftype >= XFS_DIR3_FT_MAX)
                 return XFS_DIR3_FT_UNKNOWN;
         return ftype;
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c

index 6cef22152fd6440d46581bc141b4267b34119aea..7075aaf131f4c5511fbc726ea7c10017b5966e54 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -237,7 +237,8 @@ xfs_dir_init(
  }
  
  /*
-  Enter a name in a directory.
+ * Enter a name in a directory, or check for available space.
+ * If inum is 0, only the available space test is performed.
   */
  int
  xfs_dir_createname(
@@ -254,10 +255,12 @@ xfs_dir_createname(
         int                     v;              /* type-checking value */
  
         ASSERT(S_ISDIR(dp->i_d.di_mode));
-       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
-       if (rval)
-               return rval;
-       XFS_STATS_INC(xs_dir_create);
+       if (inum) {
+               rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+               if (rval)
+                       return rval;
+               XFS_STATS_INC(xs_dir_create);
+       }
  
         args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
         if (!args)
@@ -276,6 +279,8 @@ xfs_dir_createname(
         args->whichfork = XFS_DATA_FORK;
         args->trans = tp;
         args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+       if (!inum)
+               args->op_flags |= XFS_DA_OP_JUSTCHECK;
  
         if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
                 rval = xfs_dir2_sf_addname(args);
@@ -535,62 +540,14 @@ out_free:
  
  /*
   * See if this entry can be added to the directory without allocating space.
- * First checks that the caller couldn't reserve enough space (resblks = 0).
   */
  int
  xfs_dir_canenter(
         xfs_trans_t     *tp,
         xfs_inode_t     *dp,
-       struct xfs_name *name,          /* name of entry to add */
-       uint            resblks)
+       struct xfs_name *name)          /* name of entry to add */
  {
-       struct xfs_da_args *args;
-       int             rval;
-       int             v;              /* type-checking value */
-
-       if (resblks)
-               return 0;
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-
-       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-       if (!args)
-               return -ENOMEM;
-
-       args->geo = dp->i_mount->m_dir_geo;
-       args->name = name->name;
-       args->namelen = name->len;
-       args->filetype = name->type;
-       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args->dp = dp;
-       args->whichfork = XFS_DATA_FORK;
-       args->trans = tp;
-       args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
-                                                       XFS_DA_OP_OKNOENT;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-               rval = xfs_dir2_sf_addname(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isblock(args, &v);
-       if (rval)
-               goto out_free;
-       if (v) {
-               rval = xfs_dir2_block_addname(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isleaf(args, &v);
-       if (rval)
-               goto out_free;
-       if (v)
-               rval = xfs_dir2_leaf_addname(args);
-       else
-               rval = xfs_dir2_node_addname(args);
-out_free:
-       kmem_free(args);
-       return rval;
+       return xfs_dir_createname(tp, dp, name, 0, NULL, NULL, 0);
  }
  
  /*
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h

index c8e86b0b5e9954f64b271b7569a198fa5ab1425f..4dff261e6ed59ee8b9749c189554428115e8c4df 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -136,7 +136,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
                                 xfs_fsblock_t *first,
                                 struct xfs_bmap_free *flist, xfs_extlen_t tot);
  extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
-                               struct xfs_name *name, uint resblks);
+                               struct xfs_name *name);
  
  /*
   * Direct call from the bmap code, bypassing the generic directory layer.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c

index b62771f1f4b5b7028569b32af3b16bc806005132..23dcb72fc5e688f7d550f4cd79b7f4fbac8ec7b3 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1076,8 +1076,8 @@ xfs_dialloc_ag_finobt_newino(
         int i;
  
         if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
-               error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ,
-                                        &i);
+               error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+                                        XFS_LOOKUP_EQ, &i);
                 if (error)
                         return error;
                 if (i == 1) {
@@ -1085,7 +1085,6 @@ xfs_dialloc_ag_finobt_newino(
                         if (error)
                                 return error;
                         XFS_WANT_CORRUPTED_RETURN(i == 1);
-
                         return 0;
                 }
         }
@@ -2051,6 +2050,8 @@ xfs_agi_verify(
         if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
                 return false;
  
+       if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
+               return false;
         /*
          * during growfs operations, the perag is not fully initialised,
          * so we can't use it for any useful checking. growfs ensures we can't
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c

index f4dd697cac08720f057b58c094bc4bfe3346ca5b..7c818f1e448416da6bcfae580a6aa898e5114bbb 100644 (file)
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -424,20 +424,24 @@ xfs_rtfind_forw(
  }
  
  /*
- * Read and modify the summary information for a given extent size,
+ * Read and/or modify the summary information for a given extent size,
   * bitmap block combination.
   * Keeps track of a current summary block, so we don't keep reading
   * it from the buffer cache.
+ *
+ * Summary information is returned in *sum if specified.
+ * If no delta is specified, returns summary only.
   */
  int
-xfs_rtmodify_summary(
-       xfs_mount_t     *mp,            /* file system mount point */
+xfs_rtmodify_summary_int(
+       xfs_mount_t     *mp,            /* file system mount structure */
         xfs_trans_t     *tp,            /* transaction pointer */
         int             log,            /* log2 of extent size */
         xfs_rtblock_t   bbno,           /* bitmap block number */
         int             delta,          /* change to make to summary info */
         xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
-       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
+       xfs_fsblock_t   *rsb,           /* in/out: summary block number */
+       xfs_suminfo_t   *sum)           /* out: summary info for this block */
  {
         xfs_buf_t       *bp;            /* buffer for the summary block */
         int             error;          /* error value */
@@ -456,7 +460,7 @@ xfs_rtmodify_summary(
         /*
          * If we have an old buffer, and the block number matches, use that.
          */
-       if (rbpp && *rbpp && *rsb == sb)
+       if (*rbpp && *rsb == sb)
                 bp = *rbpp;
         /*
          * Otherwise we have to get the buffer.
@@ -465,7 +469,7 @@ xfs_rtmodify_summary(
                 /*
                  * If there was an old one, get rid of it first.
                  */
-               if (rbpp && *rbpp)
+               if (*rbpp)
                         xfs_trans_brelse(tp, *rbpp);
                 error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
                 if (error) {
@@ -474,21 +478,38 @@ xfs_rtmodify_summary(
                 /*
                  * Remember this buffer and block for the next call.
                  */
-               if (rbpp) {
-                       *rbpp = bp;
-                       *rsb = sb;
-               }
+               *rbpp = bp;
+               *rsb = sb;
         }
         /*
-        * Point to the summary information, modify and log it.
+        * Point to the summary information, modify/log it, and/or copy it out.
          */
         sp = XFS_SUMPTR(mp, bp, so);
-       *sp += delta;
-       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
-               (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
+       if (delta) {
+               uint first = (uint)((char *)sp - (char *)bp->b_addr);
+
+               *sp += delta;
+               xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
+       }
+       if (sum)
+               *sum = *sp;
         return 0;
  }
  
+int
+xfs_rtmodify_summary(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       int             log,            /* log2 of extent size */
+       xfs_rtblock_t   bbno,           /* bitmap block number */
+       int             delta,          /* change to make to summary info */
+       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
+{
+       return xfs_rtmodify_summary_int(mp, tp, log, bbno,
+                                       delta, rbpp, rsb, NULL);
+}
+
  /*
   * Set the given range of bitmap bits to the given value.
   * Do whatever I/O and logging is required.
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c

index ad525a5623a49557c9554f23aa72d323e9ed8b77..8426e5e2682ef47e677a174b9e4d12e58fe190a1 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -279,11 +279,13 @@ xfs_mount_validate_sb(
             sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG                    ||
             sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG                    ||
             sbp->sb_blocksize != (1 << sbp->sb_blocklog)                ||
+           sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG                   ||
             sbp->sb_inodesize < XFS_DINODE_MIN_SIZE                     ||
             sbp->sb_inodesize > XFS_DINODE_MAX_SIZE                     ||
             sbp->sb_inodelog < XFS_DINODE_MIN_LOG                       ||
             sbp->sb_inodelog > XFS_DINODE_MAX_LOG                       ||
             sbp->sb_inodesize != (1 << sbp->sb_inodelog)                ||
+           sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE                    ||
             sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
             (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)   ||
             (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)  ||
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index 11e9b4caa54f168f7e429f5d7e5a01302d026e1c..2f502537a39c6bfe57466c6c66ad9b5d3fa63fba 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -434,10 +434,22 @@ xfs_start_page_writeback(
  {
         ASSERT(PageLocked(page));
         ASSERT(!PageWriteback(page));
-       if (clear_dirty)
+
+       /*
+        * if the page was not fully cleaned, we need to ensure that the higher
+        * layers come back to it correctly. That means we need to keep the page
+        * dirty, and for WB_SYNC_ALL writeback we need to ensure the
+        * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
+        * write this page in this writeback sweep will be made.
+        */
+       if (clear_dirty) {
                 clear_page_dirty_for_io(page);
-       set_page_writeback(page);
+               set_page_writeback(page);
+       } else
+               set_page_writeback_keepwrite(page);
+
         unlock_page(page);
+
         /* If no buffers on the page are to be written, finish it here */
         if (!buffers)
                 end_page_writeback(page);
@@ -1753,11 +1765,72 @@ xfs_vm_readpages(
         return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
  }
  
+/*
+ * This is basically a copy of __set_page_dirty_buffers() with one
+ * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
+ * dirty, we'll never be able to clean them because we don't write buffers
+ * beyond EOF, and that means we can't invalidate pages that span EOF
+ * that have been marked dirty. Further, the dirty state can leak into
+ * the file interior if the file is extended, resulting in all sorts of
+ * bad things happening as the state does not match the underlying data.
+ *
+ * XXX: this really indicates that bufferheads in XFS need to die. Warts like
+ * this only exist because of bufferheads and how the generic code manages them.
+ */
+STATIC int
+xfs_vm_set_page_dirty(
+       struct page             *page)
+{
+       struct address_space    *mapping = page->mapping;
+       struct inode            *inode = mapping->host;
+       loff_t                  end_offset;
+       loff_t                  offset;
+       int                     newly_dirty;
+
+       if (unlikely(!mapping))
+               return !TestSetPageDirty(page);
+
+       end_offset = i_size_read(inode);
+       offset = page_offset(page);
+
+       spin_lock(&mapping->private_lock);
+       if (page_has_buffers(page)) {
+               struct buffer_head *head = page_buffers(page);
+               struct buffer_head *bh = head;
+
+               do {
+                       if (offset < end_offset)
+                               set_buffer_dirty(bh);
+                       bh = bh->b_this_page;
+                       offset += 1 << inode->i_blkbits;
+               } while (bh != head);
+       }
+       newly_dirty = !TestSetPageDirty(page);
+       spin_unlock(&mapping->private_lock);
+
+       if (newly_dirty) {
+               /* sigh - __set_page_dirty() is static, so copy it here, too */
+               unsigned long flags;
+
+               spin_lock_irqsave(&mapping->tree_lock, flags);
+               if (page->mapping) {    /* Race with truncate? */
+                       WARN_ON_ONCE(!PageUptodate(page));
+                       account_page_dirtied(page, mapping);
+                       radix_tree_tag_set(&mapping->page_tree,
+                                       page_index(page), PAGECACHE_TAG_DIRTY);
+               }
+               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+       }
+       return newly_dirty;
+}
+
  const struct address_space_operations xfs_address_space_operations = {
         .readpage               = xfs_vm_readpage,
         .readpages              = xfs_vm_readpages,
         .writepage              = xfs_vm_writepage,
         .writepages             = xfs_vm_writepages,
+       .set_page_dirty         = xfs_vm_set_page_dirty,
         .releasepage            = xfs_vm_releasepage,
         .invalidatepage         = xfs_vm_invalidatepage,
         .write_begin            = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index aa70620e0e62dffb649cc760ab4268d9e314bf86..c2aaa58e59eea7392341533b80159a0befda4c2c 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1179,6 +1179,7 @@ xfs_free_file_space(
         xfs_bmap_free_t         free_list;
         xfs_bmbt_irec_t         imap;
         xfs_off_t               ioffset;
+       xfs_off_t               iendoffset;
         xfs_extlen_t            mod=0;
         xfs_mount_t             *mp;
         int                     nimap;
@@ -1207,12 +1208,13 @@ xfs_free_file_space(
         inode_dio_wait(VFS_I(ip));
  
         rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
-       ioffset = offset & ~(rounding - 1);
-       error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                             ioffset, -1);
+       ioffset = round_down(offset, rounding);
+       iendoffset = round_up(offset + len, rounding) - 1;
+       error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
+                                            iendoffset);
         if (error)
                 goto out;
-       truncate_pagecache_range(VFS_I(ip), ioffset, -1);
+       truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset);
  
         /*
          * Need to zero the stuff we're not freeing, on disk.
@@ -1430,24 +1432,50 @@ xfs_collapse_file_space(
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_trans        *tp;
         int                     error;
-       xfs_extnum_t            current_ext = 0;
         struct xfs_bmap_free    free_list;
         xfs_fsblock_t           first_block;
         int                     committed;
         xfs_fileoff_t           start_fsb;
+       xfs_fileoff_t           next_fsb;
         xfs_fileoff_t           shift_fsb;
  
         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
  
         trace_xfs_collapse_file_space(ip);
  
-       start_fsb = XFS_B_TO_FSB(mp, offset + len);
+       next_fsb = XFS_B_TO_FSB(mp, offset + len);
         shift_fsb = XFS_B_TO_FSB(mp, len);
  
         error = xfs_free_file_space(ip, offset, len);
         if (error)
                 return error;
  
+       /*
+        * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
+        * into the accessible region of the file.
+        */
+       if (xfs_can_free_eofblocks(ip, true)) {
+               error = xfs_free_eofblocks(mp, ip, false);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * Writeback and invalidate cache for the remainder of the file as we're
+        * about to shift down every extent from the collapse range to EOF. The
+        * free of the collapse range above might have already done some of
+        * this, but we shouldn't rely on it to do anything outside of the range
+        * that was freed.
+        */
+       error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+                                            offset + len, -1);
+       if (error)
+               return error;
+       error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+                                       (offset + len) >> PAGE_CACHE_SHIFT, -1);
+       if (error)
+               return error;
+
         while (!error && !done) {
                 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
                 /*
@@ -1479,10 +1507,10 @@ xfs_collapse_file_space(
                  * We are using the write transaction in which max 2 bmbt
                  * updates are allowed
                  */
-               error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
-                                              shift_fsb, &current_ext,
-                                              &first_block, &free_list,
-                                              XFS_BMAP_MAX_SHIFT_EXTENTS);
+               start_fsb = next_fsb;
+               error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb,
+                               &done, &next_fsb, &first_block, &free_list,
+                               XFS_BMAP_MAX_SHIFT_EXTENTS);
                 if (error)
                         goto out;
  
@@ -1592,7 +1620,7 @@ xfs_swap_extents_check_format(
         return 0;
  }
  
-int
+static int
  xfs_swap_extent_flush(
         struct xfs_inode        *ip)
  {
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c

index 6fbcbbfb4d1a894248780537a5bbaab3374d7c9f..017b6afe340b891db5b487b6c30740f08613ff2b 100644 (file)
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1885,7 +1885,7 @@ xfs_buf_init(void)
                 goto out;
  
         xfslogd_workqueue = alloc_workqueue("xfslogd",
-                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
+                               WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1);
         if (!xfslogd_workqueue)
                 goto out_free_buf_zone;
  
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index cbea9099b843d4c406e2ca8f686fc03151360a3d..f1596954332616f46ec00031a37c6888b53f1117 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -501,7 +501,7 @@ xfs_buf_item_unpin(
   * buffer being bad..
   */
  
-DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
+static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
  
  STATIC uint
  xfs_buf_item_push(
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 076b1708d1345474ec2fd4e3a4c2e3bc605cb75a..eb596b4199420fa8378dcd49146640c9783037b4 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -291,12 +291,22 @@ xfs_file_read_iter(
                 if (inode->i_mapping->nrpages) {
                         ret = filemap_write_and_wait_range(
                                                         VFS_I(ip)->i_mapping,
-                                                       pos, -1);
+                                                       pos, pos + size - 1);
                         if (ret) {
                                 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
                                 return ret;
                         }
-                       truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+                       /*
+                        * Invalidate whole pages. This can return an error if
+                        * we fail to invalidate a page, but this should never
+                        * happen on XFS. Warn if it does fail.
+                        */
+                       ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+                                       pos >> PAGE_CACHE_SHIFT,
+                                       (pos + size - 1) >> PAGE_CACHE_SHIFT);
+                       WARN_ON_ONCE(ret);
+                       ret = 0;
                 }
                 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
         }
@@ -632,10 +642,19 @@ xfs_file_dio_aio_write(
  
         if (mapping->nrpages) {
                 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                                   pos, -1);
+                                                   pos, pos + count - 1);
                 if (ret)
                         goto out;
-               truncate_pagecache_range(VFS_I(ip), pos, -1);
+               /*
+                * Invalidate whole pages. This can return an error if
+                * we fail to invalidate a page, but this should never
+                * happen on XFS. Warn if it does fail.
+                */
+               ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+                                       pos >> PAGE_CACHE_SHIFT,
+                                       (pos + count - 1) >> PAGE_CACHE_SHIFT);
+               WARN_ON_ONCE(ret);
+               ret = 0;
         }
  
         /*
@@ -964,7 +983,7 @@ xfs_vm_page_mkwrite(
  
  /*
   * This type is designed to indicate the type of offset we would like
- * to search from page cache for either xfs_seek_data() or xfs_seek_hole().
+ * to search from page cache for xfs_seek_hole_data().
   */
  enum {
         HOLE_OFF = 0,
@@ -1021,7 +1040,7 @@ xfs_lookup_buffer_offset(
  /*
   * This routine is called to find out and return a data or hole offset
   * from the page cache for unwritten extents according to the desired
- * type for xfs_seek_data() or xfs_seek_hole().
+ * type for xfs_seek_hole_data().
   *
   * The argument offset is used to tell where we start to search from the
   * page cache.  Map is used to figure out the end points of the range to
@@ -1181,9 +1200,10 @@ out:
  }
  
  STATIC loff_t
-xfs_seek_data(
+xfs_seek_hole_data(
         struct file             *file,
-       loff_t                  start)
+       loff_t                  start,
+       int                     whence)
  {
         struct inode            *inode = file->f_mapping->host;
         struct xfs_inode        *ip = XFS_I(inode);
@@ -1195,6 +1215,9 @@ xfs_seek_data(
         uint                    lock;
         int                     error;
  
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
         lock = xfs_ilock_data_map_shared(ip);
  
         isize = i_size_read(inode);
@@ -1209,6 +1232,7 @@ xfs_seek_data(
          */
         fsbno = XFS_B_TO_FSBT(mp, start);
         end = XFS_B_TO_FSB(mp, isize);
+
         for (;;) {
                 struct xfs_bmbt_irec    map[2];
                 int                     nmap = 2;
@@ -1229,29 +1253,48 @@ xfs_seek_data(
                         offset = max_t(loff_t, start,
                                        XFS_FSB_TO_B(mp, map[i].br_startoff));
  
-                       /* Landed in a data extent */
-                       if (map[i].br_startblock == DELAYSTARTBLOCK ||
-                           (map[i].br_state == XFS_EXT_NORM &&
-                            !isnullstartblock(map[i].br_startblock)))
+                       /* Landed in the hole we wanted? */
+                       if (whence == SEEK_HOLE &&
+                           map[i].br_startblock == HOLESTARTBLOCK)
+                               goto out;
+
+                       /* Landed in the data extent we wanted? */
+                       if (whence == SEEK_DATA &&
+                           (map[i].br_startblock == DELAYSTARTBLOCK ||
+                            (map[i].br_state == XFS_EXT_NORM &&
+                             !isnullstartblock(map[i].br_startblock))))
                                 goto out;
  
                         /*
-                        * Landed in an unwritten extent, try to search data
-                        * from page cache.
+                        * Landed in an unwritten extent, try to search
+                        * for hole or data from page cache.
                          */
                         if (map[i].br_state == XFS_EXT_UNWRITTEN) {
                                 if (xfs_find_get_desired_pgoff(inode, &map[i],
-                                                       DATA_OFF, &offset))
+                                     whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
+                                                       &offset))
                                         goto out;
                         }
                 }
  
                 /*
-                * map[0] is hole or its an unwritten extent but
-                * without data in page cache.  Probably means that
-                * we are reading after EOF if nothing in map[1].
+                * We only received one extent out of the two requested. This
+                * means we've hit EOF and didn't find what we are looking for.
                  */
                 if (nmap == 1) {
+                       /*
+                        * If we were looking for a hole, set offset to
+                        * the end of the file (i.e., there is an implicit
+                        * hole at the end of any file).
+                        */
+                       if (whence == SEEK_HOLE) {
+                               offset = isize;
+                               break;
+                       }
+                       /*
+                        * If we were looking for data, it's nowhere to be found
+                        */
+                       ASSERT(whence == SEEK_DATA);
                         error = -ENXIO;
                         goto out_unlock;
                 }
@@ -1260,125 +1303,30 @@ xfs_seek_data(
  
                 /*
                  * Nothing was found, proceed to the next round of search
-                * if reading offset not beyond or hit EOF.
+                * if the next reading offset is not at or beyond EOF.
                  */
                 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
                 start = XFS_FSB_TO_B(mp, fsbno);
                 if (start >= isize) {
+                       if (whence == SEEK_HOLE) {
+                               offset = isize;
+                               break;
+                       }
+                       ASSERT(whence == SEEK_DATA);
                         error = -ENXIO;
                         goto out_unlock;
                 }
         }
  
-out:
-       offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-
-out_unlock:
-       xfs_iunlock(ip, lock);
-
-       if (error)
-               return error;
-       return offset;
-}
-
-STATIC loff_t
-xfs_seek_hole(
-       struct file             *file,
-       loff_t                  start)
-{
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       loff_t                  uninitialized_var(offset);
-       xfs_fsize_t             isize;
-       xfs_fileoff_t           fsbno;
-       xfs_filblks_t           end;
-       uint                    lock;
-       int                     error;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       lock = xfs_ilock_data_map_shared(ip);
-
-       isize = i_size_read(inode);
-       if (start >= isize) {
-               error = -ENXIO;
-               goto out_unlock;
-       }
-
-       fsbno = XFS_B_TO_FSBT(mp, start);
-       end = XFS_B_TO_FSB(mp, isize);
-
-       for (;;) {
-               struct xfs_bmbt_irec    map[2];
-               int                     nmap = 2;
-               unsigned int            i;
-
-               error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
-                                      XFS_BMAPI_ENTIRE);
-               if (error)
-                       goto out_unlock;
-
-               /* No extents at given offset, must be beyond EOF */
-               if (nmap == 0) {
-                       error = -ENXIO;
-                       goto out_unlock;
-               }
-
-               for (i = 0; i < nmap; i++) {
-                       offset = max_t(loff_t, start,
-                                      XFS_FSB_TO_B(mp, map[i].br_startoff));
-
-                       /* Landed in a hole */
-                       if (map[i].br_startblock == HOLESTARTBLOCK)
-                               goto out;
-
-                       /*
-                        * Landed in an unwritten extent, try to search hole
-                        * from page cache.
-                        */
-                       if (map[i].br_state == XFS_EXT_UNWRITTEN) {
-                               if (xfs_find_get_desired_pgoff(inode, &map[i],
-                                                       HOLE_OFF, &offset))
-                                       goto out;
-                       }
-               }
-
-               /*
-                * map[0] contains data or its unwritten but contains
-                * data in page cache, probably means that we are
-                * reading after EOF.  We should fix offset to point
-                * to the end of the file(i.e., there is an implicit
-                * hole at the end of any file).
-                */
-               if (nmap == 1) {
-                       offset = isize;
-                       break;
-               }
-
-               ASSERT(i > 1);
-
-               /*
-                * Both mappings contains data, proceed to the next round of
-                * search if the current reading offset not beyond or hit EOF.
-                */
-               fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
-               start = XFS_FSB_TO_B(mp, fsbno);
-               if (start >= isize) {
-                       offset = isize;
-                       break;
-               }
-       }
-
  out:
         /*
-        * At this point, we must have found a hole.  However, the returned
+        * If at this point we have found the hole we wanted, the returned
          * offset may be bigger than the file size as it may be aligned to
-        * page boundary for unwritten extents, we need to deal with this
+        * page boundary for unwritten extents.  We need to deal with this
          * situation in particular.
          */
-       offset = min_t(loff_t, offset, isize);
+       if (whence == SEEK_HOLE)
+               offset = min_t(loff_t, offset, isize);
         offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
  
  out_unlock:
@@ -1393,17 +1341,16 @@ STATIC loff_t
  xfs_file_llseek(
         struct file     *file,
         loff_t          offset,
-       int             origin)
+       int             whence)
  {
-       switch (origin) {
+       switch (whence) {
         case SEEK_END:
         case SEEK_CUR:
         case SEEK_SET:
-               return generic_file_llseek(file, offset, origin);
-       case SEEK_DATA:
-               return xfs_seek_data(file, offset);
+               return generic_file_llseek(file, offset, whence);
         case SEEK_HOLE:
-               return xfs_seek_hole(file, offset);
+       case SEEK_DATA:
+               return xfs_seek_hole_data(file, offset, whence);
         default:
                 return -EINVAL;
         }
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c

index 5399ef222dd738b85d39096ac7db5a39ebb8c359..4d41b241298fba5031fb93e59e39e209bf36b308 100644 (file)
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -43,3 +43,7 @@ xfs_param_t xfs_params = {
         .fstrm_timer    = {     1,              30*100,         3600*100},
         .eofb_timer     = {     1,              300,            3600*24},
  };
+
+struct xfs_globals xfs_globals = {
+       .log_recovery_delay     =       0,      /* no delay by default */
+};
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index 981b2cf519853f72c91dff609c90b78eb1626c4e..b45f7b27b5dff8e7b0b3113ea8668edc03f1a805 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,7 +33,6 @@
  #include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_bmap_util.h"
-#include "xfs_quota.h"
  #include "xfs_dquot_item.h"
  #include "xfs_dquot.h"
  
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 00d210bbf3c38143a648686400e510436e88e1c9..e5bbc1f30f16eb211ef6a44103db9bd69c94c483 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1153,9 +1153,11 @@ xfs_create(
         if (error)
                 goto out_trans_cancel;
  
-       error = xfs_dir_canenter(tp, dp, name, resblks);
-       if (error)
-               goto out_trans_cancel;
+       if (!resblks) {
+               error = xfs_dir_canenter(tp, dp, name);
+               if (error)
+                       goto out_trans_cancel;
+       }
  
         /*
          * A newly created regular or special file just has one directory
@@ -1421,9 +1423,11 @@ xfs_link(
                 goto error_return;
         }
  
-       error = xfs_dir_canenter(tp, tdp, target_name, resblks);
-       if (error)
-               goto error_return;
+       if (!resblks) {
+               error = xfs_dir_canenter(tp, tdp, target_name);
+               if (error)
+                       goto error_return;
+       }
  
         xfs_bmap_init(&free_list, &first_block);
  
@@ -2759,9 +2763,11 @@ xfs_rename(
                  * If there's no space reservation, check the entry will
                  * fit before actually inserting it.
                  */
-               error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
-               if (error)
-                       goto error_return;
+               if (!spaceres) {
+                       error = xfs_dir_canenter(tp, target_dp, target_name);
+                       if (error)
+                               goto error_return;
+               }
                 /*
                  * If target does not exist and the rename crosses
                  * directories, adjust the target directory link count
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index 3799695b92495a02e7a15722fbe77b842b77efa2..7a6b406ce6c9c9592bb9b4f79111f8268ff10f83 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1349,7 +1349,7 @@ xfs_ioc_setxflags(
  STATIC int
  xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
  {
-       struct getbmap __user   *base = *ap;
+       struct getbmap __user   *base = (struct getbmap __user *)*ap;
  
         /* copy only getbmap portion (not getbmapx) */
         if (copy_to_user(base, bmv, sizeof(struct getbmap)))
@@ -1380,7 +1380,7 @@ xfs_ioc_getbmap(
                 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
  
         error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
-                           (struct getbmap *)arg+1);
+                           (__force struct getbmap *)arg+1);
         if (error)
                 return error;
  
@@ -1393,7 +1393,7 @@ xfs_ioc_getbmap(
  STATIC int
  xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
  {
-       struct getbmapx __user  *base = *ap;
+       struct getbmapx __user  *base = (struct getbmapx __user *)*ap;
  
         if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
                 return -EFAULT;
@@ -1420,7 +1420,7 @@ xfs_ioc_getbmapx(
                 return -EINVAL;
  
         error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-                           (struct getbmapx *)arg+1);
+                           (__force struct getbmapx *)arg+1);
         if (error)
                 return error;
  
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index 72129493e9d3563687a6606223a123692fe13a5c..ec6dcdc181ee45fc8a74bb6a018f1fbab01dc1e7 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -849,6 +849,36 @@ xfs_setattr_size(
                 return error;
         truncate_setsize(inode, newsize);
  
+       /*
+        * The "we can't serialise against page faults" pain gets worse.
+        *
+        * If the file is mapped then we have to clean the page at the old EOF
+        * when extending the file. Extending the file can expose changes the
+        * underlying page mapping (e.g. from beyond EOF to a hole or
+        * unwritten), and so on the next attempt to write to that page we need
+        * to remap it for write. i.e. we need .page_mkwrite() to be called.
+        * Hence we need to clean the page to clean the pte and so a new write
+        * fault will be triggered appropriately.
+        *
+        * If we do it before we change the inode size, then we can race with a
+        * page fault that maps the page with exactly the same problem. If we do
+        * it after we change the file size, then a new page fault can come in
+        * and allocate space before we've run the rest of the truncate
+        * transaction. That's kinda grotesque, but it's better than have data
+        * over a hole, and so that's the lesser evil that has been chosen here.
+        *
+        * The real solution, however, is to have some mechanism for locking out
+        * page faults while a truncate is in progress.
+        */
+       if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
+               error = filemap_write_and_wait_range(
+                               VFS_I(ip)->i_mapping,
+                               round_down(oldsize, PAGE_CACHE_SIZE),
+                               round_up(oldsize, PAGE_CACHE_SIZE) - 1);
+               if (error)
+                       return error;
+       }
+
         tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
         if (error)
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index f6b79e5325dd4426da54b940fd1bea0ca170a6e8..f506c457011eb1970128fa637dce35500d2a4b54 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -463,12 +463,40 @@ xlog_cil_push(
                 spin_unlock(&cil->xc_push_lock);
                 goto out_skip;
         }
-       spin_unlock(&cil->xc_push_lock);
  
  
         /* check for a previously pushed seqeunce */
-       if (push_seq < cil->xc_ctx->sequence)
+       if (push_seq < cil->xc_ctx->sequence) {
+               spin_unlock(&cil->xc_push_lock);
                 goto out_skip;
+       }
+
+       /*
+        * We are now going to push this context, so add it to the committing
+        * list before we do anything else. This ensures that anyone waiting on
+        * this push can easily detect the difference between a "push in
+        * progress" and "CIL is empty, nothing to do".
+        *
+        * IOWs, a wait loop can now check for:
+        *      the current sequence not being found on the committing list;
+        *      an empty CIL; and
+        *      an unchanged sequence number
+        * to detect a push that had nothing to do and therefore does not need
+        * waiting on. If the CIL is not empty, we get put on the committing
+        * list before emptying the CIL and bumping the sequence number. Hence
+        * an empty CIL and an unchanged sequence number means we jumped out
+        * above after doing nothing.
+        *
+        * Hence the waiter will either find the commit sequence on the
+        * committing list or the sequence number will be unchanged and the CIL
+        * still dirty. In that latter case, the push has not yet started, and
+        * so the waiter will have to continue trying to check the CIL
+        * committing list until it is found. In extreme cases of delay, the
+        * sequence may fully commit between the attempts the wait makes to wait
+        * on the commit sequence.
+        */
+       list_add(&ctx->committing, &cil->xc_committing);
+       spin_unlock(&cil->xc_push_lock);
  
         /*
          * pull all the log vectors off the items in the CIL, and
@@ -532,7 +560,6 @@ xlog_cil_push(
          */
         spin_lock(&cil->xc_push_lock);
         cil->xc_current_sequence = new_ctx->sequence;
-       list_add(&ctx->committing, &cil->xc_committing);
         spin_unlock(&cil->xc_push_lock);
         up_write(&cil->xc_ctx_lock);
  
@@ -855,13 +882,15 @@ restart:
          * Hence by the time we have got here it our sequence may not have been
          * pushed yet. This is true if the current sequence still matches the
          * push sequence after the above wait loop and the CIL still contains
-        * dirty objects.
+        * dirty objects. This is guaranteed by the push code first adding the
+        * context to the committing list before emptying the CIL.
          *
-        * When the push occurs, it will empty the CIL and atomically increment
-        * the currect sequence past the push sequence and move it into the
-        * committing list. Of course, if the CIL is clean at the time of the
-        * push, it won't have pushed the CIL at all, so in that case we should
-        * try the push for this sequence again from the start just in case.
+        * Hence if we don't find the context in the committing list and the
+        * current sequence number is unchanged then the CIL contents are
+        * significant.  If the CIL is empty, if means there was nothing to push
+        * and that means there is nothing to wait for. If the CIL is not empty,
+        * it means we haven't yet started the push, because if it had started
+        * we would have found the context on the committing list.
          */
         if (sequence == cil->xc_current_sequence &&
             !list_empty(&cil->xc_cil)) {
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 980e2968b90720329951cdb4a0b144e086a6485f..00cd7f3a8f596362bd2ae4ebe19f1211ca5d3d4e 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1443,160 +1443,6 @@ xlog_clear_stale_blocks(
   ******************************************************************************
   */
  
-STATIC xlog_recover_t *
-xlog_recover_find_tid(
-       struct hlist_head       *head,
-       xlog_tid_t              tid)
-{
-       xlog_recover_t          *trans;
-
-       hlist_for_each_entry(trans, head, r_list) {
-               if (trans->r_log_tid == tid)
-                       return trans;
-       }
-       return NULL;
-}
-
-STATIC void
-xlog_recover_new_tid(
-       struct hlist_head       *head,
-       xlog_tid_t              tid,
-       xfs_lsn_t               lsn)
-{
-       xlog_recover_t          *trans;
-
-       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
-       trans->r_log_tid   = tid;
-       trans->r_lsn       = lsn;
-       INIT_LIST_HEAD(&trans->r_itemq);
-
-       INIT_HLIST_NODE(&trans->r_list);
-       hlist_add_head(&trans->r_list, head);
-}
-
-STATIC void
-xlog_recover_add_item(
-       struct list_head        *head)
-{
-       xlog_recover_item_t     *item;
-
-       item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
-       INIT_LIST_HEAD(&item->ri_list);
-       list_add_tail(&item->ri_list, head);
-}
-
-STATIC int
-xlog_recover_add_to_cont_trans(
-       struct xlog             *log,
-       struct xlog_recover     *trans,
-       xfs_caddr_t             dp,
-       int                     len)
-{
-       xlog_recover_item_t     *item;
-       xfs_caddr_t             ptr, old_ptr;
-       int                     old_len;
-
-       if (list_empty(&trans->r_itemq)) {
-               /* finish copying rest of trans header */
-               xlog_recover_add_item(&trans->r_itemq);
-               ptr = (xfs_caddr_t) &trans->r_theader +
-                               sizeof(xfs_trans_header_t) - len;
-               memcpy(ptr, dp, len); /* d, s, l */
-               return 0;
-       }
-       /* take the tail entry */
-       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
-
-       old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
-       old_len = item->ri_buf[item->ri_cnt-1].i_len;
-
-       ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
-       memcpy(&ptr[old_len], dp, len); /* d, s, l */
-       item->ri_buf[item->ri_cnt-1].i_len += len;
-       item->ri_buf[item->ri_cnt-1].i_addr = ptr;
-       trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
-       return 0;
-}
-
-/*
- * The next region to add is the start of a new region.  It could be
- * a whole region or it could be the first part of a new region.  Because
- * of this, the assumption here is that the type and size fields of all
- * format structures fit into the first 32 bits of the structure.
- *
- * This works because all regions must be 32 bit aligned.  Therefore, we
- * either have both fields or we have neither field.  In the case we have
- * neither field, the data part of the region is zero length.  We only have
- * a log_op_header and can throw away the header since a new one will appear
- * later.  If we have at least 4 bytes, then we can determine how many regions
- * will appear in the current log item.
- */
-STATIC int
-xlog_recover_add_to_trans(
-       struct xlog             *log,
-       struct xlog_recover     *trans,
-       xfs_caddr_t             dp,
-       int                     len)
-{
-       xfs_inode_log_format_t  *in_f;                  /* any will do */
-       xlog_recover_item_t     *item;
-       xfs_caddr_t             ptr;
-
-       if (!len)
-               return 0;
-       if (list_empty(&trans->r_itemq)) {
-               /* we need to catch log corruptions here */
-               if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
-                       xfs_warn(log->l_mp, "%s: bad header magic number",
-                               __func__);
-                       ASSERT(0);
-                       return -EIO;
-               }
-               if (len == sizeof(xfs_trans_header_t))
-                       xlog_recover_add_item(&trans->r_itemq);
-               memcpy(&trans->r_theader, dp, len); /* d, s, l */
-               return 0;
-       }
-
-       ptr = kmem_alloc(len, KM_SLEEP);
-       memcpy(ptr, dp, len);
-       in_f = (xfs_inode_log_format_t *)ptr;
-
-       /* take the tail entry */
-       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
-       if (item->ri_total != 0 &&
-            item->ri_total == item->ri_cnt) {
-               /* tail item is in use, get a new one */
-               xlog_recover_add_item(&trans->r_itemq);
-               item = list_entry(trans->r_itemq.prev,
-                                       xlog_recover_item_t, ri_list);
-       }
-
-       if (item->ri_total == 0) {              /* first region to be added */
-               if (in_f->ilf_size == 0 ||
-                   in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
-                       xfs_warn(log->l_mp,
-               "bad number of regions (%d) in inode log format",
-                                 in_f->ilf_size);
-                       ASSERT(0);
-                       kmem_free(ptr);
-                       return -EIO;
-               }
-
-               item->ri_total = in_f->ilf_size;
-               item->ri_buf =
-                       kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
-                                   KM_SLEEP);
-       }
-       ASSERT(item->ri_total > item->ri_cnt);
-       /* Description region is ri_buf[0] */
-       item->ri_buf[item->ri_cnt].i_addr = ptr;
-       item->ri_buf[item->ri_cnt].i_len  = len;
-       item->ri_cnt++;
-       trace_xfs_log_recover_item_add(log, trans, item, 0);
-       return 0;
-}
-
  /*
   * Sort the log items in the transaction.
   *
@@ -3252,31 +3098,6 @@ xlog_recover_do_icreate_pass2(
         return 0;
  }
  
-/*
- * Free up any resources allocated by the transaction
- *
- * Remember that EFIs, EFDs, and IUNLINKs are handled later.
- */
-STATIC void
-xlog_recover_free_trans(
-       struct xlog_recover     *trans)
-{
-       xlog_recover_item_t     *item, *n;
-       int                     i;
-
-       list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
-               /* Free the regions in the item. */
-               list_del(&item->ri_list);
-               for (i = 0; i < item->ri_cnt; i++)
-                       kmem_free(item->ri_buf[i].i_addr);
-               /* Free the item itself */
-               kmem_free(item->ri_buf);
-               kmem_free(item);
-       }
-       /* Free the transaction recover structure */
-       kmem_free(trans);
-}
-
  STATIC void
  xlog_recover_buffer_ra_pass2(
         struct xlog                     *log,
@@ -3526,21 +3347,308 @@ out:
         if (!list_empty(&done_list))
                 list_splice_init(&done_list, &trans->r_itemq);
  
-       xlog_recover_free_trans(trans);
-
         error2 = xfs_buf_delwri_submit(&buffer_list);
         return error ? error : error2;
  }
  
+STATIC void
+xlog_recover_add_item(
+       struct list_head        *head)
+{
+       xlog_recover_item_t     *item;
+
+       item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
+       INIT_LIST_HEAD(&item->ri_list);
+       list_add_tail(&item->ri_list, head);
+}
+
  STATIC int
-xlog_recover_unmount_trans(
-       struct xlog             *log)
+xlog_recover_add_to_cont_trans(
+       struct xlog             *log,
+       struct xlog_recover     *trans,
+       xfs_caddr_t             dp,
+       int                     len)
+{
+       xlog_recover_item_t     *item;
+       xfs_caddr_t             ptr, old_ptr;
+       int                     old_len;
+
+       if (list_empty(&trans->r_itemq)) {
+               /* finish copying rest of trans header */
+               xlog_recover_add_item(&trans->r_itemq);
+               ptr = (xfs_caddr_t) &trans->r_theader +
+                               sizeof(xfs_trans_header_t) - len;
+               memcpy(ptr, dp, len);
+               return 0;
+       }
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+
+       old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
+       old_len = item->ri_buf[item->ri_cnt-1].i_len;
+
+       ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
+       memcpy(&ptr[old_len], dp, len);
+       item->ri_buf[item->ri_cnt-1].i_len += len;
+       item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+       trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
+       return 0;
+}
+
+/*
+ * The next region to add is the start of a new region.  It could be
+ * a whole region or it could be the first part of a new region.  Because
+ * of this, the assumption here is that the type and size fields of all
+ * format structures fit into the first 32 bits of the structure.
+ *
+ * This works because all regions must be 32 bit aligned.  Therefore, we
+ * either have both fields or we have neither field.  In the case we have
+ * neither field, the data part of the region is zero length.  We only have
+ * a log_op_header and can throw away the header since a new one will appear
+ * later.  If we have at least 4 bytes, then we can determine how many regions
+ * will appear in the current log item.
+ */
+STATIC int
+xlog_recover_add_to_trans(
+       struct xlog             *log,
+       struct xlog_recover     *trans,
+       xfs_caddr_t             dp,
+       int                     len)
  {
-       /* Do nothing now */
-       xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
+       xfs_inode_log_format_t  *in_f;                  /* any will do */
+       xlog_recover_item_t     *item;
+       xfs_caddr_t             ptr;
+
+       if (!len)
+               return 0;
+       if (list_empty(&trans->r_itemq)) {
+               /* we need to catch log corruptions here */
+               if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
+                       xfs_warn(log->l_mp, "%s: bad header magic number",
+                               __func__);
+                       ASSERT(0);
+                       return -EIO;
+               }
+               if (len == sizeof(xfs_trans_header_t))
+                       xlog_recover_add_item(&trans->r_itemq);
+               memcpy(&trans->r_theader, dp, len);
+               return 0;
+       }
+
+       ptr = kmem_alloc(len, KM_SLEEP);
+       memcpy(ptr, dp, len);
+       in_f = (xfs_inode_log_format_t *)ptr;
+
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+       if (item->ri_total != 0 &&
+            item->ri_total == item->ri_cnt) {
+               /* tail item is in use, get a new one */
+               xlog_recover_add_item(&trans->r_itemq);
+               item = list_entry(trans->r_itemq.prev,
+                                       xlog_recover_item_t, ri_list);
+       }
+
+       if (item->ri_total == 0) {              /* first region to be added */
+               if (in_f->ilf_size == 0 ||
+                   in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+                       xfs_warn(log->l_mp,
+               "bad number of regions (%d) in inode log format",
+                                 in_f->ilf_size);
+                       ASSERT(0);
+                       kmem_free(ptr);
+                       return -EIO;
+               }
+
+               item->ri_total = in_f->ilf_size;
+               item->ri_buf =
+                       kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+                                   KM_SLEEP);
+       }
+       ASSERT(item->ri_total > item->ri_cnt);
+       /* Description region is ri_buf[0] */
+       item->ri_buf[item->ri_cnt].i_addr = ptr;
+       item->ri_buf[item->ri_cnt].i_len  = len;
+       item->ri_cnt++;
+       trace_xfs_log_recover_item_add(log, trans, item, 0);
         return 0;
  }
  
+/*
+ * Free up any resources allocated by the transaction
+ *
+ * Remember that EFIs, EFDs, and IUNLINKs are handled later.
+ */
+STATIC void
+xlog_recover_free_trans(
+       struct xlog_recover     *trans)
+{
+       xlog_recover_item_t     *item, *n;
+       int                     i;
+
+       list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
+               /* Free the regions in the item. */
+               list_del(&item->ri_list);
+               for (i = 0; i < item->ri_cnt; i++)
+                       kmem_free(item->ri_buf[i].i_addr);
+               /* Free the item itself */
+               kmem_free(item->ri_buf);
+               kmem_free(item);
+       }
+       /* Free the transaction recover structure */
+       kmem_free(trans);
+}
+
+/*
+ * On error or completion, trans is freed.
+ */
+STATIC int
+xlog_recovery_process_trans(
+       struct xlog             *log,
+       struct xlog_recover     *trans,
+       xfs_caddr_t             dp,
+       unsigned int            len,
+       unsigned int            flags,
+       int                     pass)
+{
+       int                     error = 0;
+       bool                    freeit = false;
+
+       /* mask off ophdr transaction container flags */
+       flags &= ~XLOG_END_TRANS;
+       if (flags & XLOG_WAS_CONT_TRANS)
+               flags &= ~XLOG_CONTINUE_TRANS;
+
+       /*
+        * Callees must not free the trans structure. We'll decide if we need to
+        * free it or not based on the operation being done and it's result.
+        */
+       switch (flags) {
+       /* expected flag values */
+       case 0:
+       case XLOG_CONTINUE_TRANS:
+               error = xlog_recover_add_to_trans(log, trans, dp, len);
+               break;
+       case XLOG_WAS_CONT_TRANS:
+               error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
+               break;
+       case XLOG_COMMIT_TRANS:
+               error = xlog_recover_commit_trans(log, trans, pass);
+               /* success or fail, we are now done with this transaction. */
+               freeit = true;
+               break;
+
+       /* unexpected flag values */
+       case XLOG_UNMOUNT_TRANS:
+               /* just skip trans */
+               xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
+               freeit = true;
+               break;
+       case XLOG_START_TRANS:
+       default:
+               xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags);
+               ASSERT(0);
+               error = -EIO;
+               break;
+       }
+       if (error || freeit)
+               xlog_recover_free_trans(trans);
+       return error;
+}
+
+/*
+ * Lookup the transaction recovery structure associated with the ID in the
+ * current ophdr. If the transaction doesn't exist and the start flag is set in
+ * the ophdr, then allocate a new transaction for future ID matches to find.
+ * Either way, return what we found during the lookup - an existing transaction
+ * or nothing.
+ */
+STATIC struct xlog_recover *
+xlog_recover_ophdr_to_trans(
+       struct hlist_head       rhash[],
+       struct xlog_rec_header  *rhead,
+       struct xlog_op_header   *ohead)
+{
+       struct xlog_recover     *trans;
+       xlog_tid_t              tid;
+       struct hlist_head       *rhp;
+
+       tid = be32_to_cpu(ohead->oh_tid);
+       rhp = &rhash[XLOG_RHASH(tid)];
+       hlist_for_each_entry(trans, rhp, r_list) {
+               if (trans->r_log_tid == tid)
+                       return trans;
+       }
+
+       /*
+        * skip over non-start transaction headers - we could be
+        * processing slack space before the next transaction starts
+        */
+       if (!(ohead->oh_flags & XLOG_START_TRANS))
+               return NULL;
+
+       ASSERT(be32_to_cpu(ohead->oh_len) == 0);
+
+       /*
+        * This is a new transaction so allocate a new recovery container to
+        * hold the recovery ops that will follow.
+        */
+       trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
+       trans->r_log_tid = tid;
+       trans->r_lsn = be64_to_cpu(rhead->h_lsn);
+       INIT_LIST_HEAD(&trans->r_itemq);
+       INIT_HLIST_NODE(&trans->r_list);
+       hlist_add_head(&trans->r_list, rhp);
+
+       /*
+        * Nothing more to do for this ophdr. Items to be added to this new
+        * transaction will be in subsequent ophdr containers.
+        */
+       return NULL;
+}
+
+STATIC int
+xlog_recover_process_ophdr(
+       struct xlog             *log,
+       struct hlist_head       rhash[],
+       struct xlog_rec_header  *rhead,
+       struct xlog_op_header   *ohead,
+       xfs_caddr_t             dp,
+       xfs_caddr_t             end,
+       int                     pass)
+{
+       struct xlog_recover     *trans;
+       unsigned int            len;
+
+       /* Do we understand who wrote this op? */
+       if (ohead->oh_clientid != XFS_TRANSACTION &&
+           ohead->oh_clientid != XFS_LOG) {
+               xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
+                       __func__, ohead->oh_clientid);
+               ASSERT(0);
+               return -EIO;
+       }
+
+       /*
+        * Check the ophdr contains all the data it is supposed to contain.
+        */
+       len = be32_to_cpu(ohead->oh_len);
+       if (dp + len > end) {
+               xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len);
+               WARN_ON(1);
+               return -EIO;
+       }
+
+       trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead);
+       if (!trans) {
+               /* nothing to do, so skip over this ophdr */
+               return 0;
+       }
+
+       return xlog_recovery_process_trans(log, trans, dp, len,
+                                          ohead->oh_flags, pass);
+}
+
  /*
   * There are two valid states of the r_state field.  0 indicates that the
   * transaction structure is in a normal state.  We have either seen the
@@ -3558,86 +3666,30 @@ xlog_recover_process_data(
         xfs_caddr_t             dp,
         int                     pass)
  {
-       xfs_caddr_t             lp;
+       struct xlog_op_header   *ohead;
+       xfs_caddr_t             end;
         int                     num_logops;
-       xlog_op_header_t        *ohead;
-       xlog_recover_t          *trans;
-       xlog_tid_t              tid;
         int                     error;
-       unsigned long           hash;
-       uint                    flags;
  
-       lp = dp + be32_to_cpu(rhead->h_len);
+       end = dp + be32_to_cpu(rhead->h_len);
         num_logops = be32_to_cpu(rhead->h_num_logops);
  
         /* check the log format matches our own - else we can't recover */
         if (xlog_header_check_recover(log->l_mp, rhead))
                 return -EIO;
  
-       while ((dp < lp) && num_logops) {
-               ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
-               ohead = (xlog_op_header_t *)dp;
-               dp += sizeof(xlog_op_header_t);
-               if (ohead->oh_clientid != XFS_TRANSACTION &&
-                   ohead->oh_clientid != XFS_LOG) {
-                       xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
-                                       __func__, ohead->oh_clientid);
-                       ASSERT(0);
-                       return -EIO;
-               }
-               tid = be32_to_cpu(ohead->oh_tid);
-               hash = XLOG_RHASH(tid);
-               trans = xlog_recover_find_tid(&rhash[hash], tid);
-               if (trans == NULL) {               /* not found; add new tid */
-                       if (ohead->oh_flags & XLOG_START_TRANS)
-                               xlog_recover_new_tid(&rhash[hash], tid,
-                                       be64_to_cpu(rhead->h_lsn));
-               } else {
-                       if (dp + be32_to_cpu(ohead->oh_len) > lp) {
-                               xfs_warn(log->l_mp, "%s: bad length 0x%x",
-                                       __func__, be32_to_cpu(ohead->oh_len));
-                               WARN_ON(1);
-                               return -EIO;
-                       }
-                       flags = ohead->oh_flags & ~XLOG_END_TRANS;
-                       if (flags & XLOG_WAS_CONT_TRANS)
-                               flags &= ~XLOG_CONTINUE_TRANS;
-                       switch (flags) {
-                       case XLOG_COMMIT_TRANS:
-                               error = xlog_recover_commit_trans(log,
-                                                               trans, pass);
-                               break;
-                       case XLOG_UNMOUNT_TRANS:
-                               error = xlog_recover_unmount_trans(log);
-                               break;
-                       case XLOG_WAS_CONT_TRANS:
-                               error = xlog_recover_add_to_cont_trans(log,
-                                               trans, dp,
-                                               be32_to_cpu(ohead->oh_len));
-                               break;
-                       case XLOG_START_TRANS:
-                               xfs_warn(log->l_mp, "%s: bad transaction",
-                                       __func__);
-                               ASSERT(0);
-                               error = -EIO;
-                               break;
-                       case 0:
-                       case XLOG_CONTINUE_TRANS:
-                               error = xlog_recover_add_to_trans(log, trans,
-                                               dp, be32_to_cpu(ohead->oh_len));
-                               break;
-                       default:
-                               xfs_warn(log->l_mp, "%s: bad flag 0x%x",
-                                       __func__, flags);
-                               ASSERT(0);
-                               error = -EIO;
-                               break;
-                       }
-                       if (error) {
-                               xlog_recover_free_trans(trans);
-                               return error;
-                       }
-               }
+       while ((dp < end) && num_logops) {
+
+               ohead = (struct xlog_op_header *)dp;
+               dp += sizeof(*ohead);
+               ASSERT(dp <= end);
+
+               /* errors will abort recovery */
+               error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
+                                                   dp, end, pass);
+               if (error)
+                       return error;
+
                 dp += be32_to_cpu(ohead->oh_len);
                 num_logops--;
         }
@@ -4130,41 +4182,13 @@ xlog_do_recovery_pass(
         }
  
         memset(rhash, 0, sizeof(rhash));
-       if (tail_blk <= head_blk) {
-               for (blk_no = tail_blk; blk_no < head_blk; ) {
-                       error = xlog_bread(log, blk_no, hblks, hbp, &offset);
-                       if (error)
-                               goto bread_err2;
-
-                       rhead = (xlog_rec_header_t *)offset;
-                       error = xlog_valid_rec_header(log, rhead, blk_no);
-                       if (error)
-                               goto bread_err2;
-
-                       /* blocks in data section */
-                       bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-                       error = xlog_bread(log, blk_no + hblks, bblks, dbp,
-                                          &offset);
-                       if (error)
-                               goto bread_err2;
-
-                       error = xlog_unpack_data(rhead, offset, log);
-                       if (error)
-                               goto bread_err2;
-
-                       error = xlog_recover_process_data(log,
-                                               rhash, rhead, offset, pass);
-                       if (error)
-                               goto bread_err2;
-                       blk_no += bblks + hblks;
-               }
-       } else {
+       blk_no = tail_blk;
+       if (tail_blk > head_blk) {
                 /*
                  * Perform recovery around the end of the physical log.
                  * When the head is not on the same cycle number as the tail,
-                * we can't do a sequential recovery as above.
+                * we can't do a sequential recovery.
                  */
-               blk_no = tail_blk;
                 while (blk_no < log->l_logBBsize) {
                         /*
                          * Check for header wrapping around physical end-of-log
@@ -4278,34 +4302,35 @@ xlog_do_recovery_pass(
  
                 ASSERT(blk_no >= log->l_logBBsize);
                 blk_no -= log->l_logBBsize;
+       }
  
-               /* read first part of physical log */
-               while (blk_no < head_blk) {
-                       error = xlog_bread(log, blk_no, hblks, hbp, &offset);
-                       if (error)
-                               goto bread_err2;
+       /* read first part of physical log */
+       while (blk_no < head_blk) {
+               error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+               if (error)
+                       goto bread_err2;
  
-                       rhead = (xlog_rec_header_t *)offset;
-                       error = xlog_valid_rec_header(log, rhead, blk_no);
-                       if (error)
-                               goto bread_err2;
+               rhead = (xlog_rec_header_t *)offset;
+               error = xlog_valid_rec_header(log, rhead, blk_no);
+               if (error)
+                       goto bread_err2;
  
-                       bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-                       error = xlog_bread(log, blk_no+hblks, bblks, dbp,
-                                          &offset);
-                       if (error)
-                               goto bread_err2;
+               /* blocks in data section */
+               bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
+               error = xlog_bread(log, blk_no+hblks, bblks, dbp,
+                                  &offset);
+               if (error)
+                       goto bread_err2;
  
-                       error = xlog_unpack_data(rhead, offset, log);
-                       if (error)
-                               goto bread_err2;
+               error = xlog_unpack_data(rhead, offset, log);
+               if (error)
+                       goto bread_err2;
  
-                       error = xlog_recover_process_data(log, rhash,
-                                                       rhead, offset, pass);
-                       if (error)
-                               goto bread_err2;
-                       blk_no += bblks + hblks;
-               }
+               error = xlog_recover_process_data(log, rhash,
+                                               rhead, offset, pass);
+               if (error)
+                       goto bread_err2;
+               blk_no += bblks + hblks;
         }
  
   bread_err2:
@@ -4503,6 +4528,18 @@ xlog_recover(
                         return -EINVAL;
                 }
  
+               /*
+                * Delay log recovery if the debug hook is set. This is debug
+                * instrumention to coordinate simulation of I/O failures with
+                * log recovery.
+                */
+               if (xfs_globals.log_recovery_delay) {
+                       xfs_notice(log->l_mp,
+                               "Delaying log recovery for %d seconds.",
+                               xfs_globals.log_recovery_delay);
+                       msleep(xfs_globals.log_recovery_delay * 1000);
+               }
+
                 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
                                 log->l_mp->m_logname ? log->l_mp->m_logname
                                                      : "internal");
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index 142c460fc64ef32452e0b97dd548e394d4f35ad2..51435dbce9c4efd2407be2c12ddd58d86324bce8 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -61,8 +61,6 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
  static int xfs_uuid_table_size;
  static uuid_t *xfs_uuid_table;
  
-extern struct kset *xfs_kset;
-
  /*
   * See if the UUID is unique among mounted XFS filesystems.
   * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -726,7 +724,6 @@ xfs_mountfs(
  
         xfs_set_maxicount(mp);
  
-       mp->m_kobj.kobject.kset = xfs_kset;
         error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
         if (error)
                 goto out;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c

index 1eb6f3df698c8bb4eac11b5c03aa23790cbc65c4..30ecca3037e37bc5354fb9e37deb4db6e163fa8a 100644 (file)
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -304,7 +304,8 @@ _xfs_mru_cache_reap(
  int
  xfs_mru_cache_init(void)
  {
-       xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
+       xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
+                               WQ_MEM_RECLAIM|WQ_FREEZABLE, 1);
         if (!xfs_mru_reap_wq)
                 return -ENOMEM;
         return 0;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c

index 10232102b4a6ffac8a8a92f627ba0cb0caea64ab..d68f23021af3587cbaed9a3ad90d71343bceee7e 100644 (file)
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -434,6 +434,7 @@ xfs_qm_dquot_isolate(
         struct list_head        *item,
         spinlock_t              *lru_lock,
         void                    *arg)
+               __releases(lru_lock) __acquires(lru_lock)
  {
         struct xfs_dquot        *dqp = container_of(item,
                                                 struct xfs_dquot, q_lru);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index 1ad00937b485a9ad646be63f2029c2188eb28c87..e1175ea9b551b74fce1ba38e48181967e553504d 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -46,7 +46,7 @@
   * Keeps track of a current summary block, so we don't keep reading
   * it from the buffer cache.
   */
-STATIC int                             /* error */
+static int
  xfs_rtget_summary(
         xfs_mount_t     *mp,            /* file system mount structure */
         xfs_trans_t     *tp,            /* transaction pointer */
@@ -56,60 +56,9 @@ xfs_rtget_summary(
         xfs_fsblock_t   *rsb,           /* in/out: summary block number */
         xfs_suminfo_t   *sum)           /* out: summary info for this block */
  {
-       xfs_buf_t       *bp;            /* buffer for summary block */
-       int             error;          /* error value */
-       xfs_fsblock_t   sb;             /* summary fsblock */
-       int             so;             /* index into the summary file */
-       xfs_suminfo_t   *sp;            /* pointer to returned data */
-
-       /*
-        * Compute entry number in the summary file.
-        */
-       so = XFS_SUMOFFS(mp, log, bbno);
-       /*
-        * Compute the block number in the summary file.
-        */
-       sb = XFS_SUMOFFSTOBLOCK(mp, so);
-       /*
-        * If we have an old buffer, and the block number matches, use that.
-        */
-       if (rbpp && *rbpp && *rsb == sb)
-               bp = *rbpp;
-       /*
-        * Otherwise we have to get the buffer.
-        */
-       else {
-               /*
-                * If there was an old one, get rid of it first.
-                */
-               if (rbpp && *rbpp)
-                       xfs_trans_brelse(tp, *rbpp);
-               error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
-               if (error) {
-                       return error;
-               }
-               /*
-                * Remember this buffer and block for the next call.
-                */
-               if (rbpp) {
-                       *rbpp = bp;
-                       *rsb = sb;
-               }
-       }
-       /*
-        * Point to the summary information & copy it out.
-        */
-       sp = XFS_SUMPTR(mp, bp, so);
-       *sum = *sp;
-       /*
-        * Drop the buffer if we're not asked to remember it.
-        */
-       if (!rbpp)
-               xfs_trans_brelse(tp, bp);
-       return 0;
+       return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum);
  }
  
-
  /*
   * Return whether there are any free extents in the size range given
   * by low and high, for the bitmap block bbno.
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h

index c642795324af649c1c80666e5bdc7e0739551e1d..76c0a4a9bb170a327936d1719c8cc9fa736de324 100644 (file)
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -111,6 +111,10 @@ int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
                     xfs_rtblock_t *rtblock);
  int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
                        xfs_rtblock_t start, xfs_extlen_t len, int val);
+int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
+                            int log, xfs_rtblock_t bbno, int delta,
+                            xfs_buf_t **rbpp, xfs_fsblock_t *rsb,
+                            xfs_suminfo_t *sum);
  int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
                          xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp,
                          xfs_fsblock_t *rsb);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index b194652033cd11c8530ae762d01972fa455a23aa..9f622feda6a43bb32ea1983779a48d20b1446391 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -47,6 +47,7 @@
  #include "xfs_dinode.h"
  #include "xfs_filestream.h"
  #include "xfs_quota.h"
+#include "xfs_sysfs.h"
  
  #include <linux/namei.h>
  #include <linux/init.h>
@@ -61,7 +62,11 @@
  static const struct super_operations xfs_super_operations;
  static kmem_zone_t *xfs_ioend_zone;
  mempool_t *xfs_ioend_pool;
-struct kset *xfs_kset;
+
+static struct kset *xfs_kset;          /* top-level xfs sysfs dir */
+#ifdef DEBUG
+static struct xfs_kobj xfs_dbg_kobj;   /* global debug sysfs attrs */
+#endif
  
  #define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
  #define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
@@ -838,32 +843,32 @@ xfs_init_mount_workqueues(
         struct xfs_mount        *mp)
  {
         mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
-                       WQ_MEM_RECLAIM, 0, mp->m_fsname);
+                       WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
         if (!mp->m_data_workqueue)
                 goto out;
  
         mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
-                       WQ_MEM_RECLAIM, 0, mp->m_fsname);
+                       WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
         if (!mp->m_unwritten_workqueue)
                 goto out_destroy_data_iodone_queue;
  
         mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
-                       WQ_MEM_RECLAIM, 0, mp->m_fsname);
+                       WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
         if (!mp->m_cil_workqueue)
                 goto out_destroy_unwritten;
  
         mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
-                       0, 0, mp->m_fsname);
+                       WQ_FREEZABLE, 0, mp->m_fsname);
         if (!mp->m_reclaim_workqueue)
                 goto out_destroy_cil;
  
         mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
-                       0, 0, mp->m_fsname);
+                       WQ_FREEZABLE, 0, mp->m_fsname);
         if (!mp->m_log_workqueue)
                 goto out_destroy_reclaim;
  
         mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
-                       0, 0, mp->m_fsname);
+                       WQ_FREEZABLE, 0, mp->m_fsname);
         if (!mp->m_eofblocks_workqueue)
                 goto out_destroy_log;
  
@@ -1406,6 +1411,7 @@ xfs_fs_fill_super(
         atomic_set(&mp->m_active_trans, 0);
         INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
         INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
+       mp->m_kobj.kobject.kset = xfs_kset;
  
         mp->m_super = sb;
         sb->s_fs_info = mp;
@@ -1715,7 +1721,8 @@ xfs_init_workqueues(void)
          * AGs in all the filesystems mounted. Hence use the default large
          * max_active value for this workqueue.
          */
-       xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
+       xfs_alloc_wq = alloc_workqueue("xfsalloc",
+                       WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
         if (!xfs_alloc_wq)
                 return -ENOMEM;
  
@@ -1768,9 +1775,16 @@ init_xfs_fs(void)
                 goto out_sysctl_unregister;;
         }
  
-       error = xfs_qm_init();
+#ifdef DEBUG
+       xfs_dbg_kobj.kobject.kset = xfs_kset;
+       error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
         if (error)
                 goto out_kset_unregister;
+#endif
+
+       error = xfs_qm_init();
+       if (error)
+               goto out_remove_kobj;
  
         error = register_filesystem(&xfs_fs_type);
         if (error)
@@ -1779,7 +1793,11 @@ init_xfs_fs(void)
  
   out_qm_exit:
         xfs_qm_exit();
+ out_remove_kobj:
+#ifdef DEBUG
+       xfs_sysfs_del(&xfs_dbg_kobj);
   out_kset_unregister:
+#endif
         kset_unregister(xfs_kset);
   out_sysctl_unregister:
         xfs_sysctl_unregister();
@@ -1802,6 +1820,9 @@ exit_xfs_fs(void)
  {
         xfs_qm_exit();
         unregister_filesystem(&xfs_fs_type);
+#ifdef DEBUG
+       xfs_sysfs_del(&xfs_dbg_kobj);
+#endif
         kset_unregister(xfs_kset);
         xfs_sysctl_unregister();
         xfs_cleanup_procfs();
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c

index 6a944a2cd36fbf97717ea31f56e82c101caef3a9..02ae62a998e082a54fe86b3a9c6f6b7cdf071e57 100644 (file)
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -269,9 +269,11 @@ xfs_symlink(
         /*
          * Check for ability to enter directory entry, if no space reserved.
          */
-       error = xfs_dir_canenter(tp, dp, link_name, resblks);
-       if (error)
-               goto error_return;
+       if (!resblks) {
+               error = xfs_dir_canenter(tp, dp, link_name);
+               if (error)
+                       goto error_return;
+       }
         /*
          * Initialize the bmap freelist prior to calling either
          * bmapi or the directory create code.
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h

index bd8e157c20efa254c736952967e6c894dbd706a0..ffef453757543a94d742eb2890e4858e8bcd7716 100644 (file)
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -92,6 +92,11 @@ enum {
  
  extern xfs_param_t     xfs_params;
  
+struct xfs_globals {
+       int     log_recovery_delay;     /* log recovery delay (secs) */
+};
+extern struct xfs_globals      xfs_globals;
+
  #ifdef CONFIG_SYSCTL
  extern int xfs_sysctl_register(void);
  extern void xfs_sysctl_unregister(void);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c

index 9835139ce1ec24cd2e1d5358377bd844d4323ab7..aa03670851d86574cc542a2eccb1b5c4607e4ca5 100644 (file)
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -51,6 +51,80 @@ struct kobj_type xfs_mp_ktype = {
         .release = xfs_sysfs_release,
  };
  
+#ifdef DEBUG
+/* debug */
+
+STATIC ssize_t
+log_recovery_delay_store(
+       const char      *buf,
+       size_t          count,
+       void            *data)
+{
+       int             ret;
+       int             val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       if (val < 0 || val > 60)
+               return -EINVAL;
+
+       xfs_globals.log_recovery_delay = val;
+
+       return count;
+}
+
+STATIC ssize_t
+log_recovery_delay_show(
+       char    *buf,
+       void    *data)
+{
+       return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
+}
+XFS_SYSFS_ATTR_RW(log_recovery_delay);
+
+static struct attribute *xfs_dbg_attrs[] = {
+       ATTR_LIST(log_recovery_delay),
+       NULL,
+};
+
+STATIC ssize_t
+xfs_dbg_show(
+       struct kobject          *kobject,
+       struct attribute        *attr,
+       char                    *buf)
+{
+       struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+       return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0;
+}
+
+STATIC ssize_t
+xfs_dbg_store(
+       struct kobject          *kobject,
+       struct attribute        *attr,
+       const char              *buf,
+       size_t                  count)
+{
+       struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+       return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0;
+}
+
+static struct sysfs_ops xfs_dbg_ops = {
+       .show = xfs_dbg_show,
+       .store = xfs_dbg_store,
+};
+
+struct kobj_type xfs_dbg_ktype = {
+       .release = xfs_sysfs_release,
+       .sysfs_ops = &xfs_dbg_ops,
+       .default_attrs = xfs_dbg_attrs,
+};
+
+#endif /* DEBUG */
+
  /* xlog */
  
  STATIC ssize_t
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h

index 54a2091183c08a066b9751cfca37ba63f1e83b01..240eee35f342b8bbae4511f0ef04d38e32e3a0c4 100644 (file)
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -20,6 +20,7 @@
  #define __XFS_SYSFS_H__
  
  extern struct kobj_type xfs_mp_ktype;  /* xfs_mount */
+extern struct kobj_type xfs_dbg_ktype; /* debug */
  extern struct kobj_type xfs_log_ktype; /* xlog */
  
  static inline struct xfs_kobj *
author	Dave Chinner <david@fromorbit.com>
	Wed, 1 Oct 2014 23:11:14 +0000 (09:11 +1000)
committer	Dave Chinner <david@fromorbit.com>
	Wed, 1 Oct 2014 23:11:14 +0000 (09:11 +1000)
fs/xfs/libxfs/xfs_alloc.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap.h		patch \| blob \| history
fs/xfs/libxfs/xfs_da_format.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2.h		patch \| blob \| history
fs/xfs/libxfs/xfs_ialloc.c		patch \| blob \| history
fs/xfs/libxfs/xfs_rtbitmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_sb.c		patch \| blob \| history
fs/xfs/xfs_aops.c		patch \| blob \| history
fs/xfs/xfs_bmap_util.c		patch \| blob \| history
fs/xfs/xfs_buf.c		patch \| blob \| history
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_file.c		patch \| blob \| history
fs/xfs/xfs_globals.c		patch \| blob \| history
fs/xfs/xfs_icache.c		patch \| blob \| history
fs/xfs/xfs_inode.c		patch \| blob \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| history
fs/xfs/xfs_iops.c		patch \| blob \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| history
fs/xfs/xfs_mount.c		patch \| blob \| history
fs/xfs/xfs_mru_cache.c		patch \| blob \| history
fs/xfs/xfs_qm.c		patch \| blob \| history
fs/xfs/xfs_rtalloc.c		patch \| blob \| history
fs/xfs/xfs_rtalloc.h		patch \| blob \| history
fs/xfs/xfs_super.c		patch \| blob \| history
fs/xfs/xfs_symlink.c		patch \| blob \| history
fs/xfs/xfs_sysctl.h		patch \| blob \| history
fs/xfs/xfs_sysfs.c		patch \| blob \| history
fs/xfs/xfs_sysfs.h		patch \| blob \| history