]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'fallocate-insert-range' into for-next
authorDave Chinner <david@fromorbit.com>
Wed, 25 Mar 2015 04:12:53 +0000 (15:12 +1100)
committerDave Chinner <david@fromorbit.com>
Wed, 25 Mar 2015 04:12:53 +0000 (15:12 +1100)
26 files changed:
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_da_btree.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_error.c
fs/xfs/xfs_file.c
fs/xfs/xfs_filestream.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_linux.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_mru_cache.c
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_super.c
fs/xfs/xfs_super.h
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c

index e8696f5a8041c5e24395a0a43205897a5d7dffdc..aeffeaaac0ec406e543730eb608de1eb3ebc40fb 100644 (file)
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset(
        }
 }
 
-/*
- * Debug/sanity checking code
- */
-
-STATIC int
-xfs_bmap_sanity_check(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp,
-       int                     level)
-{
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-
-       if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
-           block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
-               return 0;
-
-       if (be16_to_cpu(block->bb_level) != level ||
-           be16_to_cpu(block->bb_numrecs) == 0 ||
-           be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-               return 0;
-
-       return 1;
-}
-
 #ifdef DEBUG
 STATIC struct xfs_buf *
 xfs_bmap_get_bp(
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents(
                                goto error_norelse;
                }
                block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(mp,
-                       xfs_bmap_sanity_check(mp, bp, level),
-                       error0);
                if (level == 0)
                        break;
 
@@ -1312,8 +1285,6 @@ xfs_bmap_read_extents(
                if (error)
                        return error;
                block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(mp,
-                       xfs_bmap_sanity_check(mp, bp, level), error0);
                if (level == 0)
                        break;
                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
@@ -1346,9 +1317,6 @@ xfs_bmap_read_extents(
                                XFS_ERRLEVEL_LOW, ip->i_mount, block);
                        goto error0;
                }
-               XFS_WANT_CORRUPTED_GOTO(mp,
-                       xfs_bmap_sanity_check(mp, bp, 0),
-                       error0);
                /*
                 * Read-ahead the next leaf block, if any.
                 */
@@ -2215,9 +2183,8 @@ xfs_bmap_add_extent_delay_real(
                diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
                if (diff > 0) {
-                       error = xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       -((int64_t)diff), 0);
+                       error = xfs_mod_fdblocks(bma->ip->i_mount,
+                                                -((int64_t)diff), false);
                        ASSERT(!error);
                        if (error)
                                goto done;
@@ -2268,9 +2235,8 @@ xfs_bmap_add_extent_delay_real(
                        temp += bma->cur->bc_private.b.allocated;
                ASSERT(temp <= da_old);
                if (temp < da_old)
-                       xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       (int64_t)(da_old - temp), 0);
+                       xfs_mod_fdblocks(bma->ip->i_mount,
+                                       (int64_t)(da_old - temp), false);
        }
 
        /* clear out the allocated field, done with it now in any case. */
@@ -2948,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay(
        }
        if (oldlen != newlen) {
                ASSERT(oldlen > newlen);
-               xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-                       (int64_t)(oldlen - newlen), 0);
+               xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
+                                false);
                /*
                 * Nothing to do for disk quota accounting here.
                 */
@@ -4166,18 +4132,15 @@ xfs_bmapi_reserve_delalloc(
        ASSERT(indlen > 0);
 
        if (rt) {
-               error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                         -((int64_t)extsz), 0);
+               error = xfs_mod_frextents(mp, -((int64_t)extsz));
        } else {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                -((int64_t)alen), 0);
+               error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
        }
 
        if (error)
                goto out_unreserve_quota;
 
-       error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                        -((int64_t)indlen), 0);
+       error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
        if (error)
                goto out_unreserve_blocks;
 
@@ -4204,9 +4167,9 @@ xfs_bmapi_reserve_delalloc(
 
 out_unreserve_blocks:
        if (rt)
-               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+               xfs_mod_frextents(mp, extsz);
        else
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+               xfs_mod_fdblocks(mp, alen, false);
 out_unreserve_quota:
        if (XFS_IS_QUOTA_ON(mp))
                xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
@@ -5019,10 +4982,8 @@ xfs_bmap_del_extent(
         * Nothing to do for disk quota accounting here.
         */
        ASSERT(da_old >= da_new);
-       if (da_old > da_new) {
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                       (int64_t)(da_old - da_new), 0);
-       }
+       if (da_old > da_new)
+               xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
 done:
        *logflagsp = flags;
        return error;
@@ -5291,14 +5252,13 @@ xfs_bunmapi(
 
                                rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
                                do_div(rtexts, mp->m_sb.sb_rextsize);
-                               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                               (int64_t)rtexts, 0);
+                               xfs_mod_frextents(mp, (int64_t)rtexts);
                                (void)xfs_trans_reserve_quota_nblks(NULL,
                                        ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_RTBLKS);
                        } else {
-                               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                               (int64_t)del.br_blockcount, 0);
+                               xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
+                                                false);
                                (void)xfs_trans_reserve_quota_nblks(NULL,
                                        ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_REGBLKS);
index 9cb0115c6bd1745576b962dd65fc9f942bc392c3..2385f8cd08ab9f1cdf19ad3a9a8473a5e4e34eb3 100644 (file)
@@ -538,12 +538,12 @@ xfs_da3_root_split(
        oldroot = blk1->bp->b_addr;
        if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
            oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
-               struct xfs_da3_icnode_hdr nodehdr;
+               struct xfs_da3_icnode_hdr icnodehdr;
 
-               dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot);
+               dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
                btree = dp->d_ops->node_tree_p(oldroot);
-               size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
-               level = nodehdr.level;
+               size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
+               level = icnodehdr.level;
 
                /*
                 * we are about to copy oldroot to bp, so set up the type
index 8eb7189793836b74084afc154549979e77f6f581..4daaa662337b516f0794ea0d4680a12512ab6262 100644 (file)
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
        /* must be padded to 64 bit alignment */
 } xfs_dsb_t;
 
-/*
- * Sequence number values for the fields.
- */
-typedef enum {
-       XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
-       XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
-       XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
-       XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
-       XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
-       XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
-       XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
-       XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
-       XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
-       XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
-       XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
-       XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
-       XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
-       XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
-       XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
-       XFS_SBS_PQUOTINO, XFS_SBS_LSN,
-       XFS_SBS_FIELDCOUNT
-} xfs_sb_field_t;
-
-/*
- * Mask values, defined based on the xfs_sb_field_t values.
- * Only define the ones we're using.
- */
-#define        XFS_SB_MVAL(x)          (1LL << XFS_SBS_ ## x)
-#define        XFS_SB_UUID             XFS_SB_MVAL(UUID)
-#define        XFS_SB_FNAME            XFS_SB_MVAL(FNAME)
-#define        XFS_SB_ROOTINO          XFS_SB_MVAL(ROOTINO)
-#define        XFS_SB_RBMINO           XFS_SB_MVAL(RBMINO)
-#define        XFS_SB_RSUMINO          XFS_SB_MVAL(RSUMINO)
-#define        XFS_SB_VERSIONNUM       XFS_SB_MVAL(VERSIONNUM)
-#define XFS_SB_UQUOTINO                XFS_SB_MVAL(UQUOTINO)
-#define XFS_SB_GQUOTINO                XFS_SB_MVAL(GQUOTINO)
-#define XFS_SB_QFLAGS          XFS_SB_MVAL(QFLAGS)
-#define XFS_SB_SHARED_VN       XFS_SB_MVAL(SHARED_VN)
-#define XFS_SB_UNIT            XFS_SB_MVAL(UNIT)
-#define XFS_SB_WIDTH           XFS_SB_MVAL(WIDTH)
-#define XFS_SB_ICOUNT          XFS_SB_MVAL(ICOUNT)
-#define XFS_SB_IFREE           XFS_SB_MVAL(IFREE)
-#define XFS_SB_FDBLOCKS                XFS_SB_MVAL(FDBLOCKS)
-#define XFS_SB_FEATURES2       (XFS_SB_MVAL(FEATURES2) | \
-                                XFS_SB_MVAL(BAD_FEATURES2))
-#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
-#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
-#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
-#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
-#define XFS_SB_CRC             XFS_SB_MVAL(CRC)
-#define XFS_SB_PQUOTINO                XFS_SB_MVAL(PQUOTINO)
-#define        XFS_SB_NUM_BITS         ((int)XFS_SBS_FIELDCOUNT)
-#define        XFS_SB_ALL_BITS         ((1LL << XFS_SB_NUM_BITS) - 1)
-#define        XFS_SB_MOD_BITS         \
-       (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
-        XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
-        XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-        XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
-        XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
-        XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
-        XFS_SB_PQUOTINO)
-
 
 /*
  * Misc. Flags - warning - these will be cleared by xfs_repair unless
index db0444893e960cebfcd9040a35f03b81e3bd4f2c..07349a183a110fdf57bdf9a7f1d704452de5cdb7 100644 (file)
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
         */
        newlen = args.mp->m_ialloc_inos;
        if (args.mp->m_maxicount &&
-           args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+           percpu_counter_read(&args.mp->m_icount) + newlen >
+                                                       args.mp->m_maxicount)
                return -ENOSPC;
        args.minlen = args.maxlen = args.mp->m_ialloc_blks;
        /*
@@ -1340,7 +1341,8 @@ xfs_dialloc(
         * inode.
         */
        if (mp->m_maxicount &&
-           mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
+           percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
+                                                       mp->m_maxicount) {
                noroom = 1;
                okalloc = 0;
        }
index f3ea02bf893e33717455f0e9b11f4f156bbe7c2e..dc4bfc5d88fccf221609cf2ac13c80370de07221 100644 (file)
@@ -735,17 +735,15 @@ xfs_initialize_perag_data(
                btree += pag->pagf_btreeblks;
                xfs_perag_put(pag);
        }
-       /*
-        * Overwrite incore superblock counters with just-read data
-        */
+
+       /* Overwrite incore superblock counters with just-read data */
        spin_lock(&mp->m_sb_lock);
        sbp->sb_ifree = ifree;
        sbp->sb_icount = ialloc;
        sbp->sb_fdblocks = bfree + bfreelst + btree;
        spin_unlock(&mp->m_sb_lock);
 
-       /* Fixup the per-cpu counters as well. */
-       xfs_icsb_reinit_counters(mp);
+       xfs_reinit_percpu_counters(mp);
 
        return 0;
 }
@@ -763,6 +761,10 @@ xfs_log_sb(
        struct xfs_mount        *mp = tp->t_mountp;
        struct xfs_buf          *bp = xfs_trans_getsb(tp, mp, 0);
 
+       mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+       mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+       mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+
        xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
        xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
index fe1f11b96d0d1c2d232d15f2163f83fb2b46ba4e..1bd539321799510834c9f21bda98d563232c9f13 100644 (file)
@@ -1660,13 +1660,6 @@ xfs_swap_extent_flush(
        /* Verify O_DIRECT for ftmp */
        if (VFS_I(ip)->i_mapping->nrpages)
                return -EINVAL;
-
-       /*
-        * Don't try to swap extents on mmap()d files because we can't lock
-        * out races against page faults safely.
-        */
-       if (mapping_mapped(VFS_I(ip)->i_mapping))
-               return -EBUSY;
        return 0;
 }
 
@@ -1694,13 +1687,14 @@ xfs_swap_extents(
        }
 
        /*
-        * Lock up the inodes against other IO and truncate to begin with.
-        * Then we can ensure the inodes are flushed and have no page cache
-        * safely. Once we have done this we can take the ilocks and do the rest
-        * of the checks.
+        * Lock the inodes against other IO, page faults and truncate to
+        * begin with.  Then we can ensure the inodes are flushed and have no
+        * page cache safely. Once we have done this we can take the ilocks and
+        * do the rest of the checks.
         */
-       lock_flags = XFS_IOLOCK_EXCL;
+       lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
        xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+       xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
 
        /* Verify that both files have the same format */
        if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1727,8 +1721,16 @@ xfs_swap_extents(
                xfs_trans_cancel(tp, 0);
                goto out_unlock;
        }
+
+       /*
+        * Lock and join the inodes to the tansaction so that transaction commit
+        * or cancel will unlock the inodes from this point onwards.
+        */
        xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
        lock_flags |= XFS_ILOCK_EXCL;
+       xfs_trans_ijoin(tp, ip, lock_flags);
+       xfs_trans_ijoin(tp, tip, lock_flags);
+
 
        /* Verify all data are being swapped */
        if (sxp->sx_offset != 0 ||
@@ -1781,9 +1783,6 @@ xfs_swap_extents(
                        goto out_trans_cancel;
        }
 
-       xfs_trans_ijoin(tp, ip, lock_flags);
-       xfs_trans_ijoin(tp, tip, lock_flags);
-
        /*
         * Before we've swapped the forks, lets set the owners of the forks
         * appropriately. We have to do this as we are demand paging the btree
@@ -1917,5 +1916,5 @@ out_unlock:
 
 out_trans_cancel:
        xfs_trans_cancel(tp, 0);
-       goto out_unlock;
+       goto out;
 }
index 3ee186ac10938855965a5907c93de487e83851bf..338e50bbfd1ec8bc8345fbbd84cdc56b82d5f43a 100644 (file)
@@ -131,7 +131,7 @@ xfs_error_report(
 {
        if (level <= xfs_error_level) {
                xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
-               "Internal error %s at line %d of file %s.  Caller %pF",
+               "Internal error %s at line %d of file %s.  Caller %pS",
                            tag, linenum, filename, ra);
 
                xfs_stack_trace();
index edeaccc7961ae0bdb9d59750c0a80ea9d4946f3f..dc5f609bea88c919534348d90acf7e82359ca7e8 100644 (file)
@@ -397,7 +397,8 @@ STATIC int                          /* error (positive) */
 xfs_zero_last_block(
        struct xfs_inode        *ip,
        xfs_fsize_t             offset,
-       xfs_fsize_t             isize)
+       xfs_fsize_t             isize,
+       bool                    *did_zeroing)
 {
        struct xfs_mount        *mp = ip->i_mount;
        xfs_fileoff_t           last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +426,7 @@ xfs_zero_last_block(
        zero_len = mp->m_sb.sb_blocksize - zero_offset;
        if (isize + zero_len > offset)
                zero_len = offset - isize;
+       *did_zeroing = true;
        return xfs_iozero(ip, isize, zero_len);
 }
 
@@ -443,7 +445,8 @@ int                                 /* error (positive) */
 xfs_zero_eof(
        struct xfs_inode        *ip,
        xfs_off_t               offset,         /* starting I/O offset */
-       xfs_fsize_t             isize)          /* current inode size */
+       xfs_fsize_t             isize,          /* current inode size */
+       bool                    *did_zeroing)
 {
        struct xfs_mount        *mp = ip->i_mount;
        xfs_fileoff_t           start_zero_fsb;
@@ -465,7 +468,7 @@ xfs_zero_eof(
         * We only zero a part of that block so it is handled specially.
         */
        if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-               error = xfs_zero_last_block(ip, offset, isize);
+               error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
                if (error)
                        return error;
        }
@@ -525,6 +528,7 @@ xfs_zero_eof(
                if (error)
                        return error;
 
+               *did_zeroing = true;
                start_zero_fsb = imap.br_startoff + imap.br_blockcount;
                ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
        }
@@ -567,13 +571,15 @@ restart:
         * having to redo all checks before.
         */
        if (*pos > i_size_read(inode)) {
+               bool    zero = false;
+
                if (*iolock == XFS_IOLOCK_SHARED) {
                        xfs_rw_iunlock(ip, *iolock);
                        *iolock = XFS_IOLOCK_EXCL;
                        xfs_rw_ilock(ip, *iolock);
                        goto restart;
                }
-               error = xfs_zero_eof(ip, *pos, i_size_read(inode));
+               error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
                if (error)
                        return error;
        }
@@ -846,6 +852,9 @@ xfs_file_fallocate(
        if (error)
                goto out_unlock;
 
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock |= XFS_MMAPLOCK_EXCL;
+
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                error = xfs_free_file_space(ip, offset, len);
                if (error)
@@ -1027,20 +1036,6 @@ xfs_file_mmap(
        return 0;
 }
 
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-       struct vm_area_struct   *vma,
-       struct vm_fault         *vmf)
-{
-       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
 /*
  * This type is designed to indicate the type of offset we would like
  * to search from page cache for xfs_seek_hole_data().
@@ -1416,6 +1411,55 @@ xfs_file_llseek(
        }
 }
 
+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ *   i_mmap_lock (XFS - truncate serialisation)
+ *     page_lock (MM)
+ *       i_lock (XFS - extent map serialisation)
+ */
+STATIC int
+xfs_filemap_fault(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_fault(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = filemap_fault(vma, vmf);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+STATIC int
+xfs_filemap_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_page_mkwrite(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
 const struct file_operations xfs_file_operations = {
        .llseek         = xfs_file_llseek,
        .read           = new_sync_read,
@@ -1448,7 +1492,7 @@ const struct file_operations xfs_dir_file_operations = {
 };
 
 static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = xfs_filemap_fault,
        .map_pages      = filemap_map_pages,
-       .page_mkwrite   = xfs_vm_page_mkwrite,
+       .page_mkwrite   = xfs_filemap_page_mkwrite,
 };
index a2e86e8a0feac8fabf316fb8a790fc805153469a..8f9f854376c6235089fd9812590711d7dc091002 100644 (file)
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
 
        pip = xfs_filestream_get_parent(ip);
        if (!pip)
-               goto out;
+               return NULLAGNUMBER;
 
        mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
        if (mru) {
index 74efe5b760dcc2907280db8e374afe6f5914d7b3..cb7e8a29dfb6e9e8b2b0f1044cc67a9dca5e1603 100644 (file)
@@ -637,12 +637,13 @@ xfs_fs_counts(
        xfs_mount_t             *mp,
        xfs_fsop_counts_t       *cnt)
 {
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+       cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
+       cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
+       cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
+                                                       XFS_ALLOC_SET_ASIDE(mp);
+
        spin_lock(&mp->m_sb_lock);
-       cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
        cnt->freertx = mp->m_sb.sb_frextents;
-       cnt->freeino = mp->m_sb.sb_ifree;
-       cnt->allocino = mp->m_sb.sb_icount;
        spin_unlock(&mp->m_sb_lock);
        return 0;
 }
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
         * what to do. This means that the amount of free space can
         * change while we do this, so we need to retry if we end up
         * trying to reserve more space than is available.
-        *
-        * We also use the xfs_mod_incore_sb() interface so that we
-        * don't have to care about whether per cpu counter are
-        * enabled, disabled or even compiled in....
         */
 retry:
        spin_lock(&mp->m_sb_lock);
-       xfs_icsb_sync_counters_locked(mp, 0);
 
        /*
         * If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
        } else {
                __int64_t       free;
 
-               free =  mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+               free = percpu_counter_sum(&mp->m_fdblocks) -
+                                                       XFS_ALLOC_SET_ASIDE(mp);
                if (!free)
                        goto out; /* ENOSPC and fdblks_delta = 0 */
 
@@ -755,8 +752,7 @@ out:
                 * the extra reserve blocks from the reserve.....
                 */
                int error;
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                fdblks_delta, 0);
+               error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
                if (error == -ENOSPC)
                        goto retry;
        }
index d0414f3059672bca2ab5c1386b136ca9ba525f1d..d6ebc85192b7b3f4fd21e3cbc25ccb5f54501319 100644 (file)
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
 }
 
 /*
- * The xfs inode contains 2 locks: a multi-reader lock called the
- * i_iolock and a multi-reader lock called the i_lock.  This routine
- * allows either or both of the locks to be obtained.
+ * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and
+ * the i_lock.  This routine allows various combinations of the locks to be
+ * obtained.
  *
- * The 2 locks should always be ordered so that the IO lock is
- * obtained first in order to prevent deadlock.
+ * The 3 locks should always be ordered so that the IO lock is obtained first,
+ * the mmap lock second and the ilock last in order to prevent deadlock.
  *
- * ip -- the inode being locked
- * lock_flags -- this parameter indicates the inode's locks
- *       to be locked.  It can be:
- *             XFS_IOLOCK_SHARED,
- *             XFS_IOLOCK_EXCL,
- *             XFS_ILOCK_SHARED,
- *             XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
+ * Basic locking order:
+ *
+ * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
+ *
+ * mmap_sem locking order:
+ *
+ * i_iolock -> page lock -> mmap_sem
+ * mmap_sem -> i_mmap_lock -> page_lock
+ *
+ * The difference in mmap_sem locking order mean that we cannot hold the
+ * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
+ * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
+ * in get_user_pages() to map the user pages into the kernel address space for
+ * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
+ * page faults already hold the mmap_sem.
+ *
+ * Hence to serialise fully against both syscall and mmap based IO, we need to
+ * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
+ * taken in places where we need to invalidate the page cache in a race
+ * free manner (e.g. truncate, hole punch and other extent manipulation
+ * functions).
  */
 void
 xfs_ilock(
@@ -150,6 +160,8 @@ xfs_ilock(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
 
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+
        if (lock_flags & XFS_ILOCK_EXCL)
                mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
        else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
                if (!mrtryaccess(&ip->i_iolock))
                        goto out;
        }
+
+       if (lock_flags & XFS_MMAPLOCK_EXCL) {
+               if (!mrtryupdate(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+               if (!mrtryaccess(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       }
+
        if (lock_flags & XFS_ILOCK_EXCL) {
                if (!mrtryupdate(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
        } else if (lock_flags & XFS_ILOCK_SHARED) {
                if (!mrtryaccess(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
        }
        return 1;
 
- out_undo_iolock:
+out_undo_mmaplock:
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+out_undo_iolock:
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrunlock_excl(&ip->i_iolock);
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mrunlock_shared(&ip->i_iolock);
- out:
+out:
        return 0;
 }
 
@@ -244,6 +277,8 @@ xfs_iunlock(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mrunlock_shared(&ip->i_iolock);
 
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+
        if (lock_flags & XFS_ILOCK_EXCL)
                mrunlock_excl(&ip->i_lock);
        else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
        xfs_inode_t             *ip,
        uint                    lock_flags)
 {
-       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
+       ASSERT((lock_flags &
+               ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
 
        if (lock_flags & XFS_ILOCK_EXCL)
                mrdemote(&ip->i_lock);
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrdemote(&ip->i_mmaplock);
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrdemote(&ip->i_iolock);
 
@@ -294,6 +337,12 @@ xfs_isilocked(
                return rwsem_is_locked(&ip->i_lock.mr_lock);
        }
 
+       if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
+               if (!(lock_flags & XFS_MMAPLOCK_SHARED))
+                       return !!ip->i_mmaplock.mr_writer;
+               return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+       }
+
        if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
                if (!(lock_flags & XFS_IOLOCK_SHARED))
                        return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
 #endif
 
 /*
- * Bump the subclass so xfs_lock_inodes() acquires each lock with
- * a different value
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
+ * value. This shouldn't be called for page fault locking, but we also need to
+ * ensure we don't overrun the number of lockdep subclasses for the iolock or
+ * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
  */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+       }
+
+       if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+               lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
+                                                       XFS_MMAPLOCK_SHIFT;
+       }
+
        if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
 
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
 }
 
 /*
- * The following routine will lock n inodes in exclusive mode.
- * We assume the caller calls us with the inodes in i_ino order.
+ * The following routine will lock n inodes in exclusive mode.  We assume the
+ * caller calls us with the inodes in i_ino order.
  *
- * We need to detect deadlock where an inode that we lock
- * is in the AIL and we start waiting for another inode that is locked
- * by a thread in a long running transaction (such as truncate). This can
- * result in deadlock since the long running trans might need to wait
- * for the inode we just locked in order to push the tail and free space
- * in the log.
+ * We need to detect deadlock where an inode that we lock is in the AIL and we
+ * start waiting for another inode that is locked by a thread in a long running
+ * transaction (such as truncate). This can result in deadlock since the long
+ * running trans might need to wait for the inode we just locked in order to
+ * push the tail and free space in the log.
  */
 void
 xfs_lock_inodes(
@@ -348,30 +409,27 @@ xfs_lock_inodes(
        int             attempts = 0, i, j, try_lock;
        xfs_log_item_t  *lp;
 
-       ASSERT(ips && (inodes >= 2)); /* we need at least two */
+       /* currently supports between 2 and 5 inodes */
+       ASSERT(ips && inodes >= 2 && inodes <= 5);
 
        try_lock = 0;
        i = 0;
-
 again:
        for (; i < inodes; i++) {
                ASSERT(ips[i]);
 
-               if (i && (ips[i] == ips[i-1]))  /* Already locked */
+               if (i && (ips[i] == ips[i - 1]))        /* Already locked */
                        continue;
 
                /*
-                * If try_lock is not set yet, make sure all locked inodes
-                * are not in the AIL.
-                * If any are, set try_lock to be used later.
+                * If try_lock is not set yet, make sure all locked inodes are
+                * not in the AIL.  If any are, set try_lock to be used later.
                 */
-
                if (!try_lock) {
                        for (j = (i - 1); j >= 0 && !try_lock; j--) {
                                lp = (xfs_log_item_t *)ips[j]->i_itemp;
-                               if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+                               if (lp && (lp->li_flags & XFS_LI_IN_AIL))
                                        try_lock++;
-                               }
                        }
                }
 
@@ -381,51 +439,42 @@ again:
                 * we can't get any, we must release all we have
                 * and try again.
                 */
+               if (!try_lock) {
+                       xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
+                       continue;
+               }
+
+               /* try_lock means we have an inode locked that is in the AIL. */
+               ASSERT(i != 0);
+               if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
+                       continue;
 
-               if (try_lock) {
-                       /* try_lock must be 0 if i is 0. */
+               /*
+                * Unlock all previous guys and try again.  xfs_iunlock will try
+                * to push the tail if the inode is in the AIL.
+                */
+               attempts++;
+               for (j = i - 1; j >= 0; j--) {
                        /*
-                        * try_lock means we have an inode locked
-                        * that is in the AIL.
+                        * Check to see if we've already unlocked this one.  Not
+                        * the first one going back, and the inode ptr is the
+                        * same.
                         */
-                       ASSERT(i != 0);
-                       if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
-                               attempts++;
-
-                               /*
-                                * Unlock all previous guys and try again.
-                                * xfs_iunlock will try to push the tail
-                                * if the inode is in the AIL.
-                                */
-
-                               for(j = i - 1; j >= 0; j--) {
-
-                                       /*
-                                        * Check to see if we've already
-                                        * unlocked this one.
-                                        * Not the first one going back,
-                                        * and the inode ptr is the same.
-                                        */
-                                       if ((j != (i - 1)) && ips[j] ==
-                                                               ips[j+1])
-                                               continue;
-
-                                       xfs_iunlock(ips[j], lock_mode);
-                               }
+                       if (j != (i - 1) && ips[j] == ips[j + 1])
+                               continue;
 
-                               if ((attempts % 5) == 0) {
-                                       delay(1); /* Don't just spin the CPU */
+                       xfs_iunlock(ips[j], lock_mode);
+               }
+
+               if ((attempts % 5) == 0) {
+                       delay(1); /* Don't just spin the CPU */
 #ifdef DEBUG
-                                       xfs_lock_delays++;
+                       xfs_lock_delays++;
 #endif
-                               }
-                               i = 0;
-                               try_lock = 0;
-                               goto again;
-                       }
-               } else {
-                       xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
                }
+               i = 0;
+               try_lock = 0;
+               goto again;
        }
 
 #ifdef DEBUG
@@ -440,10 +489,10 @@ again:
 }
 
 /*
- * xfs_lock_two_inodes() can only be used to lock one type of lock
- * at a time - the iolock or the ilock, but not both at once. If
- * we lock both at once, lockdep will report false positives saying
- * we have violated locking orders.
+ * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
  */
 void
 xfs_lock_two_inodes(
@@ -455,8 +504,12 @@ xfs_lock_two_inodes(
        int                     attempts = 0;
        xfs_log_item_t          *lp;
 
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-               ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+
        ASSERT(ip0->i_ino != ip1->i_ino);
 
        if (ip0->i_ino > ip1->i_ino) {
@@ -2615,19 +2668,22 @@ xfs_remove(
 /*
  * Enter all inodes for a rename transaction into a sorted array.
  */
+#define __XFS_SORT_INODES      5
 STATIC void
 xfs_sort_for_rename(
-       xfs_inode_t     *dp1,   /* in: old (source) directory inode */
-       xfs_inode_t     *dp2,   /* in: new (target) directory inode */
-       xfs_inode_t     *ip1,   /* in: inode of old entry */
-       xfs_inode_t     *ip2,   /* in: inode of new entry, if it
-                                  already exists, NULL otherwise. */
-       xfs_inode_t     **i_tab,/* out: array of inode returned, sorted */
-       int             *num_inodes)  /* out: number of inodes in array */
+       struct xfs_inode        *dp1,   /* in: old (source) directory inode */
+       struct xfs_inode        *dp2,   /* in: new (target) directory inode */
+       struct xfs_inode        *ip1,   /* in: inode of old entry */
+       struct xfs_inode        *ip2,   /* in: inode of new entry */
+       struct xfs_inode        *wip,   /* in: whiteout inode */
+       struct xfs_inode        **i_tab,/* out: sorted array of inodes */
+       int                     *num_inodes)  /* in/out: inodes in array */
 {
-       xfs_inode_t             *temp;
        int                     i, j;
 
+       ASSERT(*num_inodes == __XFS_SORT_INODES);
+       memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
+
        /*
         * i_tab contains a list of pointers to inodes.  We initialize
         * the table here & we'll sort it.  We will then use it to
@@ -2635,25 +2691,24 @@ xfs_sort_for_rename(
         *
         * Note that the table may contain duplicates.  e.g., dp1 == dp2.
         */
-       i_tab[0] = dp1;
-       i_tab[1] = dp2;
-       i_tab[2] = ip1;
-       if (ip2) {
-               *num_inodes = 4;
-               i_tab[3] = ip2;
-       } else {
-               *num_inodes = 3;
-               i_tab[3] = NULL;
-       }
+       i = 0;
+       i_tab[i++] = dp1;
+       i_tab[i++] = dp2;
+       i_tab[i++] = ip1;
+       if (ip2)
+               i_tab[i++] = ip2;
+       if (wip)
+               i_tab[i++] = wip;
+       *num_inodes = i;
 
        /*
         * Sort the elements via bubble sort.  (Remember, there are at
-        * most 4 elements to sort, so this is adequate.)
+        * most 5 elements to sort, so this is adequate.)
         */
        for (i = 0; i < *num_inodes; i++) {
                for (j = 1; j < *num_inodes; j++) {
                        if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
-                               temp = i_tab[j];
+                               struct xfs_inode *temp = i_tab[j];
                                i_tab[j] = i_tab[j-1];
                                i_tab[j-1] = temp;
                        }
@@ -2661,6 +2716,31 @@ xfs_sort_for_rename(
        }
 }
 
+static int
+xfs_finish_rename(
+       struct xfs_trans        *tp,
+       struct xfs_bmap_free    *free_list)
+{
+       int                     committed = 0;
+       int                     error;
+
+       /*
+        * If this is a synchronous mount, make sure that the rename transaction
+        * goes to disk before returning to the user.
+        */
+       if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+               xfs_trans_set_sync(tp);
+
+       error = xfs_bmap_finish(&tp, free_list, &committed);
+       if (error) {
+               xfs_bmap_cancel(free_list);
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+               return error;
+       }
+
+       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+}
+
 /*
  * xfs_cross_rename()
  *
@@ -2689,14 +2769,14 @@ xfs_cross_rename(
                                ip2->i_ino,
                                first_block, free_list, spaceres);
        if (error)
-               goto out;
+               goto out_trans_abort;
 
        /* Swap inode number for dirent in second parent */
        error = xfs_dir_replace(tp, dp2, name2,
                                ip1->i_ino,
                                first_block, free_list, spaceres);
        if (error)
-               goto out;
+               goto out_trans_abort;
 
        /*
         * If we're renaming one or more directories across different parents,
@@ -2711,16 +2791,16 @@ xfs_cross_rename(
                                                dp1->i_ino, first_block,
                                                free_list, spaceres);
                        if (error)
-                               goto out;
+                               goto out_trans_abort;
 
                        /* transfer ip2 ".." reference to dp1 */
                        if (!S_ISDIR(ip1->i_d.di_mode)) {
                                error = xfs_droplink(tp, dp2);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                                error = xfs_bumplink(tp, dp1);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                        }
 
                        /*
@@ -2738,16 +2818,16 @@ xfs_cross_rename(
                                                dp2->i_ino, first_block,
                                                free_list, spaceres);
                        if (error)
-                               goto out;
+                               goto out_trans_abort;
 
                        /* transfer ip1 ".." reference to dp2 */
                        if (!S_ISDIR(ip2->i_d.di_mode)) {
                                error = xfs_droplink(tp, dp1);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                                error = xfs_bumplink(tp, dp2);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                        }
 
                        /*
@@ -2775,66 +2855,108 @@ xfs_cross_rename(
        }
        xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
-out:
+       return xfs_finish_rename(tp, free_list);
+
+out_trans_abort:
+       xfs_bmap_cancel(free_list);
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
        return error;
 }
 
+/*
+ * xfs_rename_alloc_whiteout()
+ *
+ * Return a referenced, unlinked, unlocked inode that that can be used as a
+ * whiteout in a rename transaction. We use a tmpfile inode here so that if we
+ * crash between allocating the inode and linking it into the rename transaction
+ * recovery will free the inode and we won't leak it.
+ */
+static int
+xfs_rename_alloc_whiteout(
+       struct xfs_inode        *dp,
+       struct xfs_inode        **wip)
+{
+       struct xfs_inode        *tmpfile;
+       int                     error;
+
+       error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
+       if (error)
+               return error;
+
+       /* Satisfy xfs_bumplink that this is a real tmpfile */
+       xfs_finish_inode_setup(tmpfile);
+       VFS_I(tmpfile)->i_state |= I_LINKABLE;
+
+       *wip = tmpfile;
+       return 0;
+}
+
 /*
  * xfs_rename
  */
 int
 xfs_rename(
-       xfs_inode_t     *src_dp,
-       struct xfs_name *src_name,
-       xfs_inode_t     *src_ip,
-       xfs_inode_t     *target_dp,
-       struct xfs_name *target_name,
-       xfs_inode_t     *target_ip,
-       unsigned int    flags)
+       struct xfs_inode        *src_dp,
+       struct xfs_name         *src_name,
+       struct xfs_inode        *src_ip,
+       struct xfs_inode        *target_dp,
+       struct xfs_name         *target_name,
+       struct xfs_inode        *target_ip,
+       unsigned int            flags)
 {
-       xfs_trans_t     *tp = NULL;
-       xfs_mount_t     *mp = src_dp->i_mount;
-       int             new_parent;             /* moving to a new dir */
-       int             src_is_directory;       /* src_name is a directory */
-       int             error;
-       xfs_bmap_free_t free_list;
-       xfs_fsblock_t   first_block;
-       int             cancel_flags;
-       int             committed;
-       xfs_inode_t     *inodes[4];
-       int             spaceres;
-       int             num_inodes;
+       struct xfs_mount        *mp = src_dp->i_mount;
+       struct xfs_trans        *tp;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           first_block;
+       struct xfs_inode        *wip = NULL;            /* whiteout inode */
+       struct xfs_inode        *inodes[__XFS_SORT_INODES];
+       int                     num_inodes = __XFS_SORT_INODES;
+       bool                    new_parent = (src_dp != target_dp);
+       bool                    src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       int                     cancel_flags = 0;
+       int                     spaceres;
+       int                     error;
 
        trace_xfs_rename(src_dp, target_dp, src_name, target_name);
 
-       new_parent = (src_dp != target_dp);
-       src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       if ((flags & RENAME_EXCHANGE) && !target_ip)
+               return -EINVAL;
+
+       /*
+        * If we are doing a whiteout operation, allocate the whiteout inode
+        * we will be placing at the target and ensure the type is set
+        * appropriately.
+        */
+       if (flags & RENAME_WHITEOUT) {
+               ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
+               error = xfs_rename_alloc_whiteout(target_dp, &wip);
+               if (error)
+                       return error;
+
+               /* setup target dirent info as whiteout */
+               src_name->type = XFS_DIR3_FT_CHRDEV;
+       }
 
-       xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
+       xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
                                inodes, &num_inodes);
 
-       xfs_bmap_init(&free_list, &first_block);
        tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
        if (error == -ENOSPC) {
                spaceres = 0;
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
        }
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               goto std_return;
-       }
+       if (error)
+               goto out_trans_cancel;
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
 
        /*
         * Attach the dquots to the inodes
         */
        error = xfs_qm_vop_rename_dqattach(inodes);
-       if (error) {
-               xfs_trans_cancel(tp, cancel_flags);
-               goto std_return;
-       }
+       if (error)
+               goto out_trans_cancel;
 
        /*
         * Lock all the participating inodes. Depending upon whether
@@ -2855,6 +2977,8 @@ xfs_rename(
        xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
        if (target_ip)
                xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
+       if (wip)
+               xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
 
        /*
         * If we are using project inheritance, we only allow renames
@@ -2864,20 +2988,16 @@ xfs_rename(
        if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
                     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
                error = -EXDEV;
-               goto error_return;
+               goto out_trans_cancel;
        }
 
-       /*
-        * Handle RENAME_EXCHANGE flags
-        */
-       if (flags & RENAME_EXCHANGE) {
-               error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
-                                        target_dp, target_name, target_ip,
-                                        &free_list, &first_block, spaceres);
-               if (error)
-                       goto abort_return;
-               goto finish_rename;
-       }
+       xfs_bmap_init(&free_list, &first_block);
+
+       /* RENAME_EXCHANGE is unique from here on. */
+       if (flags & RENAME_EXCHANGE)
+               return xfs_cross_rename(tp, src_dp, src_name, src_ip,
+                                       target_dp, target_name, target_ip,
+                                       &free_list, &first_block, spaceres);
 
        /*
         * Set up the target.
@@ -2890,7 +3010,7 @@ xfs_rename(
                if (!spaceres) {
                        error = xfs_dir_canenter(tp, target_dp, target_name);
                        if (error)
-                               goto error_return;
+                               goto out_trans_cancel;
                }
                /*
                 * If target does not exist and the rename crosses
@@ -2901,9 +3021,9 @@ xfs_rename(
                                                src_ip->i_ino, &first_block,
                                                &free_list, spaceres);
                if (error == -ENOSPC)
-                       goto error_return;
+                       goto out_bmap_cancel;
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
 
                xfs_trans_ichgtime(tp, target_dp,
                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2911,7 +3031,7 @@ xfs_rename(
                if (new_parent && src_is_directory) {
                        error = xfs_bumplink(tp, target_dp);
                        if (error)
-                               goto abort_return;
+                               goto out_trans_abort;
                }
        } else { /* target_ip != NULL */
                /*
@@ -2926,7 +3046,7 @@ xfs_rename(
                        if (!(xfs_dir_isempty(target_ip)) ||
                            (target_ip->i_d.di_nlink > 2)) {
                                error = -EEXIST;
-                               goto error_return;
+                               goto out_trans_cancel;
                        }
                }
 
@@ -2943,7 +3063,7 @@ xfs_rename(
                                        src_ip->i_ino,
                                        &first_block, &free_list, spaceres);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
 
                xfs_trans_ichgtime(tp, target_dp,
                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2954,7 +3074,7 @@ xfs_rename(
                 */
                error = xfs_droplink(tp, target_ip);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
 
                if (src_is_directory) {
                        /*
@@ -2962,7 +3082,7 @@ xfs_rename(
                         */
                        error = xfs_droplink(tp, target_ip);
                        if (error)
-                               goto abort_return;
+                               goto out_trans_abort;
                }
        } /* target_ip != NULL */
 
@@ -2979,7 +3099,7 @@ xfs_rename(
                                        &first_block, &free_list, spaceres);
                ASSERT(error != -EEXIST);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
        }
 
        /*
@@ -3005,49 +3125,67 @@ xfs_rename(
                 */
                error = xfs_droplink(tp, src_dp);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
        }
 
-       error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+       /*
+        * For whiteouts, we only need to update the source dirent with the
+        * inode number of the whiteout inode rather than removing it
+        * altogether.
+        */
+       if (wip) {
+               error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
                                        &first_block, &free_list, spaceres);
+       } else
+               error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+                                          &first_block, &free_list, spaceres);
        if (error)
-               goto abort_return;
-
-       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
-       if (new_parent)
-               xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
+               goto out_trans_abort;
 
-finish_rename:
        /*
-        * If this is a synchronous mount, make sure that the
-        * rename transaction goes to disk before returning to
-        * the user.
+        * For whiteouts, we need to bump the link count on the whiteout inode.
+        * This means that failures all the way up to this point leave the inode
+        * on the unlinked list and so cleanup is a simple matter of dropping
+        * the remaining reference to it. If we fail here after bumping the link
+        * count, we're shutting down the filesystem so we'll never see the
+        * intermediate state on disk.
         */
-       if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-               xfs_trans_set_sync(tp);
-       }
+       if (wip) {
+               ASSERT(wip->i_d.di_nlink == 0);
+               error = xfs_bumplink(tp, wip);
+               if (error)
+                       goto out_trans_abort;
+               error = xfs_iunlink_remove(tp, wip);
+               if (error)
+                       goto out_trans_abort;
+               xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
 
-       error = xfs_bmap_finish(&tp, &free_list, &committed);
-       if (error) {
-               xfs_bmap_cancel(&free_list);
-               xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
-                                XFS_TRANS_ABORT));
-               goto std_return;
+               /*
+                * Now we have a real link, clear the "I'm a tmpfile" state
+                * flag from the inode so it doesn't accidentally get misused
+                * in the future.
+                */
+               VFS_I(wip)->i_state &= ~I_LINKABLE;
        }
 
-       /*
-        * trans_commit will unlock src_ip, target_ip & decrement
-        * the vnode references.
-        */
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
+       if (new_parent)
+               xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
 
- abort_return:
+       error = xfs_finish_rename(tp, &free_list);
+       if (wip)
+               IRELE(wip);
+       return error;
+
+out_trans_abort:
        cancel_flags |= XFS_TRANS_ABORT;
- error_return:
+out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
+out_trans_cancel:
        xfs_trans_cancel(tp, cancel_flags);
- std_return:
+       if (wip)
+               IRELE(wip);
        return error;
 }
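
The relabelled exit paths above are the stock kernel unwind idiom: each out_* label releases one more piece of state, and deeper failures fall through the shallower labels in reverse acquisition order. A standalone userspace sketch of the same shape (the resources here are hypothetical stand-ins, not the XFS objects):

#include <stdio.h>
#include <stdlib.h>

/* Two fake resources standing in for the transaction and the bmap
 * free list; only the unwind structure is the point here. */
static int do_operation(void)
{
	int error;
	char *trans = malloc(32);		/* "transaction" */
	if (!trans)
		return -1;

	char *freelist = malloc(32);		/* "free list" */
	if (!freelist) {
		error = -1;
		goto out_trans_cancel;
	}

	error = -1;				/* pretend the final step failed */
	if (error)
		goto out_bmap_cancel;

	free(freelist);
	free(trans);
	return 0;

out_bmap_cancel:
	free(freelist);		/* undo the deeper state first... */
out_trans_cancel:
	free(trans);		/* ...then fall through to the shallower */
	return error;
}

int main(void)
{
	printf("do_operation() = %d\n", do_operation());
	return 0;
}
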
 
index 8e82b41d2050c2a2266eb38310453cb9d3f535ba..8f22d20368d8ff4c0232174f8bdc7c1e1bcf28eb 100644 (file)
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
        struct xfs_inode_log_item *i_itemp;     /* logging information */
        mrlock_t                i_lock;         /* inode lock */
        mrlock_t                i_iolock;       /* inode IO lock */
+       mrlock_t                i_mmaplock;     /* inode mmap IO lock */
        atomic_t                i_pincount;     /* inode pin count */
        spinlock_t              i_flags_lock;   /* inode i_flags lock */
        /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define        XFS_IOLOCK_SHARED       (1<<1)
 #define        XFS_ILOCK_EXCL          (1<<2)
 #define        XFS_ILOCK_SHARED        (1<<3)
+#define        XFS_MMAPLOCK_EXCL       (1<<4)
+#define        XFS_MMAPLOCK_SHARED     (1<<5)
 
 #define XFS_LOCK_MASK          (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
-                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
+                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
+                               | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
 
 #define XFS_LOCK_FLAGS \
        { XFS_IOLOCK_EXCL,      "IOLOCK_EXCL" }, \
        { XFS_IOLOCK_SHARED,    "IOLOCK_SHARED" }, \
        { XFS_ILOCK_EXCL,       "ILOCK_EXCL" }, \
-       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }
+       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }, \
+       { XFS_MMAPLOCK_EXCL,    "MMAPLOCK_EXCL" }, \
+       { XFS_MMAPLOCK_SHARED,  "MMAPLOCK_SHARED" }
 
 
 /*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define XFS_IOLOCK_SHIFT       16
 #define        XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
 
+#define XFS_MMAPLOCK_SHIFT     20
+
 #define XFS_ILOCK_SHIFT                24
 #define        XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
 #define        XFS_ILOCK_RTBITMAP      (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
 #define        XFS_ILOCK_RTSUM         (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
 
-#define XFS_IOLOCK_DEP_MASK    0x00ff0000
+#define XFS_IOLOCK_DEP_MASK    0x000f0000
+#define XFS_MMAPLOCK_DEP_MASK  0x00f00000
 #define XFS_ILOCK_DEP_MASK     0xff000000
-#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
+#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | \
+                                XFS_MMAPLOCK_DEP_MASK | \
+                                XFS_ILOCK_DEP_MASK)
 
-#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
-#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
+#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) \
+                                       >> XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_DEP(flags)        (((flags) & XFS_MMAPLOCK_DEP_MASK) \
+                                       >> XFS_MMAPLOCK_SHIFT)
+#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) \
+                                       >> XFS_ILOCK_SHIFT)
 
 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
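
The MMAPLOCK gets its own lockdep-subclass window in the flags word, which is why the IOLOCK dependency mask shrinks from 0x00ff0000 to 0x000f0000: bits 16-19 now encode the IOLOCK subclass, bits 20-23 the MMAPLOCK subclass, and bits 24-31 the ILOCK subclass. A self-contained model of the pack/extract scheme (same shifts and masks as above, but an illustrative program only):

#include <stdio.h>

#define IOLOCK_SHIFT	16
#define MMAPLOCK_SHIFT	20
#define ILOCK_SHIFT	24

#define IOLOCK_DEP_MASK		0x000f0000u	/* 4 bits */
#define MMAPLOCK_DEP_MASK	0x00f00000u	/* 4 bits */
#define ILOCK_DEP_MASK		0xff000000u	/* 8 bits */

/* Pack one lockdep subclass per lock class into the shared flags word. */
static unsigned int pack(unsigned int io, unsigned int mmap, unsigned int il)
{
	return (io << IOLOCK_SHIFT) | (mmap << MMAPLOCK_SHIFT) |
	       (il << ILOCK_SHIFT);
}

int main(void)
{
	unsigned int flags = pack(1, 2, 3);

	printf("iolock dep   = %u\n", (flags & IOLOCK_DEP_MASK) >> IOLOCK_SHIFT);
	printf("mmaplock dep = %u\n", (flags & MMAPLOCK_DEP_MASK) >> MMAPLOCK_SHIFT);
	printf("ilock dep    = %u\n", (flags & ILOCK_DEP_MASK) >> ILOCK_SHIFT);
	return 0;
}
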
@@ -384,10 +399,11 @@ enum xfs_prealloc_flags {
        XFS_PREALLOC_INVISIBLE  = (1 << 4),
 };
 
-int            xfs_update_prealloc_flags(struct xfs_inode *,
-                       enum xfs_prealloc_flags);
-int            xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
-int            xfs_iozero(struct xfs_inode *, loff_t, size_t);
+int    xfs_update_prealloc_flags(struct xfs_inode *ip,
+                                 enum xfs_prealloc_flags flags);
+int    xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
+                    xfs_fsize_t isize, bool *did_zeroing);
+int    xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
 
 
 /* from xfs_iops.c */
index ac4feae45eb308c39629f177c0b6620fae77fb69..4ee44ddfdfb766738ff33fd35375e4eb87a4dba0 100644 (file)
@@ -631,7 +631,7 @@ xfs_ioc_space(
 
        if (filp->f_flags & O_DSYNC)
                flags |= XFS_PREALLOC_SYNC;
-       if (ioflags & XFS_IO_INVIS)     
+       if (ioflags & XFS_IO_INVIS)
                flags |= XFS_PREALLOC_INVISIBLE;
 
        error = mnt_want_write_file(filp);
@@ -643,6 +643,9 @@ xfs_ioc_space(
        if (error)
                goto out_unlock;
 
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock |= XFS_MMAPLOCK_EXCL;
+
        switch (bf->l_whence) {
        case 0: /*SEEK_SET*/
                break;
index ccb1dd0d509ef8f9d50f1fb0d51e6c4fe6712237..38e633bad8c2a2c919d1b1c9a4b9bda1d24d2ec4 100644 (file)
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
        alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
                                       alloc_blocks);
 
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-       freesp = mp->m_sb.sb_fdblocks;
+       freesp = percpu_counter_read_positive(&mp->m_fdblocks);
        if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
                shift = 2;
                if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
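
The preallocation throttle only needs an approximate view of free space, so a cheap percpu_counter_read_positive() of m_fdblocks replaces the expensive counter sync. Below the 5% threshold the prealloc size is right-shifted, and in the rest of the function (outside this hunk) the shift grows by one for each further threshold crossed. A sketch of that shape, with made-up threshold values:

#include <stdio.h>
#include <stdint.h>

/* Illustrative thresholds (blocks): 5%, 4%, 3%, 2%, 1% of the fs. */
enum { LOWSP_5 = 50000, LOWSP_4 = 40000, LOWSP_3 = 30000,
       LOWSP_2 = 20000, LOWSP_1 = 10000 };

/* Quarter the preallocation once below 5% free, then halve it again
 * for each further threshold crossed; the numbers are invented, only
 * the shape of the throttle follows the code above. */
static uint64_t throttle_prealloc(uint64_t alloc_blocks, uint64_t freesp)
{
	int shift = 0;

	if (freesp < LOWSP_5) {
		shift = 2;
		if (freesp < LOWSP_4)
			shift++;
		if (freesp < LOWSP_3)
			shift++;
		if (freesp < LOWSP_2)
			shift++;
		if (freesp < LOWSP_1)
			shift++;
	}
	return alloc_blocks >> shift;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)throttle_prealloc(8192, 45000));
	printf("%llu\n", (unsigned long long)throttle_prealloc(8192, 5000));
	return 0;
}
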
index d7782ae1af3c1b966ac6303ccfab64d36ceeed96..015d6a366b1670f323053fcef8281326bb75745c 100644 (file)
@@ -394,7 +394,7 @@ xfs_vn_rename(
        struct xfs_name oname;
        struct xfs_name nname;
 
-       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
 
        /* if we are exchanging files, we need to set i_mode of both files */
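
Accepting RENAME_WHITEOUT in ->rename2() lets callers (typically union/overlay filesystems) atomically replace the source name with a whiteout while moving it to the destination. A minimal caller, assuming glibc 2.28+ for the renameat2() wrapper (older systems would go through syscall(2)) and sufficient privilege to create the whiteout device node:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
		return 1;
	}

	/* Move src to dst and leave a whiteout at src, atomically.
	 * Needs CAP_MKNOD and filesystem support (this series adds
	 * the XFS implementation). */
	if (renameat2(AT_FDCWD, argv[1], AT_FDCWD, argv[2],
		      RENAME_WHITEOUT) < 0) {
		fprintf(stderr, "renameat2: %s\n", strerror(errno));
		return 1;
	}
	return 0;
}
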
@@ -756,6 +756,7 @@ xfs_setattr_size(
        int                     error;
        uint                    lock_flags = 0;
        uint                    commit_flags = 0;
+       bool                    did_zeroing = false;
 
        trace_xfs_setattr(ip);
 
@@ -770,6 +771,7 @@ xfs_setattr_size(
                return error;
 
        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
        ASSERT(S_ISREG(ip->i_d.di_mode));
        ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
                ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -799,20 +801,16 @@ xfs_setattr_size(
                return error;
 
        /*
-        * Now we can make the changes.  Before we join the inode to the
-        * transaction, take care of the part of the truncation that must be
-        * done without the inode lock.  This needs to be done before joining
-        * the inode to the transaction, because the inode cannot be unlocked
-        * once it is a part of the transaction.
+        * File data changes must be complete before we start the transaction to
+        * modify the inode.  This needs to be done before joining the inode to
+        * the transaction because the inode cannot be unlocked once it is a
+        * part of the transaction.
+        *
+        * Start with zeroing any data block beyond EOF that we may expose on
+        * file extension.
         */
        if (newsize > oldsize) {
-               /*
-                * Do the first part of growing a file: zero any data in the
-                * last block that is beyond the old EOF.  We need to do this
-                * before the inode is joined to the transaction to modify
-                * i_size.
-                */
-               error = xfs_zero_eof(ip, newsize, oldsize);
+               error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
                if (error)
                        return error;
        }
@@ -822,75 +820,42 @@ xfs_setattr_size(
         * any previous writes that are beyond the on disk EOF and the new
         * EOF that have not been written out need to be written here.  If we
         * do not write the data out, we expose ourselves to the null files
-        * problem.
-        *
-        * Only flush from the on disk size to the smaller of the in memory
-        * file size or the new size as that's the range we really care about
-        * here and prevents waiting for other data not within the range we
-        * care about here.
+        * problem. Note that this includes any block zeroing we did above;
+        * otherwise those blocks may not be zeroed after a crash.
         */
-       if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
+       if (newsize > ip->i_d.di_size &&
+           (oldsize != ip->i_d.di_size || did_zeroing)) {
                error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                                      ip->i_d.di_size, newsize);
                if (error)
                        return error;
        }
 
-       /*
-        * Wait for all direct I/O to complete.
-        */
+       /* Now wait for all direct I/O to complete. */
        inode_dio_wait(inode);
 
        /*
-        * Do all the page cache truncate work outside the transaction context
-        * as the "lock" order is page lock->log space reservation.  i.e.
-        * locking pages inside the transaction can ABBA deadlock with
-        * writeback. We have to do the VFS inode size update before we truncate
-        * the pagecache, however, to avoid racing with page faults beyond the
-        * new EOF they are not serialised against truncate operations except by
-        * page locks and size updates.
+        * We've already locked out new page faults, so now we can safely remove
+        * pages from the page cache knowing they won't get refaulted until we
+        * drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are
+        * complete. The truncate_setsize() call also cleans partial EOF page
+        * PTEs on extending truncates and hence ensures sub-page block size
+        * filesystems are correctly handled, too.
         *
-        * Hence we are in a situation where a truncate can fail with ENOMEM
-        * from xfs_trans_reserve(), but having already truncated the in-memory
-        * version of the file (i.e. made user visible changes). There's not
-        * much we can do about this, except to hope that the caller sees ENOMEM
-        * and retries the truncate operation.
+        * We have to do all the page cache truncate work outside the
+        * transaction context as the "lock" order is page lock->log space
+        * reservation as defined by extent allocation in the writeback path.
+        * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+        * having already truncated the in-memory version of the file (i.e. made
+        * user visible changes). There's not much we can do about this, except
+        * to hope that the caller sees ENOMEM and retries the truncate
+        * operation.
         */
        error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
        if (error)
                return error;
        truncate_setsize(inode, newsize);
 
-       /*
-        * The "we can't serialise against page faults" pain gets worse.
-        *
-        * If the file is mapped then we have to clean the page at the old EOF
-        * when extending the file. Extending the file can expose changes the
-        * underlying page mapping (e.g. from beyond EOF to a hole or
-        * unwritten), and so on the next attempt to write to that page we need
-        * to remap it for write. i.e. we need .page_mkwrite() to be called.
-        * Hence we need to clean the page to clean the pte and so a new write
-        * fault will be triggered appropriately.
-        *
-        * If we do it before we change the inode size, then we can race with a
-        * page fault that maps the page with exactly the same problem. If we do
-        * it after we change the file size, then a new page fault can come in
-        * and allocate space before we've run the rest of the truncate
-        * transaction. That's kinda grotesque, but it's better than have data
-        * over a hole, and so that's the lesser evil that has been chosen here.
-        *
-        * The real solution, however, is to have some mechanism for locking out
-        * page faults while a truncate is in progress.
-        */
-       if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
-               error = filemap_write_and_wait_range(
-                               VFS_I(ip)->i_mapping,
-                               round_down(oldsize, PAGE_CACHE_SIZE),
-                               round_up(oldsize, PAGE_CACHE_SIZE) - 1);
-               if (error)
-                       return error;
-       }
-
        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
        if (error)
@@ -989,8 +954,12 @@ xfs_vn_setattr(
 
                xfs_ilock(ip, iolock);
                error = xfs_break_layouts(dentry->d_inode, &iolock);
-               if (!error)
+               if (!error) {
+                       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+                       iolock |= XFS_MMAPLOCK_EXCL;
+
                        error = xfs_setattr_size(ip, iattr);
+               }
                xfs_iunlock(ip, iolock);
        } else {
                error = xfs_setattr_nonsize(ip, iattr, 0);
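
Taking XFS_MMAPLOCK_EXCL around xfs_setattr_size() is what makes the "we've already locked out new page faults" reasoning in the truncate path hold: faults take the mmap lock shared, truncate takes it exclusive, nested inside the IOLOCK. A toy pthread model of that exclusion (the kernel's mrlocks and lockdep annotations are richer than this):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t iolock   = PTHREAD_RWLOCK_INITIALIZER;
static pthread_rwlock_t mmaplock = PTHREAD_RWLOCK_INITIALIZER;

/* Truncate side: both locks exclusive, IOLOCK always first. */
static void setattr_size_model(void)
{
	pthread_rwlock_wrlock(&iolock);
	pthread_rwlock_wrlock(&mmaplock);
	puts("extent manipulation with faults locked out");
	pthread_rwlock_unlock(&mmaplock);
	pthread_rwlock_unlock(&iolock);
}

/* Fault side: only the mmap lock, shared, so faults run concurrently
 * with each other but never with a truncate in progress. */
static void *page_fault_model(void *arg)
{
	(void)arg;
	pthread_rwlock_rdlock(&mmaplock);
	puts("fault fills a page");
	pthread_rwlock_unlock(&mmaplock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, page_fault_model, NULL);
	setattr_size_model();
	pthread_join(t, NULL);
	return 0;
}

Compile with -pthread; the ordering (iolock before mmaplock) mirrors the order the hunk above takes them in.
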
index c31d2c2eadc4445606d53cbd0c744b9143345375..7c7842c85a082f92c99b25f6ca818c1bf32a7224 100644 (file)
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
 #undef XFS_NATIVE_HOST
 #endif
 
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
 #define irix_sgid_inherit      xfs_params.sgid_inherit.val
 #define irix_symlink_mode      xfs_params.symlink_mode.val
 #define xfs_panic_mask         xfs_params.panic_mask.val
index a5a945fc3bdc700e218c15f3d9511e9d390cba8f..4f5784f85a5b22e4bad0946106dcb1302f817ace 100644 (file)
@@ -4463,10 +4463,10 @@ xlog_do_recover(
        xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
        ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
        ASSERT(xfs_sb_good_version(sbp));
+       xfs_reinit_percpu_counters(log->l_mp);
+
        xfs_buf_relse(bp);
 
-       /* We've re-read the superblock so re-initialize per-cpu counters */
-       xfs_icsb_reinit_counters(log->l_mp);
 
        xlog_recover_check_summary(log);
 
index 4fa80e63eea2ab86d786af16cb03ec3bf9e49dc1..2ce7ee3b4ec1fdb9e9344a1ec7ea3a2df5a3b29c 100644 (file)
 #include "xfs_sysfs.h"
 
 
-#ifdef HAVE_PERCPU_SB
-STATIC void    xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
-                                               int);
-STATIC void    xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
-                                               int);
-STATIC void    xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-#else
-
-#define xfs_icsb_balance_counter(mp, a, b)             do { } while (0)
-#define xfs_icsb_balance_counter_locked(mp, a, b)      do { } while (0)
-#endif
-
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
 static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
                goto reread;
        }
 
-       /* Initialize per-cpu counters */
-       xfs_icsb_reinit_counters(mp);
+       xfs_reinit_percpu_counters(mp);
 
        /* no need to be quiet anymore, so reset the buf ops */
        bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
        if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
                return 0;
 
-       xfs_icsb_sync_counters(mp, 0);
-
        /*
         * we don't need to do this if we are updating the superblock
         * counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
        return xfs_sync_sb(mp, true);
 }
 
-/*
- * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
- * a delta to a specified field in the in-core superblock.  Simply
- * switch on the field indicated and apply the delta to that field.
- * Fields are not allowed to dip below zero, so if the delta would
- * do this do not apply it and return EINVAL.
- *
- * The m_sb_lock must be held when this routine is called.
- */
-STATIC int
-xfs_mod_incore_sb_unlocked(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int64_t         delta,
-       int             rsvd)
+int
+xfs_mod_icount(
+       struct xfs_mount        *mp,
+       int64_t                 delta)
 {
-       int             scounter;       /* short counter for 32 bit fields */
-       long long       lcounter;       /* long counter for 64 bit fields */
-       long long       res_used, rem;
-
-       /*
-        * With the in-core superblock spin lock held, switch
-        * on the indicated field.  Apply the delta to the
-        * proper field.  If the fields value would dip below
-        * 0, then do not apply the delta and return EINVAL.
-        */
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               lcounter = (long long)mp->m_sb.sb_icount;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_icount = lcounter;
-               return 0;
-       case XFS_SBS_IFREE:
-               lcounter = (long long)mp->m_sb.sb_ifree;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_ifree = lcounter;
-               return 0;
-       case XFS_SBS_FDBLOCKS:
-               lcounter = (long long)
-                       mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-               res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
-
-               if (delta > 0) {                /* Putting blocks back */
-                       if (res_used > delta) {
-                               mp->m_resblks_avail += delta;
-                       } else {
-                               rem = delta - res_used;
-                               mp->m_resblks_avail = mp->m_resblks;
-                               lcounter += rem;
-                       }
-               } else {                                /* Taking blocks away */
-                       lcounter += delta;
-                       if (lcounter >= 0) {
-                               mp->m_sb.sb_fdblocks = lcounter +
-                                                       XFS_ALLOC_SET_ASIDE(mp);
-                               return 0;
-                       }
-
-                       /*
-                        * We are out of blocks, use any available reserved
-                        * blocks if were allowed to.
-                        */
-                       if (!rsvd)
-                               return -ENOSPC;
-
-                       lcounter = (long long)mp->m_resblks_avail + delta;
-                       if (lcounter >= 0) {
-                               mp->m_resblks_avail = lcounter;
-                               return 0;
-                       }
-                       printk_once(KERN_WARNING
-                               "Filesystem \"%s\": reserve blocks depleted! "
-                               "Consider increasing reserve pool size.",
-                               mp->m_fsname);
-                       return -ENOSPC;
-               }
-
-               mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-               return 0;
-       case XFS_SBS_FREXTENTS:
-               lcounter = (long long)mp->m_sb.sb_frextents;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       return -ENOSPC;
-               }
-               mp->m_sb.sb_frextents = lcounter;
-               return 0;
-       case XFS_SBS_DBLOCKS:
-               lcounter = (long long)mp->m_sb.sb_dblocks;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_dblocks = lcounter;
-               return 0;
-       case XFS_SBS_AGCOUNT:
-               scounter = mp->m_sb.sb_agcount;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_agcount = scounter;
-               return 0;
-       case XFS_SBS_IMAX_PCT:
-               scounter = mp->m_sb.sb_imax_pct;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_imax_pct = scounter;
-               return 0;
-       case XFS_SBS_REXTSIZE:
-               scounter = mp->m_sb.sb_rextsize;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextsize = scounter;
-               return 0;
-       case XFS_SBS_RBMBLOCKS:
-               scounter = mp->m_sb.sb_rbmblocks;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rbmblocks = scounter;
-               return 0;
-       case XFS_SBS_RBLOCKS:
-               lcounter = (long long)mp->m_sb.sb_rblocks;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rblocks = lcounter;
-               return 0;
-       case XFS_SBS_REXTENTS:
-               lcounter = (long long)mp->m_sb.sb_rextents;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextents = lcounter;
-               return 0;
-       case XFS_SBS_REXTSLOG:
-               scounter = mp->m_sb.sb_rextslog;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextslog = scounter;
-               return 0;
-       default:
+       /* deltas are +/-64, hence the large batch size of 128. */
+       __percpu_counter_add(&mp->m_icount, delta, 128);
+       if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
                ASSERT(0);
+               percpu_counter_add(&mp->m_icount, -delta);
                return -EINVAL;
        }
+       return 0;
 }
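
xfs_mod_icount now rides on the generic per-cpu counter: per-cpu deltas are folded into the global count once they reach the batch, and percpu_counter_compare() does an accurate sum only when the cheap bounds are inconclusive. A single-threaded model of the batch-then-check-then-rollback pattern (one "cpu", plain integers, no locking):

#include <stdio.h>
#include <stdint.h>

/* One "cpu" worth of state; the real percpu_counter keeps a local
 * delta per cpu and folds it into count when it reaches the batch. */
struct pc {
	int64_t count;	/* global, authoritative */
	int64_t local;	/* per-cpu delta, folded lazily */
	int32_t batch;
};

static void pc_add(struct pc *c, int64_t delta)
{
	c->local += delta;
	if (c->local >= c->batch || c->local <= -c->batch) {
		c->count += c->local;	/* folded under a lock in the real thing */
		c->local = 0;
	}
}

static int64_t pc_sum(const struct pc *c)
{
	return c->count + c->local;	/* accurate sum for comparisons */
}

/* Mirror of the pattern above: apply, then undo if we went negative. */
static int mod_icount(struct pc *c, int64_t delta)
{
	pc_add(c, delta);
	if (pc_sum(c) < 0) {
		pc_add(c, -delta);
		return -22;	/* -EINVAL */
	}
	return 0;
}

int main(void)
{
	struct pc icount = { .count = 100, .local = 0, .batch = 128 };

	printf("%d\n", mod_icount(&icount, -64));	/* ok */
	printf("%d\n", mod_icount(&icount, -64));	/* would go negative */
	printf("sum = %lld\n", (long long)pc_sum(&icount));
	return 0;
}
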
 
-/*
- * xfs_mod_incore_sb() is used to change a field in the in-core
- * superblock structure by the specified delta.  This modification
- * is protected by the m_sb_lock.  Just use the xfs_mod_incore_sb_unlocked()
- * routine to do the work.
- */
 int
-xfs_mod_incore_sb(
+xfs_mod_ifree(
        struct xfs_mount        *mp,
-       xfs_sb_field_t          field,
-       int64_t                 delta,
-       int                     rsvd)
+       int64_t                 delta)
 {
-       int                     status;
-
-#ifdef HAVE_PERCPU_SB
-       ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-#endif
-       spin_lock(&mp->m_sb_lock);
-       status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-       spin_unlock(&mp->m_sb_lock);
-
-       return status;
+       percpu_counter_add(&mp->m_ifree, delta);
+       if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
+               ASSERT(0);
+               percpu_counter_add(&mp->m_ifree, -delta);
+               return -EINVAL;
+       }
+       return 0;
 }
 
-/*
- * Change more than one field in the in-core superblock structure at a time.
- *
- * The fields and changes to those fields are specified in the array of
- * xfs_mod_sb structures passed in.  Either all of the specified deltas
- * will be applied or none of them will.  If any modified field dips below 0,
- * then all modifications will be backed out and EINVAL will be returned.
- *
- * Note that this function may not be used for the superblock values that
- * are tracked with the in-memory per-cpu counters - a direct call to
- * xfs_icsb_modify_counters is required for these.
- */
 int
-xfs_mod_incore_sb_batch(
+xfs_mod_fdblocks(
        struct xfs_mount        *mp,
-       xfs_mod_sb_t            *msb,
-       uint                    nmsb,
-       int                     rsvd)
+       int64_t                 delta,
+       bool                    rsvd)
 {
-       xfs_mod_sb_t            *msbp;
-       int                     error = 0;
+       int64_t                 lcounter;
+       long long               res_used;
+       s32                     batch;
+
+       if (delta > 0) {
+               /*
+                * If the reserve pool is depleted, put blocks back into it
+                * first. Most of the time the pool is full.
+                */
+               if (likely(mp->m_resblks == mp->m_resblks_avail)) {
+                       percpu_counter_add(&mp->m_fdblocks, delta);
+                       return 0;
+               }
+
+               spin_lock(&mp->m_sb_lock);
+               res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
+
+               if (res_used > delta) {
+                       mp->m_resblks_avail += delta;
+               } else {
+                       delta -= res_used;
+                       mp->m_resblks_avail = mp->m_resblks;
+                       percpu_counter_add(&mp->m_fdblocks, delta);
+               }
+               spin_unlock(&mp->m_sb_lock);
+               return 0;
+       }
 
        /*
-        * Loop through the array of mod structures and apply each individually.
-        * If any fail, then back out all those which have already been applied.
-        * Do all of this within the scope of the m_sb_lock so that all of the
-        * changes will be atomic.
+        * When taking blocks away, we need to be more accurate the
+        * closer we are to zero.
+        *
+        * The batch size is set to a maximum of 1024 blocks - if we are
+        * allocating or freeing extents larger than this then we aren't
+        * going to be hammering the counter lock so a lock per update
+        * is not a problem.
+        *
+        * If the counter has a value of less than 2 * max batch size,
+        * then make everything serialise as we are really close to
+        * ENOSPC.
+        */
+#define __BATCH        1024
+       if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+               batch = 1;
+       else
+               batch = __BATCH;
+#undef __BATCH
+
+       __percpu_counter_add(&mp->m_fdblocks, delta, batch);
+       if (percpu_counter_compare(&mp->m_fdblocks,
+                                  XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+               /* we had space! */
+               return 0;
+       }
+
+       /*
+        * Lock up the sb for dipping into reserves before releasing the space
+        * that took us to ENOSPC.
         */
        spin_lock(&mp->m_sb_lock);
-       for (msbp = msb; msbp < (msb + nmsb); msbp++) {
-               ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
-                      msbp->msb_field > XFS_SBS_FDBLOCKS);
+       percpu_counter_add(&mp->m_fdblocks, -delta);
+       if (!rsvd)
+               goto fdblocks_enospc;
 
-               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-                                                  msbp->msb_delta, rsvd);
-               if (error)
-                       goto unwind;
+       lcounter = (long long)mp->m_resblks_avail + delta;
+       if (lcounter >= 0) {
+               mp->m_resblks_avail = lcounter;
+               spin_unlock(&mp->m_sb_lock);
+               return 0;
        }
+       printk_once(KERN_WARNING
+               "Filesystem \"%s\": reserve blocks depleted! "
+               "Consider increasing reserve pool size.",
+               mp->m_fsname);
+fdblocks_enospc:
        spin_unlock(&mp->m_sb_lock);
-       return 0;
+       return -ENOSPC;
+}
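
The rewritten xfs_mod_fdblocks above keeps the common cases lock-free: frees touch the reserve pool only when it is depleted, and allocations fall back to the m_sb_lock slow path only when the batched add drops the count below the set-aside. A simplified single-threaded model of the reserve accounting (locking and the set-aside elided):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct fs {
	int64_t fdblocks;	/* free blocks, a percpu counter in the real code */
	int64_t resblks;	/* reserve pool size */
	int64_t resblks_avail;	/* reserve pool fill level */
};

static int mod_fdblocks(struct fs *fs, int64_t delta, bool rsvd)
{
	if (delta > 0) {
		/* Freed blocks refill the reserve pool first. */
		int64_t res_used = fs->resblks - fs->resblks_avail;

		if (res_used > delta) {
			fs->resblks_avail += delta;
		} else {
			fs->resblks_avail = fs->resblks;
			fs->fdblocks += delta - res_used;
		}
		return 0;
	}

	if (fs->fdblocks + delta >= 0) {	/* common case: space left */
		fs->fdblocks += delta;
		return 0;
	}

	/* Out of blocks: dip into the reserve pool if the caller may.
	 * (The kernel first puts -delta back, then takes from the pool;
	 * the net effect is the same.) */
	if (rsvd && fs->resblks_avail + delta >= 0) {
		fs->resblks_avail += delta;
		return 0;
	}
	return -28;	/* -ENOSPC */
}

int main(void)
{
	struct fs fs = { .fdblocks = 10, .resblks = 8, .resblks_avail = 8 };

	printf("%d\n", mod_fdblocks(&fs, -10, false));	/* ok, drains free space */
	printf("%d\n", mod_fdblocks(&fs, -4, false));	/* ENOSPC, reserves off-limits */
	printf("%d\n", mod_fdblocks(&fs, -4, true));	/* ok, taken from reserve */
	printf("%d\n", mod_fdblocks(&fs, 6, false));	/* refills reserve first */
	printf("avail=%lld free=%lld\n",
	       (long long)fs.resblks_avail, (long long)fs.fdblocks);
	return 0;
}
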
 
-unwind:
-       while (--msbp >= msb) {
-               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-                                                  -msbp->msb_delta, rsvd);
-               ASSERT(error == 0);
-       }
+int
+xfs_mod_frextents(
+       struct xfs_mount        *mp,
+       int64_t                 delta)
+{
+       int64_t                 lcounter;
+       int                     ret = 0;
+
+       spin_lock(&mp->m_sb_lock);
+       lcounter = mp->m_sb.sb_frextents + delta;
+       if (lcounter < 0)
+               ret = -ENOSPC;
+       else
+               mp->m_sb.sb_frextents = lcounter;
        spin_unlock(&mp->m_sb_lock);
-       return error;
+       return ret;
 }
 
 /*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
        }
        return 0;
 }
-
-#ifdef HAVE_PERCPU_SB
-/*
- * Per-cpu incore superblock counters
- *
- * Simple concept, difficult implementation
- *
- * Basically, replace the incore superblock counters with a distributed per cpu
- * counter for contended fields (e.g.  free block count).
- *
- * Difficulties arise in that the incore sb is used for ENOSPC checking, and
- * hence needs to be accurately read when we are running low on space. Hence
- * there is a method to enable and disable the per-cpu counters based on how
- * much "stuff" is available in them.
- *
- * Basically, a counter is enabled if there is enough free resource to justify
- * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
- * ENOSPC), then we disable the counters to synchronise all callers and
- * re-distribute the available resources.
- *
- * If, once we redistributed the available resources, we still get a failure,
- * we disable the per-cpu counter and go through the slow path.
- *
- * The slow path is the current xfs_mod_incore_sb() function.  This means that
- * when we disable a per-cpu counter, we need to drain its resources back to
- * the global superblock. We do this after disabling the counter to prevent
- * more threads from queueing up on the counter.
- *
- * Essentially, this means that we still need a lock in the fast path to enable
- * synchronisation between the global counters and the per-cpu counters. This
- * is not a problem because the lock will be local to a CPU almost all the time
- * and have little contention except when we get to ENOSPC conditions.
- *
- * Basically, this lock becomes a barrier that enables us to lock out the fast
- * path while we do things like enabling and disabling counters and
- * synchronising the counters.
- *
- * Locking rules:
- *
- *     1. m_sb_lock before picking up per-cpu locks
- *     2. per-cpu locks always picked up via for_each_online_cpu() order
- *     3. accurate counter sync requires m_sb_lock + per cpu locks
- *     4. modifying per-cpu counters requires holding per-cpu lock
- *     5. modifying global counters requires holding m_sb_lock
- *     6. enabling or disabling a counter requires holding the m_sb_lock 
- *        and _none_ of the per-cpu locks.
- *
- * Disabled counters are only ever re-enabled by a balance operation
- * that results in more free resources per CPU than a given threshold.
- * To ensure counters don't remain disabled, they are rebalanced when
- * the global resource goes above a higher threshold (i.e. some hysteresis
- * is present to prevent thrashing).
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * hot-plug CPU notifier support.
- *
- * We need a notifier per filesystem as we need to be able to identify
- * the filesystem to balance the counters out. This is achieved by
- * having a notifier block embedded in the xfs_mount_t and doing pointer
- * magic to get the mount pointer from the notifier block address.
- */
-STATIC int
-xfs_icsb_cpu_notify(
-       struct notifier_block *nfb,
-       unsigned long action,
-       void *hcpu)
-{
-       xfs_icsb_cnts_t *cntp;
-       xfs_mount_t     *mp;
-
-       mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
-       cntp = (xfs_icsb_cnts_t *)
-                       per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               /* Easy Case - initialize the area and locks, and
-                * then rebalance when online does everything else for us. */
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-               break;
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               xfs_icsb_lock(mp);
-               xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-               xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-               xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-               xfs_icsb_unlock(mp);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               /* Disable all the counters, then fold the dead cpu's
-                * count into the total on the global superblock and
-                * re-enable the counters. */
-               xfs_icsb_lock(mp);
-               spin_lock(&mp->m_sb_lock);
-               xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
-               xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
-               xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
-
-               mp->m_sb.sb_icount += cntp->icsb_icount;
-               mp->m_sb.sb_ifree += cntp->icsb_ifree;
-               mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
-
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
-               spin_unlock(&mp->m_sb_lock);
-               xfs_icsb_unlock(mp);
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int
-xfs_icsb_init_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
-       if (mp->m_sb_cnts == NULL)
-               return -ENOMEM;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-       }
-
-       mutex_init(&mp->m_icsb_mutex);
-
-       /*
-        * start with all counters disabled so that the
-        * initial balance kicks us off correctly
-        */
-       mp->m_icsb_counters = -1;
-
-#ifdef CONFIG_HOTPLUG_CPU
-       mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
-       mp->m_icsb_notifier.priority = 0;
-       register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
-       return 0;
-}
-
-void
-xfs_icsb_reinit_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_lock(mp);
-       /*
-        * start with all counters disabled so that the
-        * initial balance kicks us off correctly
-        */
-       mp->m_icsb_counters = -1;
-       xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-       xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-       xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-       xfs_icsb_unlock(mp);
-}
-
-void
-xfs_icsb_destroy_counters(
-       xfs_mount_t     *mp)
-{
-       if (mp->m_sb_cnts) {
-               unregister_hotcpu_notifier(&mp->m_icsb_notifier);
-               free_percpu(mp->m_sb_cnts);
-       }
-       mutex_destroy(&mp->m_icsb_mutex);
-}
-
-STATIC void
-xfs_icsb_lock_cntr(
-       xfs_icsb_cnts_t *icsbp)
-{
-       while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
-               ndelay(1000);
-       }
-}
-
-STATIC void
-xfs_icsb_unlock_cntr(
-       xfs_icsb_cnts_t *icsbp)
-{
-       clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
-}
-
-
-STATIC void
-xfs_icsb_lock_all_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               xfs_icsb_lock_cntr(cntp);
-       }
-}
-
-STATIC void
-xfs_icsb_unlock_all_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               xfs_icsb_unlock_cntr(cntp);
-       }
-}
-
-STATIC void
-xfs_icsb_count(
-       xfs_mount_t     *mp,
-       xfs_icsb_cnts_t *cnt,
-       int             flags)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
-
-       if (!(flags & XFS_ICSB_LAZY_COUNT))
-               xfs_icsb_lock_all_counters(mp);
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               cnt->icsb_icount += cntp->icsb_icount;
-               cnt->icsb_ifree += cntp->icsb_ifree;
-               cnt->icsb_fdblocks += cntp->icsb_fdblocks;
-       }
-
-       if (!(flags & XFS_ICSB_LAZY_COUNT))
-               xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC int
-xfs_icsb_counter_disabled(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field)
-{
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-       return test_bit(field, &mp->m_icsb_counters);
-}
-
-STATIC void
-xfs_icsb_disable_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field)
-{
-       xfs_icsb_cnts_t cnt;
-
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-       /*
-        * If we are already disabled, then there is nothing to do
-        * here. We check before locking all the counters to avoid
-        * the expensive lock operation when being called in the
-        * slow path and the counter is already disabled. This is
-        * safe because the only time we set or clear this state is under
-        * the m_icsb_mutex.
-        */
-       if (xfs_icsb_counter_disabled(mp, field))
-               return;
-
-       xfs_icsb_lock_all_counters(mp);
-       if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
-               /* drain back to superblock */
-
-               xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
-               switch(field) {
-               case XFS_SBS_ICOUNT:
-                       mp->m_sb.sb_icount = cnt.icsb_icount;
-                       break;
-               case XFS_SBS_IFREE:
-                       mp->m_sb.sb_ifree = cnt.icsb_ifree;
-                       break;
-               case XFS_SBS_FDBLOCKS:
-                       mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-                       break;
-               default:
-                       BUG();
-               }
-       }
-
-       xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC void
-xfs_icsb_enable_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       uint64_t        count,
-       uint64_t        resid)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-       xfs_icsb_lock_all_counters(mp);
-       for_each_online_cpu(i) {
-               cntp = per_cpu_ptr(mp->m_sb_cnts, i);
-               switch (field) {
-               case XFS_SBS_ICOUNT:
-                       cntp->icsb_icount = count + resid;
-                       break;
-               case XFS_SBS_IFREE:
-                       cntp->icsb_ifree = count + resid;
-                       break;
-               case XFS_SBS_FDBLOCKS:
-                       cntp->icsb_fdblocks = count + resid;
-                       break;
-               default:
-                       BUG();
-                       break;
-               }
-               resid = 0;
-       }
-       clear_bit(field, &mp->m_icsb_counters);
-       xfs_icsb_unlock_all_counters(mp);
-}
-
-void
-xfs_icsb_sync_counters_locked(
-       xfs_mount_t     *mp,
-       int             flags)
-{
-       xfs_icsb_cnts_t cnt;
-
-       xfs_icsb_count(mp, &cnt, flags);
-
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
-               mp->m_sb.sb_icount = cnt.icsb_icount;
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
-               mp->m_sb.sb_ifree = cnt.icsb_ifree;
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
-               mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-}
-
-/*
- * Accurate update of per-cpu counters to incore superblock
- */
-void
-xfs_icsb_sync_counters(
-       xfs_mount_t     *mp,
-       int             flags)
-{
-       spin_lock(&mp->m_sb_lock);
-       xfs_icsb_sync_counters_locked(mp, flags);
-       spin_unlock(&mp->m_sb_lock);
-}
-
-/*
- * Balance and enable/disable counters as necessary.
- *
- * Thresholds for re-enabling counters are somewhat magic.  inode counts are
- * chosen to be the same number as single on disk allocation chunk per CPU, and
- * free blocks is something far enough zero that we aren't going thrash when we
- * get near ENOSPC. We also need to supply a minimum we require per cpu to
- * prevent looping endlessly when xfs_alloc_space asks for more than will
- * be distributed to a single CPU but each CPU has enough blocks to be
- * reenabled.
- *
- * Note that we can be called when counters are already disabled.
- * xfs_icsb_disable_counter() optimises the counter locking in this case to
- * prevent locking every per-cpu counter needlessly.
- */
-
-#define XFS_ICSB_INO_CNTR_REENABLE     (uint64_t)64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
-               (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
-STATIC void
-xfs_icsb_balance_counter_locked(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int             min_per_cpu)
-{
-       uint64_t        count, resid;
-       int             weight = num_online_cpus();
-       uint64_t        min = (uint64_t)min_per_cpu;
-
-       /* disable counter and sync counter */
-       xfs_icsb_disable_counter(mp, field);
-
-       /* update counters  - first CPU gets residual*/
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               count = mp->m_sb.sb_icount;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                       return;
-               break;
-       case XFS_SBS_IFREE:
-               count = mp->m_sb.sb_ifree;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                       return;
-               break;
-       case XFS_SBS_FDBLOCKS:
-               count = mp->m_sb.sb_fdblocks;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
-                       return;
-               break;
-       default:
-               BUG();
-               count = resid = 0;      /* quiet, gcc */
-               break;
-       }
-
-       xfs_icsb_enable_counter(mp, field, count, resid);
-}
-
-STATIC void
-xfs_icsb_balance_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  fields,
-       int             min_per_cpu)
-{
-       spin_lock(&mp->m_sb_lock);
-       xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
-       spin_unlock(&mp->m_sb_lock);
-}
-
-int
-xfs_icsb_modify_counters(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int64_t         delta,
-       int             rsvd)
-{
-       xfs_icsb_cnts_t *icsbp;
-       long long       lcounter;       /* long counter for 64 bit fields */
-       int             ret = 0;
-
-       might_sleep();
-again:
-       preempt_disable();
-       icsbp = this_cpu_ptr(mp->m_sb_cnts);
-
-       /*
-        * if the counter is disabled, go to slow path
-        */
-       if (unlikely(xfs_icsb_counter_disabled(mp, field)))
-               goto slow_path;
-       xfs_icsb_lock_cntr(icsbp);
-       if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
-               xfs_icsb_unlock_cntr(icsbp);
-               goto slow_path;
-       }
-
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               lcounter = icsbp->icsb_icount;
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_icount = lcounter;
-               break;
-
-       case XFS_SBS_IFREE:
-               lcounter = icsbp->icsb_ifree;
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_ifree = lcounter;
-               break;
-
-       case XFS_SBS_FDBLOCKS:
-               BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
-
-               lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-               break;
-       default:
-               BUG();
-               break;
-       }
-       xfs_icsb_unlock_cntr(icsbp);
-       preempt_enable();
-       return 0;
-
-slow_path:
-       preempt_enable();
-
-       /*
-        * serialise with a mutex so we don't burn lots of cpu on
-        * the superblock lock. We still need to hold the superblock
-        * lock, however, when we modify the global structures.
-        */
-       xfs_icsb_lock(mp);
-
-       /*
-        * Now running atomically.
-        *
-        * If the counter is enabled, someone has beaten us to rebalancing.
-        * Drop the lock and try again in the fast path....
-        */
-       if (!(xfs_icsb_counter_disabled(mp, field))) {
-               xfs_icsb_unlock(mp);
-               goto again;
-       }
-
-       /*
-        * The counter is currently disabled. Because we are
-        * running atomically here, we know a rebalance cannot
-        * be in progress. Hence we can go straight to operating
-        * on the global superblock. We do not call xfs_mod_incore_sb()
-        * here even though we need to get the m_sb_lock. Doing so
-        * will cause us to re-enter this function and deadlock.
-        * Hence we get the m_sb_lock ourselves and then call
-        * xfs_mod_incore_sb_unlocked() as the unlocked path operates
-        * directly on the global counters.
-        */
-       spin_lock(&mp->m_sb_lock);
-       ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-       spin_unlock(&mp->m_sb_lock);
-
-       /*
-        * Now that we've modified the global superblock, we
-        * may be able to re-enable the distributed counters
-        * (e.g. lots of space just got freed). After that
-        * we are done.
-        */
-       if (ret != -ENOSPC)
-               xfs_icsb_balance_counter(mp, field, 0);
-       xfs_icsb_unlock(mp);
-       return ret;
-
-balance_counter:
-       xfs_icsb_unlock_cntr(icsbp);
-       preempt_enable();
-
-       /*
-        * We may have multiple threads here if multiple per-cpu
-        * counters run dry at the same time. This will mean we can
-        * do more balances than strictly necessary but it is not
-        * the common slowpath case.
-        */
-       xfs_icsb_lock(mp);
-
-       /*
-        * running atomically.
-        *
-        * This will leave the counter in the correct state for future
-        * accesses. After the rebalance, we simply try again and our retry
-        * will either succeed through the fast path or slow path without
-        * another balance operation being required.
-        */
-       xfs_icsb_balance_counter(mp, field, delta);
-       xfs_icsb_unlock(mp);
-       goto again;
-}
-
-#endif
index 0d8abd6364d97e613d98d4f827814c7eb7beb207..8c995a2ccb6f94d01728812e47e361678ce61c22 100644 (file)
@@ -18,8 +18,6 @@
 #ifndef __XFS_MOUNT_H__
 #define        __XFS_MOUNT_H__
 
-#ifdef __KERNEL__
-
 struct xlog;
 struct xfs_inode;
 struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
 struct xfs_dir_ops;
 struct xfs_da_geometry;
 
-#ifdef HAVE_PERCPU_SB
-
-/*
- * Valid per-cpu incore superblock counters. Note that if you add new counters,
- * you may need to define new counter disabled bit field descriptors as there
- * are more possible fields in the superblock that can fit in a bitfield on a
- * 32 bit platform. The XFS_SBS_* values for the current current counters just
- * fit.
- */
-typedef struct xfs_icsb_cnts {
-       uint64_t        icsb_fdblocks;
-       uint64_t        icsb_ifree;
-       uint64_t        icsb_icount;
-       unsigned long   icsb_flags;
-} xfs_icsb_cnts_t;
-
-#define XFS_ICSB_FLAG_LOCK     (1 << 0)        /* counter lock bit */
-
-#define XFS_ICSB_LAZY_COUNT    (1 << 1)        /* accuracy not needed */
-
-extern int     xfs_icsb_init_counters(struct xfs_mount *);
-extern void    xfs_icsb_reinit_counters(struct xfs_mount *);
-extern void    xfs_icsb_destroy_counters(struct xfs_mount *);
-extern void    xfs_icsb_sync_counters(struct xfs_mount *, int);
-extern void    xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
-extern int     xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
-                                               int64_t, int);
-
-#else
-#define xfs_icsb_init_counters(mp)             (0)
-#define xfs_icsb_destroy_counters(mp)          do { } while (0)
-#define xfs_icsb_reinit_counters(mp)           do { } while (0)
-#define xfs_icsb_sync_counters(mp, flags)      do { } while (0)
-#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
-#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
-       xfs_mod_incore_sb(mp, field, delta, rsvd)
-#endif
-
 /* dynamic preallocation free space thresholds, 5% down to 1% */
 enum {
        XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
        struct super_block      *m_super;
        xfs_tid_t               m_tid;          /* next unused tid for fs */
        struct xfs_ail          *m_ail;         /* fs active log item list */
-       xfs_sb_t                m_sb;           /* copy of fs superblock */
+
+       struct xfs_sb           m_sb;           /* copy of fs superblock */
        spinlock_t              m_sb_lock;      /* sb counter lock */
+       struct percpu_counter   m_icount;       /* allocated inodes counter */
+       struct percpu_counter   m_ifree;        /* free inodes counter */
+       struct percpu_counter   m_fdblocks;     /* free block counter */
+
        struct xfs_buf          *m_sb_bp;       /* buffer for superblock */
        char                    *m_fsname;      /* filesystem name */
        int                     m_fsname_len;   /* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
        const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
        uint                    m_chsize;       /* size of next field */
        atomic_t                m_active_trans; /* number trans frozen */
-#ifdef HAVE_PERCPU_SB
-       xfs_icsb_cnts_t __percpu *m_sb_cnts;    /* per-cpu superblock counters */
-       unsigned long           m_icsb_counters; /* disabled per-cpu counters */
-       struct notifier_block   m_icsb_notifier; /* hotplug cpu notifier */
-       struct mutex            m_icsb_mutex;   /* balancer sync lock */
-#endif
        struct xfs_mru_cache    *m_filestream;  /* per-mount filestream data */
        struct delayed_work     m_reclaim_work; /* background inode reclaim */
        struct delayed_work     m_eofblocks_work; /* background eof blocks
@@ -300,35 +259,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
        return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-/*
- * Per-cpu superblock locking functions
- */
-#ifdef HAVE_PERCPU_SB
-static inline void
-xfs_icsb_lock(xfs_mount_t *mp)
-{
-       mutex_lock(&mp->m_icsb_mutex);
-}
-
-static inline void
-xfs_icsb_unlock(xfs_mount_t *mp)
-{
-       mutex_unlock(&mp->m_icsb_mutex);
-}
-#else
-#define xfs_icsb_lock(mp)
-#define xfs_icsb_unlock(mp)
-#endif
-
-/*
- * This structure is for use by the xfs_mod_incore_sb_batch() routine.
- * xfs_growfs can specify a few fields which are more than int limit
- */
-typedef struct xfs_mod_sb {
-       xfs_sb_field_t  msb_field;      /* Field to modify, see below */
-       int64_t         msb_delta;      /* Change to make to specified field */
-} xfs_mod_sb_t;
-
 /*
  * Per-ag incore structure, copies of information in agf and agi, to improve the
  * performance of allocation group selection.
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int     xfs_mountfs(xfs_mount_t *mp);
 extern int     xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
                                     xfs_agnumber_t *maxagi);
-
 extern void    xfs_unmountfs(xfs_mount_t *);
-extern int     xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-extern int     xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
-                       uint, int);
+
+extern int     xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
+extern int     xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
+extern int     xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
+                                bool reserved);
+extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
+
 extern int     xfs_mount_log_sb(xfs_mount_t *);
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int     xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int  xfs_dev_is_read_only(struct xfs_mount *, char *);
 
 extern void    xfs_set_low_space_thresholds(struct xfs_mount *);
 
-#endif /* __KERNEL__ */
-
 #endif /* __XFS_MOUNT_H__ */
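
The xfs_mount changes above retire the hand-rolled HAVE_PERCPU_SB machinery in favour of the kernel's generic percpu_counter type. A minimal, self-contained sketch of the lifecycle the new m_icount/m_ifree/m_fdblocks fields rely on (demo_* names are invented, not part of the patch):

#include <linux/gfp.h>
#include <linux/percpu_counter.h>
#include <linux/printk.h>

static struct percpu_counter demo_counter;      /* stands in for mp->m_fdblocks */

static int demo_counter_lifecycle(s64 ondisk_value)
{
        int error;

        /* allocates per-cpu storage, so it can fail and takes a gfp mask */
        error = percpu_counter_init(&demo_counter, ondisk_value, GFP_KERNEL);
        if (error)
                return error;

        /* fast path: updates a per-cpu delta, rarely touches the shared total */
        percpu_counter_add(&demo_counter, -16);

        /* exact value: folds in every CPU's delta, so comparatively slow */
        pr_info("counter is now %lld\n",
                (long long)percpu_counter_sum(&demo_counter));

        percpu_counter_destroy(&demo_counter);
        return 0;
}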
index 30ecca3037e37bc5354fb9e37deb4db6e163fa8a..f8a674d7f092441fb5fb3ba2a35d3da3f9175e1a 100644 (file)
@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
        if (!mru || !mru->lists)
                return -EINVAL;
 
-       if (radix_tree_preload(GFP_KERNEL))
+       if (radix_tree_preload(GFP_NOFS))
                return -ENOMEM;
 
        INIT_LIST_HEAD(&elem->list_node);
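
The GFP_KERNEL to GFP_NOFS switch above is a reclaim-recursion fix: xfs_mru_cache_insert() runs in filesystem context, and a GFP_KERNEL allocation may recurse into filesystem reclaim and deadlock on locks the caller already holds. A sketch of the preload pattern, assuming an illustrative tree (demo_* names invented):

#include <linux/gfp.h>
#include <linux/radix-tree.h>

static RADIX_TREE(demo_tree, GFP_NOFS);

static int demo_insert(unsigned long key, void *item)
{
        int error;

        /*
         * Preallocate tree nodes up front; GFP_NOFS keeps reclaim from
         * re-entering the filesystem.  On success, preemption stays
         * disabled until radix_tree_preload_end().
         */
        error = radix_tree_preload(GFP_NOFS);
        if (error)
                return error;   /* -ENOMEM; preemption was not disabled */

        /* the insert itself is normally done under the tree's spinlock */
        error = radix_tree_insert(&demo_tree, key, item);
        radix_tree_preload_end();
        return error;
}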
index 4b33ef112400cc01507b96632d93775839573d99..365dd57ea760ddd5beaa87d78dc6a1f31d7feb94 100644 (file)
@@ -300,8 +300,10 @@ xfs_fs_commit_blocks(
 
        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-       if (error)
+       if (error) {
+               xfs_trans_cancel(tp, 0);
                goto out_drop_iolock;
+       }
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
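
The two added lines above plug a leak in xfs_fs_commit_blocks(): xfs_trans_alloc() returns an allocated transaction, and when xfs_trans_reserve() fails the caller still owns that allocation, so the error path must cancel it. The general shape, as a compilable sketch with invented demo_* names standing in for the transaction API:

#include <linux/errno.h>

struct demo_txn;
extern struct demo_txn *demo_txn_alloc(void);
extern int demo_txn_reserve(struct demo_txn *tp);
extern void demo_txn_cancel(struct demo_txn *tp);
extern int demo_txn_commit(struct demo_txn *tp);

static int demo_do_update(void)
{
        struct demo_txn *tp = demo_txn_alloc();
        int error;

        error = demo_txn_reserve(tp);
        if (error) {
                /* reserve failed: nothing is logged yet, but the
                 * transaction structure itself must still be freed */
                demo_txn_cancel(tp);
                return error;
        }
        /* ... join objects, log changes ... */
        return demo_txn_commit(tp);
}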
index c6b22e1e77ed1e647a35baf365568970593da0ad..5538468c7f630d21d16c34551b98f2f4c4c7c5a9 100644 (file)
@@ -841,6 +841,11 @@ xfs_qm_reset_dqcounts(
                 */
                xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
                            "xfs_quotacheck");
+               /*
+                * Reset the type in case we are reusing a group quota file for
+                * project quotas, or vice versa.
+                */
+               ddq->d_flags = type;
                ddq->d_bcount = 0;
                ddq->d_icount = 0;
                ddq->d_rtbcount = 0;
index 58453e3255f8802fa0050ac034546e88a3ad4cca..8782b36517b5bfc7c9787eab8af1f83a4b35155a 100644 (file)
@@ -966,6 +966,8 @@ xfs_fs_inode_init_once(
        atomic_set(&ip->i_pincount, 0);
        spin_lock_init(&ip->i_flags_lock);
 
+       mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                     "xfsino", ip->i_ino);
 }
@@ -1013,24 +1015,6 @@ xfs_free_fsname(
        kfree(mp->m_logname);
 }
 
-STATIC void
-xfs_fs_put_super(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_notice(mp, "Unmounting Filesystem");
-       xfs_filestream_unmount(mp);
-       xfs_unmountfs(mp);
-
-       xfs_freesb(mp);
-       xfs_icsb_destroy_counters(mp);
-       xfs_destroy_mount_workqueues(mp);
-       xfs_close_devices(mp);
-       xfs_free_fsname(mp);
-       kfree(mp);
-}
-
 STATIC int
 xfs_fs_sync_fs(
        struct super_block      *sb,
@@ -1066,6 +1050,9 @@ xfs_fs_statfs(
        xfs_sb_t                *sbp = &mp->m_sb;
        struct xfs_inode        *ip = XFS_I(dentry->d_inode);
        __uint64_t              fakeinos, id;
+       __uint64_t              icount;
+       __uint64_t              ifree;
+       __uint64_t              fdblocks;
        xfs_extlen_t            lsize;
        __int64_t               ffree;
 
@@ -1076,17 +1063,21 @@ xfs_fs_statfs(
        statp->f_fsid.val[0] = (u32)id;
        statp->f_fsid.val[1] = (u32)(id >> 32);
 
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+       icount = percpu_counter_sum(&mp->m_icount);
+       ifree = percpu_counter_sum(&mp->m_ifree);
+       fdblocks = percpu_counter_sum(&mp->m_fdblocks);
 
        spin_lock(&mp->m_sb_lock);
        statp->f_bsize = sbp->sb_blocksize;
        lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
        statp->f_blocks = sbp->sb_dblocks - lsize;
-       statp->f_bfree = statp->f_bavail =
-                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       spin_unlock(&mp->m_sb_lock);
+
+       statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       statp->f_bavail = statp->f_bfree;
+
        fakeinos = statp->f_bfree << sbp->sb_inopblog;
-       statp->f_files =
-           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+       statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
        if (mp->m_maxicount)
                statp->f_files = min_t(typeof(statp->f_files),
                                        statp->f_files,
@@ -1098,10 +1089,9 @@ xfs_fs_statfs(
                                        sbp->sb_icount);
 
        /* make sure statp->f_ffree does not underflow */
-       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       ffree = statp->f_files - (icount - ifree);
        statp->f_ffree = max_t(__int64_t, ffree, 0);
 
-       spin_unlock(&mp->m_sb_lock);
 
        if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
            ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
@@ -1382,6 +1372,51 @@ xfs_finish_flags(
        return 0;
 }
 
+static int
+xfs_init_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       int             error;
+
+       error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
+       if (error)
+               return -ENOMEM;
+
+       error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
+       if (error)
+               goto free_icount;
+
+       error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
+       if (error)
+               goto free_ifree;
+
+       return 0;
+
+free_ifree:
+       percpu_counter_destroy(&mp->m_ifree);
+free_icount:
+       percpu_counter_destroy(&mp->m_icount);
+       return -ENOMEM;
+}
+
+void
+xfs_reinit_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
+       percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
+       percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
+}
+
+static void
+xfs_destroy_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       percpu_counter_destroy(&mp->m_icount);
+       percpu_counter_destroy(&mp->m_ifree);
+       percpu_counter_destroy(&mp->m_fdblocks);
+}
+
 STATIC int
 xfs_fs_fill_super(
        struct super_block      *sb,
@@ -1430,7 +1465,7 @@ xfs_fs_fill_super(
        if (error)
                goto out_close_devices;
 
-       error = xfs_icsb_init_counters(mp);
+       error = xfs_init_percpu_counters(mp);
        if (error)
                goto out_destroy_workqueues;
 
@@ -1488,7 +1523,7 @@ xfs_fs_fill_super(
  out_free_sb:
        xfs_freesb(mp);
  out_destroy_counters:
-       xfs_icsb_destroy_counters(mp);
+       xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
        xfs_destroy_mount_workqueues(mp);
  out_close_devices:
@@ -1505,6 +1540,24 @@ out_destroy_workqueues:
        goto out_free_sb;
 }
 
+STATIC void
+xfs_fs_put_super(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_notice(mp, "Unmounting Filesystem");
+       xfs_filestream_unmount(mp);
+       xfs_unmountfs(mp);
+
+       xfs_freesb(mp);
+       xfs_destroy_percpu_counters(mp);
+       xfs_destroy_mount_workqueues(mp);
+       xfs_close_devices(mp);
+       xfs_free_fsname(mp);
+       kfree(mp);
+}
+
 STATIC struct dentry *
 xfs_fs_mount(
        struct file_system_type *fs_type,
index 2b830c2f322e36ec4ce58753c4db96c2b6842a58..499058fea303a81fa3d77f15123a83a2bf6f28b8 100644 (file)
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
 extern const struct xattr_handler *xfs_xattr_handlers[];
 extern const struct quotactl_ops xfs_quotactl_operations;
 
+extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
+
 #define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
 
 #endif /* __XFS_SUPER_H__ */
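
One detail worth calling out in the statfs hunk above: the percpu_counter_sum() calls were moved ahead of spin_lock(&mp->m_sb_lock). Summing walks every CPU's partial count under the counter's internal lock, so keeping it outside the superblock critical section keeps m_sb_lock hold times short. A sketch of the accuracy/speed trade-off the generic counters expose (helper name invented):

#include <linux/percpu_counter.h>
#include <linux/types.h>

static s64 demo_read_counter(struct percpu_counter *fbc, bool exact)
{
        if (exact)
                /* O(nr_cpus): folds every per-cpu delta into the total */
                return percpu_counter_sum(fbc);
        /*
         * Cheap read of the shared total; may lag the true value by up
         * to nr_cpus * percpu_counter_batch, and is clamped at zero.
         */
        return percpu_counter_read_positive(fbc);
}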
index 7e45fa155ea821fd43ba6fde78d8547a7ded60c8..b2a45cc9eceb1ec73d01d76d13cde12def37f9e7 100644 (file)
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
                __entry->refcount = refcount;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+       TP_printk("dev %d:%d agno %u refcount %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __entry->refcount,
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 "offset %lld block %lld count %lld flag %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 "offset %lld block %lld count %lld flag %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
+                 "lock %d flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->nblks,
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
+                 "lock %d flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->buffer_length,
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d error %d flags %s caller %pf",
+                 "lock %d error %d flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->buffer_length,
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
                __entry->lock_flags = lock_flags;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+       TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -686,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
 
+DEFINE_INODE_EVENT(xfs_filemap_fault);
+DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
+
 DECLARE_EVENT_CLASS(xfs_iref_class,
        TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
        TP_ARGS(ip, caller_ip),
@@ -703,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
                __entry->pincount = atomic_read(&ip->i_pincount);
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->count,
@@ -1334,7 +1337,7 @@ TRACE_EVENT(xfs_bunmap,
                __entry->flags = flags;
        ),
        TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-                 "flags %s caller %pf",
+                 "flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->size,
@@ -1467,7 +1470,7 @@ TRACE_EVENT(xfs_agf,
        ),
        TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
                  "levels b %u c %u flfirst %u fllast %u flcount %u "
-                 "freeblks %u longest %u caller %pf",
+                 "freeblks %u longest %u caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
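
All the trace hunks above are the same one-character fix: these events store a raw return address (caller_ip, typically captured with _RET_IP_), and %ps prints such a text address as a symbol name, while %pf expects a function descriptor and differs only on descriptor architectures such as ia64 or ppc64. A trivial sketch (demo name invented):

#include <linux/kernel.h>
#include <linux/printk.h>

static void demo_report_caller(unsigned long caller_ip)
{
        /* resolves the address to a kernel symbol name via kallsyms */
        pr_debug("called from %ps\n", (void *)caller_ip);
}

/* at a call site: demo_report_caller(_RET_IP_); */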
index eb90cd59a0ec6298ec2bb98cae11fd2d4af57bc5..220ef2c906b2e2da7fcdfc0278a6256c59943691 100644 (file)
@@ -173,7 +173,7 @@ xfs_trans_reserve(
        uint                    rtextents)
 {
        int             error = 0;
-       int             rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       bool            rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
        /* Mark this thread as being in a transaction */
        current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
         * fail if the count would go below zero.
         */
        if (blocks > 0) {
-               error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
-                                         -((int64_t)blocks), rsvd);
+               error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
                if (error != 0) {
                        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
                        return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
         * fail if the count would go below zero.
         */
        if (rtextents > 0) {
-               error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
-                                         -((int64_t)rtextents), rsvd);
+               error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
                if (error) {
                        error = -ENOSPC;
                        goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
 
 undo_blocks:
        if (blocks > 0) {
-               xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
-                                        (int64_t)blocks, rsvd);
+               xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd);
                tp->t_blk_res = 0;
        }
 
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
                                  sizeof(sbp->sb_frextents) - 1);
 }
 
+STATIC int
+xfs_sb_mod8(
+       uint8_t                 *field,
+       int8_t                  delta)
+{
+       int8_t                  counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
+STATIC int
+xfs_sb_mod32(
+       uint32_t                *field,
+       int32_t                 delta)
+{
+       int32_t                 counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
+STATIC int
+xfs_sb_mod64(
+       uint64_t                *field,
+       int64_t                 delta)
+{
+       int64_t                 counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
 /*
  * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
  * and apply superblock counter changes to the in-core superblock.  The
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
  * applied to the in-core superblock.  The idea is that that has already been
  * done.
  *
- * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
- * However, we have to ensure that we modify each superblock field only
- * once, because the application of the delta values may not be atomic. That can
- * lead to ENOSPC races occurring if we have two separate modifications of the
- * free space counter to put back the entire reservation and then take away
- * what we used.
- *
  * If we are not logging superblock counters, then the inode allocated/free and
  * used block counts are not updated in the on disk superblock. In this case,
  * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
  */
 void
 xfs_trans_unreserve_and_mod_sb(
-       xfs_trans_t     *tp)
+       struct xfs_trans        *tp)
 {
-       xfs_mod_sb_t    msb[9]; /* If you add cases, add entries */
-       xfs_mod_sb_t    *msbp;
-       xfs_mount_t     *mp = tp->t_mountp;
-       /* REFERENCED */
-       int             error;
-       int             rsvd;
-       int64_t         blkdelta = 0;
-       int64_t         rtxdelta = 0;
-       int64_t         idelta = 0;
-       int64_t         ifreedelta = 0;
-
-       msbp = msb;
-       rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       struct xfs_mount        *mp = tp->t_mountp;
+       bool                    rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       int64_t                 blkdelta = 0;
+       int64_t                 rtxdelta = 0;
+       int64_t                 idelta = 0;
+       int64_t                 ifreedelta = 0;
+       int                     error;
 
        /* calculate deltas */
        if (tp->t_blk_res > 0)
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
 
        /* apply the per-cpu counters */
        if (blkdelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                blkdelta, rsvd);
+               error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
                if (error)
                        goto out;
        }
 
        if (idelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
-                                                idelta, rsvd);
+               error = xfs_mod_icount(mp, idelta);
                if (error)
                        goto out_undo_fdblocks;
        }
 
        if (ifreedelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
-                                                ifreedelta, rsvd);
+               error = xfs_mod_ifree(mp, ifreedelta);
                if (error)
                        goto out_undo_icount;
        }
 
+       if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
+               return;
+
        /* apply remaining deltas */
-       if (rtxdelta != 0) {
-               msbp->msb_field = XFS_SBS_FREXTENTS;
-               msbp->msb_delta = rtxdelta;
-               msbp++;
+       spin_lock(&mp->m_sb_lock);
+       if (rtxdelta) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
+               if (error)
+                       goto out_undo_ifree;
        }
 
-       if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
-               if (tp->t_dblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_DBLOCKS;
-                       msbp->msb_delta = tp->t_dblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_agcount_delta != 0) {
-                       msbp->msb_field = XFS_SBS_AGCOUNT;
-                       msbp->msb_delta = tp->t_agcount_delta;
-                       msbp++;
-               }
-               if (tp->t_imaxpct_delta != 0) {
-                       msbp->msb_field = XFS_SBS_IMAX_PCT;
-                       msbp->msb_delta = tp->t_imaxpct_delta;
-                       msbp++;
-               }
-               if (tp->t_rextsize_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTSIZE;
-                       msbp->msb_delta = tp->t_rextsize_delta;
-                       msbp++;
-               }
-               if (tp->t_rbmblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_RBMBLOCKS;
-                       msbp->msb_delta = tp->t_rbmblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_rblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_RBLOCKS;
-                       msbp->msb_delta = tp->t_rblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_rextents_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTENTS;
-                       msbp->msb_delta = tp->t_rextents_delta;
-                       msbp++;
-               }
-               if (tp->t_rextslog_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTSLOG;
-                       msbp->msb_delta = tp->t_rextslog_delta;
-                       msbp++;
-               }
+       if (tp->t_dblocks_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
+               if (error)
+                       goto out_undo_frextents;
        }
-
-       /*
-        * If we need to change anything, do it.
-        */
-       if (msbp > msb) {
-               error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
-                       (uint)(msbp - msb), rsvd);
+       if (tp->t_agcount_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
                if (error)
-                       goto out_undo_ifreecount;
+                       goto out_undo_dblocks;
        }
-
+       if (tp->t_imaxpct_delta != 0) {
+               error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
+               if (error)
+                       goto out_undo_agcount;
+       }
+       if (tp->t_rextsize_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
+                                    tp->t_rextsize_delta);
+               if (error)
+                       goto out_undo_imaxpct;
+       }
+       if (tp->t_rbmblocks_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
+                                    tp->t_rbmblocks_delta);
+               if (error)
+                       goto out_undo_rextsize;
+       }
+       if (tp->t_rblocks_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
+               if (error)
+                       goto out_undo_rbmblocks;
+       }
+       if (tp->t_rextents_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
+                                    tp->t_rextents_delta);
+               if (error)
+                       goto out_undo_rblocks;
+       }
+       if (tp->t_rextslog_delta != 0) {
+               error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
+                                    tp->t_rextslog_delta);
+               if (error)
+                       goto out_undo_rextents;
+       }
+       spin_unlock(&mp->m_sb_lock);
        return;
 
-out_undo_ifreecount:
+out_undo_rextents:
+       if (tp->t_rextents_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
+out_undo_rblocks:
+       if (tp->t_rblocks_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
+out_undo_rbmblocks:
+       if (tp->t_rbmblocks_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
+out_undo_rextsize:
+       if (tp->t_rextsize_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
+out_undo_imaxpct:
+       if (tp->t_imaxpct_delta)
+               xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
+out_undo_agcount:
+       if (tp->t_agcount_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
+out_undo_dblocks:
+       if (tp->t_dblocks_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
+out_undo_frextents:
+       if (rtxdelta)
+               xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
+out_undo_ifree:
+       spin_unlock(&mp->m_sb_lock);
        if (ifreedelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
+               xfs_mod_ifree(mp, -ifreedelta);
 out_undo_icount:
        if (idelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
+               xfs_mod_icount(mp, -idelta);
 out_undo_fdblocks:
        if (blkdelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
+               xfs_mod_fdblocks(mp, -blkdelta, rsvd);
 out:
        ASSERT(error == 0);
        return;
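
The rewritten xfs_trans_unreserve_and_mod_sb() above is a textbook goto unwind ladder: each counter update that can fail jumps to a label that rolls back everything applied so far, strictly in reverse order, so the in-core superblock is never left holding a partial set of deltas. A self-contained sketch of the idiom (demo_* names invented):

#include <linux/errno.h>
#include <linux/types.h>

struct demo_counters { s64 a, b, c; };

static int demo_mod(s64 *field, s64 delta)
{
        if (*field + delta < 0)
                return -EINVAL;         /* refuse to drive a counter negative */
        *field += delta;
        return 0;
}

static int demo_apply(struct demo_counters *d, s64 da, s64 db, s64 dc)
{
        int error;

        error = demo_mod(&d->a, da);
        if (error)
                return error;
        error = demo_mod(&d->b, db);
        if (error)
                goto undo_a;
        error = demo_mod(&d->c, dc);
        if (error)
                goto undo_b;
        return 0;

undo_b:
        demo_mod(&d->b, -db);           /* unwind in reverse order */
undo_a:
        demo_mod(&d->a, -da);
        return error;
}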