Merge remote-tracking branch 'hid/for-next'

[karo-tx-linux.git] / fs / xfs / xfs_bmap_util.c
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index 541d59f5e65822270fa225c93aa94bbf9a687b06..5887e41c0323ae85f867cc9bc83bbdf8c1e41cd1 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -18,31 +18,31 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
  #include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
  #include "xfs_sb.h"
  #include "xfs_ag.h"
  #include "xfs_mount.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
+#include "xfs_da_format.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
+#include "xfs_trans.h"
  #include "xfs_extfree_item.h"
  #include "xfs_alloc.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
  #include "xfs_rtalloc.h"
  #include "xfs_error.h"
  #include "xfs_quota.h"
  #include "xfs_trans_space.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
+#include "xfs_log.h"
+#include "xfs_dinode.h"
  
  /* Kernel only BMAP related definitions and functions */
  
@@ -612,13 +612,9 @@ xfs_getbmap(
  
         if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
                 return XFS_ERROR(ENOMEM);
-       out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
-       if (!out) {
-               out = kmem_zalloc_large(bmv->bmv_count *
-                                       sizeof(struct getbmapx));
-               if (!out)
-                       return XFS_ERROR(ENOMEM);
-       }
+       out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
+       if (!out)
+               return XFS_ERROR(ENOMEM);
  
         xfs_ilock(ip, XFS_IOLOCK_SHARED);
         if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -754,10 +750,7 @@ xfs_getbmap(
                         break;
         }
  
-       if (is_vmalloc_addr(out))
-               kmem_free_large(out);
-       else
-               kmem_free(out);
+       kmem_free(out);
         return error;
  }
  
@@ -972,32 +965,12 @@ xfs_free_eofblocks(
         return error;
  }
  
-/*
- * xfs_alloc_file_space()
- *      This routine allocates disk space for the given file.
- *
- *     If alloc_type == 0, this request is for an ALLOCSP type
- *     request which will change the file size.  In this case, no
- *     DMAPI event will be generated by the call.  A TRUNCATE event
- *     will be generated later by xfs_setattr.
- *
- *     If alloc_type != 0, this request is for a RESVSP type
- *     request, and a DMAPI DM_EVENT_WRITE will be generated if the
- *     lower block boundary byte address is less than the file's
- *     length.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-STATIC int
+int
  xfs_alloc_file_space(
-       xfs_inode_t             *ip,
+       struct xfs_inode        *ip,
         xfs_off_t               offset,
         xfs_off_t               len,
-       int                     alloc_type,
-       int                     attr_flags)
+       int                     alloc_type)
  {
         xfs_mount_t             *mp = ip->i_mount;
         xfs_off_t               count;
@@ -1239,24 +1212,11 @@ xfs_zero_remaining_bytes(
         return error;
  }
  
-/*
- * xfs_free_file_space()
- *      This routine frees disk space for the given file.
- *
- *     This routine is only called by xfs_change_file_space
- *     for an UNRESVSP type call.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-STATIC int
+int
  xfs_free_file_space(
-       xfs_inode_t             *ip,
+       struct xfs_inode        *ip,
         xfs_off_t               offset,
-       xfs_off_t               len,
-       int                     attr_flags)
+       xfs_off_t               len)
  {
         int                     committed;
         int                     done;
@@ -1274,7 +1234,6 @@ xfs_free_file_space(
         int                     rt;
         xfs_fileoff_t           startoffset_fsb;
         xfs_trans_t             *tp;
-       int                     need_iolock = 1;
  
         mp = ip->i_mount;
  
@@ -1291,20 +1250,15 @@ xfs_free_file_space(
         startoffset_fsb = XFS_B_TO_FSB(mp, offset);
         endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
  
-       if (attr_flags & XFS_ATTR_NOLOCK)
-               need_iolock = 0;
-       if (need_iolock) {
-               xfs_ilock(ip, XFS_IOLOCK_EXCL);
-               /* wait for the completion of any pending DIOs */
-               inode_dio_wait(VFS_I(ip));
-       }
+       /* wait for the completion of any pending DIOs */
+       inode_dio_wait(VFS_I(ip));
  
         rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
         ioffset = offset & ~(rounding - 1);
         error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                               ioffset, -1);
         if (error)
-               goto out_unlock_iolock;
+               goto out;
         truncate_pagecache_range(VFS_I(ip), ioffset, -1);
  
         /*
@@ -1318,7 +1272,7 @@ xfs_free_file_space(
                 error = xfs_bmapi_read(ip, startoffset_fsb, 1,
                                         &imap, &nimap, 0);
                 if (error)
-                       goto out_unlock_iolock;
+                       goto out;
                 ASSERT(nimap == 0 || nimap == 1);
                 if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
                         xfs_daddr_t     block;
@@ -1333,7 +1287,7 @@ xfs_free_file_space(
                 error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
                                         &imap, &nimap, 0);
                 if (error)
-                       goto out_unlock_iolock;
+                       goto out;
                 ASSERT(nimap == 0 || nimap == 1);
                 if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
                         ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
@@ -1419,27 +1373,23 @@ xfs_free_file_space(
                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
         }
  
- out_unlock_iolock:
-       if (need_iolock)
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ out:
         return error;
  
   error0:
         xfs_bmap_cancel(&free_list);
   error1:
         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-       xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
-                   XFS_ILOCK_EXCL);
-       return error;
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       goto out;
  }
  
  
-STATIC int
+int
  xfs_zero_file_space(
         struct xfs_inode        *ip,
         xfs_off_t               offset,
-       xfs_off_t               len,
-       int                     attr_flags)
+       xfs_off_t               len)
  {
         struct xfs_mount        *mp = ip->i_mount;
         uint                    granularity;
@@ -1460,9 +1410,6 @@ xfs_zero_file_space(
         ASSERT(start_boundary >= offset);
         ASSERT(end_boundary <= offset + len);
  
-       if (!(attr_flags & XFS_ATTR_NOLOCK))
-               xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
         if (start_boundary < end_boundary - 1) {
                 /* punch out the page cache over the conversion range */
                 truncate_pagecache_range(VFS_I(ip), start_boundary,
@@ -1470,16 +1417,16 @@ xfs_zero_file_space(
                 /* convert the blocks */
                 error = xfs_alloc_file_space(ip, start_boundary,
                                         end_boundary - start_boundary - 1,
-                                       XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
-                                       attr_flags);
+                                       XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);
                 if (error)
-                       goto out_unlock;
+                       goto out;
  
                 /* We've handled the interior of the range, now for the edges */
-               if (start_boundary != offset)
+               if (start_boundary != offset) {
                         error = xfs_iozero(ip, offset, start_boundary - offset);
-               if (error)
-                       goto out_unlock;
+                       if (error)
+                               goto out;
+               }
  
                 if (end_boundary != offset + len)
                         error = xfs_iozero(ip, end_boundary,
@@ -1493,196 +1440,11 @@ xfs_zero_file_space(
                 error = xfs_iozero(ip, offset, len);
         }
  
-out_unlock:
-       if (!(attr_flags & XFS_ATTR_NOLOCK))
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+out:
         return error;
  
  }
  
-/*
- * xfs_change_file_space()
- *      This routine allocates or frees disk space for the given file.
- *      The user specified parameters are checked for alignment and size
- *      limitations.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-int
-xfs_change_file_space(
-       xfs_inode_t     *ip,
-       int             cmd,
-       xfs_flock64_t   *bf,
-       xfs_off_t       offset,
-       int             attr_flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       int             clrprealloc;
-       int             error;
-       xfs_fsize_t     fsize;
-       int             setprealloc;
-       xfs_off_t       startoffset;
-       xfs_trans_t     *tp;
-       struct iattr    iattr;
-
-       if (!S_ISREG(ip->i_d.di_mode))
-               return XFS_ERROR(EINVAL);
-
-       switch (bf->l_whence) {
-       case 0: /*SEEK_SET*/
-               break;
-       case 1: /*SEEK_CUR*/
-               bf->l_start += offset;
-               break;
-       case 2: /*SEEK_END*/
-               bf->l_start += XFS_ISIZE(ip);
-               break;
-       default:
-               return XFS_ERROR(EINVAL);
-       }
-
-       /*
-        * length of <= 0 for resv/unresv/zero is invalid.  length for
-        * alloc/free is ignored completely and we have no idea what userspace
-        * might have set it to, so set it to zero to allow range
-        * checks to pass.
-        */
-       switch (cmd) {
-       case XFS_IOC_ZERO_RANGE:
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_UNRESVSP64:
-               if (bf->l_len <= 0)
-                       return XFS_ERROR(EINVAL);
-               break;
-       default:
-               bf->l_len = 0;
-               break;
-       }
-
-       if (bf->l_start < 0 ||
-           bf->l_start > mp->m_super->s_maxbytes ||
-           bf->l_start + bf->l_len < 0 ||
-           bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
-               return XFS_ERROR(EINVAL);
-
-       bf->l_whence = 0;
-
-       startoffset = bf->l_start;
-       fsize = XFS_ISIZE(ip);
-
-       setprealloc = clrprealloc = 0;
-       switch (cmd) {
-       case XFS_IOC_ZERO_RANGE:
-               error = xfs_zero_file_space(ip, startoffset, bf->l_len,
-                                               attr_flags);
-               if (error)
-                       return error;
-               setprealloc = 1;
-               break;
-
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_RESVSP64:
-               error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
-                                               XFS_BMAPI_PREALLOC, attr_flags);
-               if (error)
-                       return error;
-               setprealloc = 1;
-               break;
-
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_UNRESVSP64:
-               if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
-                                                               attr_flags)))
-                       return error;
-               break;
-
-       case XFS_IOC_ALLOCSP:
-       case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP:
-       case XFS_IOC_FREESP64:
-               /*
-                * These operations actually do IO when extending the file, but
-                * the allocation is done seperately to the zeroing that is
-                * done. This set of operations need to be serialised against
-                * other IO operations, such as truncate and buffered IO. We
-                * need to take the IOLOCK here to serialise the allocation and
-                * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
-                * truncate, direct IO) from racing against the transient
-                * allocated but not written state we can have here.
-                */
-               xfs_ilock(ip, XFS_IOLOCK_EXCL);
-               if (startoffset > fsize) {
-                       error = xfs_alloc_file_space(ip, fsize,
-                                       startoffset - fsize, 0,
-                                       attr_flags | XFS_ATTR_NOLOCK);
-                       if (error) {
-                               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                               break;
-                       }
-               }
-
-               iattr.ia_valid = ATTR_SIZE;
-               iattr.ia_size = startoffset;
-
-               error = xfs_setattr_size(ip, &iattr,
-                                        attr_flags | XFS_ATTR_NOLOCK);
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-
-               if (error)
-                       return error;
-
-               clrprealloc = 1;
-               break;
-
-       default:
-               ASSERT(0);
-               return XFS_ERROR(EINVAL);
-       }
-
-       /*
-        * update the inode timestamp, mode, and prealloc flag bits
-        */
-       tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
-       if ((attr_flags & XFS_ATTR_DMI) == 0) {
-               ip->i_d.di_mode &= ~S_ISUID;
-
-               /*
-                * Note that we don't have to worry about mandatory
-                * file locking being disabled here because we only
-                * clear the S_ISGID bit if the Group execute bit is
-                * on, but if it was on then mandatory locking wouldn't
-                * have been enabled.
-                */
-               if (ip->i_d.di_mode & S_IXGRP)
-                       ip->i_d.di_mode &= ~S_ISGID;
-
-               xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       }
-       if (setprealloc)
-               ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
-       else if (clrprealloc)
-               ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       if (attr_flags & XFS_ATTR_SYNC)
-               xfs_trans_set_sync(tp);
-       return xfs_trans_commit(tp, 0);
-}
-
  /*
   * We need to check that the format of the data fork in the temporary inode is
   * valid for the target inode before doing the swap. This is not a problem with
@@ -1789,14 +1551,6 @@ xfs_swap_extents(
         int             taforkblks = 0;
         __uint64_t      tmp;
  
-       /*
-        * We have no way of updating owner information in the BMBT blocks for
-        * each inode on CRC enabled filesystems, so to avoid corrupting the
-        * this metadata we simply don't allow extent swaps to occur.
-        */
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               return XFS_ERROR(EINVAL);
-
         tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
         if (!tempifp) {
                 error = XFS_ERROR(ENOMEM);
@@ -1920,6 +1674,42 @@ xfs_swap_extents(
                         goto out_trans_cancel;
         }
  
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+       /*
+        * Before we've swapped the forks, let's set the owners of the forks
+        * appropriately. We have to do this as we are demand paging the btree
+        * buffers, and so the validation done on read will expect the owner
+        * field to be correctly set. Once we change the owners, we can swap the
+        * inode forks.
+        *
+        * Note the trickiness in setting the log flags - we set the owner log
+        * flag on the opposite inode (i.e. the inode we are setting the new
+        * owner to be) because once we swap the forks and log that, log
+        * recovery is going to see the fork as owned by the swapped inode,
+        * not the pre-swapped inodes.
+        */
+       src_log_flags = XFS_ILOG_CORE;
+       target_log_flags = XFS_ILOG_CORE;
+       if (ip->i_d.di_version == 3 &&
+           ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+               target_log_flags |= XFS_ILOG_DOWNER;
+               error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
+                                             tip->i_ino, NULL);
+               if (error)
+                       goto out_trans_cancel;
+       }
+
+       if (tip->i_d.di_version == 3 &&
+           tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+               src_log_flags |= XFS_ILOG_DOWNER;
+               error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
+                                             ip->i_ino, NULL);
+               if (error)
+                       goto out_trans_cancel;
+       }
+
         /*
          * Swap the data forks of the inodes
          */
@@ -1957,7 +1747,6 @@ xfs_swap_extents(
         tip->i_delayed_blks = ip->i_delayed_blks;
         ip->i_delayed_blks = 0;
  
-       src_log_flags = XFS_ILOG_CORE;
         switch (ip->i_d.di_format) {
         case XFS_DINODE_FMT_EXTENTS:
                 /* If the extents fit in the inode, fix the
@@ -1971,11 +1760,12 @@ xfs_swap_extents(
                 src_log_flags |= XFS_ILOG_DEXT;
                 break;
         case XFS_DINODE_FMT_BTREE:
+               ASSERT(ip->i_d.di_version < 3 ||
+                      (src_log_flags & XFS_ILOG_DOWNER));
                 src_log_flags |= XFS_ILOG_DBROOT;
                 break;
         }
  
-       target_log_flags = XFS_ILOG_CORE;
         switch (tip->i_d.di_format) {
         case XFS_DINODE_FMT_EXTENTS:
                 /* If the extents fit in the inode, fix the
@@ -1990,13 +1780,11 @@ xfs_swap_extents(
                 break;
         case XFS_DINODE_FMT_BTREE:
                 target_log_flags |= XFS_ILOG_DBROOT;
+               ASSERT(tip->i_d.di_version < 3 ||
+                      (target_log_flags & XFS_ILOG_DOWNER));
                 break;
         }
  
-
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-       xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
         xfs_trans_log_inode(tp, ip,  src_log_flags);
         xfs_trans_log_inode(tp, tip, target_log_flags);