Merge tag 'xfs-for-linus-v3.12-rc1-2' of git://oss.sgi.com/xfs/xfs

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 12 Sep 2013 23:13:41 +0000 (16:13 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 12 Sep 2013 23:13:41 +0000 (16:13 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Sep 2013 23:13:41 +0000 (16:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Sep 2013 23:13:41 +0000 (16:13 -0700)
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c

index 4a7286c1dc80d270af40a3733870bb9dd769ee82..a02cfb9e3bcea43d49a033f313387fa217aa789c 100644 (file)
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -27,8 +27,6 @@
  
  /*
   * Greedy allocation.  May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
   */
  void *
  kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
@@ -36,7 +34,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
         void            *ptr;
         size_t          kmsize = maxsize;
  
-       while (!(ptr = kmem_zalloc_large(kmsize))) {
+       while (!(ptr = vzalloc(kmsize))) {
                 if ((kmsize >>= 1) <= minsize)
                         kmsize = minsize;
         }
@@ -75,6 +73,17 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
         return ptr;
  }
  
+void *
+kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+{
+       void    *ptr;
+
+       ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
+       if (ptr)
+               return ptr;
+       return vzalloc(size);
+}
+
  void
  kmem_free(const void *ptr)
  {
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h

index b2f2620f9a87b9f1bf6836c8faf3d5039e7af94f..3a7371cab508a7ffea0fc9441d5319026ce089e2 100644 (file)
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -57,17 +57,10 @@ kmem_flags_convert(xfs_km_flags_t flags)
  
  extern void *kmem_alloc(size_t, xfs_km_flags_t);
  extern void *kmem_zalloc(size_t, xfs_km_flags_t);
+extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
  extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);
  extern void  kmem_free(const void *);
  
-static inline void *kmem_zalloc_large(size_t size)
-{
-       return vzalloc(size);
-}
-static inline void kmem_free_large(void *ptr)
-{
-       vfree(ptr);
-}
  
  extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
  
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c

index 69518960b2ba17e888d71f75d30e4df57d0cca73..0e2f37efedd0547a05b5bec4c0109db83192bb72 100644 (file)
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -152,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type)
          * go out to the disk.
          */
         len = XFS_ACL_MAX_SIZE(ip->i_mount);
-       xfs_acl = kzalloc(len, GFP_KERNEL);
+       xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
         if (!xfs_acl)
                 return ERR_PTR(-ENOMEM);
  
@@ -175,10 +175,10 @@ xfs_get_acl(struct inode *inode, int type)
         if (IS_ERR(acl))
                 goto out;
  
- out_update_cache:
+out_update_cache:
         set_cached_acl(inode, type, acl);
- out:
-       kfree(xfs_acl);
+out:
+       kmem_free(xfs_acl);
         return acl;
  }
  
@@ -209,7 +209,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                 struct xfs_acl *xfs_acl;
                 int len = XFS_ACL_MAX_SIZE(ip->i_mount);
  
-               xfs_acl = kzalloc(len, GFP_KERNEL);
+               xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
                 if (!xfs_acl)
                         return -ENOMEM;
  
@@ -222,7 +222,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                 error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
                                 len, ATTR_ROOT);
  
-               kfree(xfs_acl);
+               kmem_free(xfs_acl);
         } else {
                 /*
                  * A NULL ACL argument means we want to remove the ACL.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c

index 92b830901d60bcf2b662315bb1625b2944964ddf..f47e65c30be6ddde5caa276d3262540d603d07dc 100644 (file)
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4450,7 +4450,7 @@ xfs_bmapi_write(
  {
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_ifork        *ifp;
-       struct xfs_bmalloca     bma = { 0 };    /* args for xfs_bmap_alloc */
+       struct xfs_bmalloca     bma = { NULL }; /* args for xfs_bmap_alloc */
         xfs_fileoff_t           end;            /* end of mapped file region */
         int                     eof;            /* after the end of extents */
         int                     error;          /* error return */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c

index cf3bc76710c3de6e021b37ccc275894458f8c931..bb8de8e399c4b351effa08e6669e000438751d37 100644 (file)
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -925,3 +925,47 @@ xfs_bmdr_maxrecs(
                 return blocklen / sizeof(xfs_bmdr_rec_t);
         return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
  }
+
+/*
+ * Change the owner of a btree format fork of the inode passed in. Change it to
+ * the owner that is passed in so that we can change owners before or after
+ * we switch forks between inodes. The operation that the caller is doing will
+ * determine whether it needs to change owner before or after the switch.
+ *
+ * For demand paged transactional modification, the fork switch should be done
+ * after reading in all the blocks, modifying them and pinning them in the
+ * transaction. For modification when the buffers are already pinned in memory,
+ * the fork switch can be done before changing the owner as we won't need to
+ * validate the owner until the btree buffers are unpinned and writes can occur
+ * again.
+ *
+ * For recovery based ownership change, there is no transactional context and
+ * so a buffer list must be supplied so that we can record the buffers that we
+ * modified for the caller to issue IO on.
+ */
+int
+xfs_bmbt_change_owner(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       xfs_ino_t               new_owner,
+       struct list_head        *buffer_list)
+{
+       struct xfs_btree_cur    *cur;
+       int                     error;
+
+       ASSERT(tp || buffer_list);
+       ASSERT(!(tp && buffer_list));
+       if (whichfork == XFS_DATA_FORK)
+               ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
+       else
+               ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
+
+       cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
+       if (!cur)
+               return ENOMEM;
+
+       error = xfs_btree_change_owner(cur, new_owner, buffer_list);
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       return error;
+}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h

index 1b726d6269412d8a9685a6db71c33bf9da9558ae..e367461a638e5b65ffdedc77bed6121dc1150161 100644 (file)
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -236,6 +236,10 @@ extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
  extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
  extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
  
+extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
+                                int whichfork, xfs_ino_t new_owner,
+                                struct list_head *buffer_list);
+
  extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
                 struct xfs_trans *, struct xfs_inode *, int);
  
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index 541d59f5e65822270fa225c93aa94bbf9a687b06..97f952caea74bd8311a3269ca7520e87b01fcb18 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -612,13 +612,9 @@ xfs_getbmap(
  
         if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
                 return XFS_ERROR(ENOMEM);
-       out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
-       if (!out) {
-               out = kmem_zalloc_large(bmv->bmv_count *
-                                       sizeof(struct getbmapx));
-               if (!out)
-                       return XFS_ERROR(ENOMEM);
-       }
+       out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
+       if (!out)
+               return XFS_ERROR(ENOMEM);
  
         xfs_ilock(ip, XFS_IOLOCK_SHARED);
         if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -754,10 +750,7 @@ xfs_getbmap(
                         break;
         }
  
-       if (is_vmalloc_addr(out))
-               kmem_free_large(out);
-       else
-               kmem_free(out);
+       kmem_free(out);
         return error;
  }
  
@@ -1789,14 +1782,6 @@ xfs_swap_extents(
         int             taforkblks = 0;
         __uint64_t      tmp;
  
-       /*
-        * We have no way of updating owner information in the BMBT blocks for
-        * each inode on CRC enabled filesystems, so to avoid corrupting the
-        * this metadata we simply don't allow extent swaps to occur.
-        */
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               return XFS_ERROR(EINVAL);
-
         tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
         if (!tempifp) {
                 error = XFS_ERROR(ENOMEM);
@@ -1920,6 +1905,42 @@ xfs_swap_extents(
                         goto out_trans_cancel;
         }
  
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+       /*
+        * Before we've swapped the forks, let's set the owners of the forks
+        * appropriately. We have to do this as we are demand paging the btree
+        * buffers, and so the validation done on read will expect the owner
+        * field to be correctly set. Once we change the owners, we can swap the
+        * inode forks.
+        *
+        * Note the trickiness in setting the log flags - we set the owner log
+        * flag on the opposite inode (i.e. the inode we are setting the new
+        * owner to be) because once we swap the forks and log that, log
+        * recovery is going to see the fork as owned by the swapped inode,
+        * not the pre-swapped inodes.
+        */
+       src_log_flags = XFS_ILOG_CORE;
+       target_log_flags = XFS_ILOG_CORE;
+       if (ip->i_d.di_version == 3 &&
+           ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+               target_log_flags |= XFS_ILOG_DOWNER;
+               error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
+                                             tip->i_ino, NULL);
+               if (error)
+                       goto out_trans_cancel;
+       }
+
+       if (tip->i_d.di_version == 3 &&
+           tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+               src_log_flags |= XFS_ILOG_DOWNER;
+               error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
+                                             ip->i_ino, NULL);
+               if (error)
+                       goto out_trans_cancel;
+       }
+
         /*
          * Swap the data forks of the inodes
          */
@@ -1957,7 +1978,6 @@ xfs_swap_extents(
         tip->i_delayed_blks = ip->i_delayed_blks;
         ip->i_delayed_blks = 0;
  
-       src_log_flags = XFS_ILOG_CORE;
         switch (ip->i_d.di_format) {
         case XFS_DINODE_FMT_EXTENTS:
                 /* If the extents fit in the inode, fix the
@@ -1971,11 +1991,12 @@ xfs_swap_extents(
                 src_log_flags |= XFS_ILOG_DEXT;
                 break;
         case XFS_DINODE_FMT_BTREE:
+               ASSERT(ip->i_d.di_version < 3 ||
+                      (src_log_flags & XFS_ILOG_DOWNER));
                 src_log_flags |= XFS_ILOG_DBROOT;
                 break;
         }
  
-       target_log_flags = XFS_ILOG_CORE;
         switch (tip->i_d.di_format) {
         case XFS_DINODE_FMT_EXTENTS:
                 /* If the extents fit in the inode, fix the
@@ -1990,13 +2011,11 @@ xfs_swap_extents(
                 break;
         case XFS_DINODE_FMT_BTREE:
                 target_log_flags |= XFS_ILOG_DBROOT;
+               ASSERT(tip->i_d.di_version < 3 ||
+                      (target_log_flags & XFS_ILOG_DOWNER));
                 break;
         }
  
-
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-       xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
         xfs_trans_log_inode(tp, ip,  src_log_flags);
         xfs_trans_log_inode(tp, tip, target_log_flags);
  
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c

index 7a2b4da3c0db9a0f77f19a30cea966848ed60cef..5690e102243d70e7b87876f0ef9bc07be058da86 100644 (file)
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -855,6 +855,41 @@ xfs_btree_readahead(
         return xfs_btree_readahead_sblock(cur, lr, block);
  }
  
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+
+               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+       } else {
+               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
+
+               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+                                       be32_to_cpu(ptr->s));
+       }
+}
+
+/*
+ * Readahead @count btree blocks at the given @ptr location.
+ *
+ * We don't need to care about long or short form btrees here as we have a
+ * method of converting the ptr directly to a daddr available to us.
+ */
+STATIC void
+xfs_btree_readahead_ptr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       xfs_extlen_t            count)
+{
+       xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
+                         xfs_btree_ptr_to_daddr(cur, ptr),
+                         cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+}
+
  /*
   * Set the buffer for level "lev" in the cursor to bp, releasing
   * any previous buffer.
@@ -1073,24 +1108,6 @@ xfs_btree_buf_to_ptr(
         }
  }
  
-STATIC xfs_daddr_t
-xfs_btree_ptr_to_daddr(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
-
-               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
-       } else {
-               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
-               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
-
-               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
-                                       be32_to_cpu(ptr->s));
-       }
-}
-
  STATIC void
  xfs_btree_set_refs(
         struct xfs_btree_cur    *cur,
@@ -3869,3 +3886,120 @@ xfs_btree_get_rec(
         *stat = 1;
         return 0;
  }
+
+/*
+ * Change the owner of a btree.
+ *
+ * The mechanism we use here is ordered buffer logging. Because we don't know
+ * how many buffers we are going to need to modify, we don't really want to
+ * have to make transaction reservations for the worst case of every buffer in a
+ * full size btree as that may be more space than we can fit in the log....
+ *
+ * We do the btree walk in the most optimal manner possible - we have sibling
+ * pointers so we can just walk all the blocks on each level from left to right
+ * in a single pass, and then move to the next level and do the same. We can
+ * also do readahead on the sibling pointers to get IO moving more quickly,
+ * though for slow disks this is unlikely to make much difference to performance
+ * as the amount of CPU work we have to do before moving to the next block is
+ * relatively small.
+ *
+ * For each btree block that we load, modify the owner appropriately, set the
+ * buffer as an ordered buffer and log it appropriately. We need to ensure that
+ * we mark the region we change dirty so that if the buffer is relogged in
+ * a subsequent transaction the changes we make here as an ordered buffer are
+ * correctly relogged in that transaction.  If we are in recovery context, then
+ * just queue the modified buffer as delayed write buffer so the transaction
+ * recovery completion writes the changes to disk.
+ */
+static int
+xfs_btree_block_change_owner(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+       union xfs_btree_ptr     rptr;
+
+       /* do right sibling readahead */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+       /* modify the owner */
+       block = xfs_btree_get_block(cur, level, &bp);
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
+       else
+               block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
+
+       /*
+        * If the block is a root block hosted in an inode, we might not have a
+        * buffer pointer here and we shouldn't attempt to log the change as the
+        * information is already held in the inode and discarded when the root
+        * block is formatted into the on-disk inode fork. We still change it,
+        * though, so everything is consistent in memory.
+        */
+       if (bp) {
+               if (cur->bc_tp) {
+                       xfs_trans_ordered_buf(cur->bc_tp, bp);
+                       xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
+               } else {
+                       xfs_buf_delwri_queue(bp, buffer_list);
+               }
+       } else {
+               ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+               ASSERT(level == cur->bc_nlevels - 1);
+       }
+
+       /* now read rh sibling block for next iteration */
+       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+       if (xfs_btree_ptr_is_null(cur, &rptr))
+               return ENOENT;
+
+       return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
+}
+
+int
+xfs_btree_change_owner(
+       struct xfs_btree_cur    *cur,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       union xfs_btree_ptr     lptr;
+       int                     level;
+       struct xfs_btree_block  *block = NULL;
+       int                     error = 0;
+
+       cur->bc_ops->init_ptr_from_cur(cur, &lptr);
+
+       /* for each level */
+       for (level = cur->bc_nlevels - 1; level >= 0; level--) {
+               /* grab the left hand block */
+               error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
+               if (error)
+                       return error;
+
+               /* readahead the left most block for the next level down */
+               if (level > 0) {
+                       union xfs_btree_ptr     *ptr;
+
+                       ptr = xfs_btree_ptr_addr(cur, 1, block);
+                       xfs_btree_readahead_ptr(cur, ptr, 1);
+
+                       /* save for the next iteration of the loop */
+                       lptr = *ptr;
+               }
+
+               /* for each buffer in the level */
+               do {
+                       error = xfs_btree_block_change_owner(cur, level,
+                                                            new_owner,
+                                                            buffer_list);
+               } while (!error);
+
+               if (error != ENOENT)
+                       return error;
+       }
+
+       return 0;
+}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h

index c8473c7ef45e4c764fd61eb1bf6419cb1d98f4ea..06729b67ad58ec2c394a3ecdb1104938ced672c2 100644 (file)
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -121,15 +121,18 @@ union xfs_btree_rec {
  /*
   * For logging record fields.
   */
-#define        XFS_BB_MAGIC            0x01
-#define        XFS_BB_LEVEL            0x02
-#define        XFS_BB_NUMRECS          0x04
-#define        XFS_BB_LEFTSIB          0x08
-#define        XFS_BB_RIGHTSIB         0x10
-#define        XFS_BB_BLKNO            0x20
+#define        XFS_BB_MAGIC            (1 << 0)
+#define        XFS_BB_LEVEL            (1 << 1)
+#define        XFS_BB_NUMRECS          (1 << 2)
+#define        XFS_BB_LEFTSIB          (1 << 3)
+#define        XFS_BB_RIGHTSIB         (1 << 4)
+#define        XFS_BB_BLKNO            (1 << 5)
+#define        XFS_BB_LSN              (1 << 6)
+#define        XFS_BB_UUID             (1 << 7)
+#define        XFS_BB_OWNER            (1 << 8)
  #define        XFS_BB_NUM_BITS         5
  #define        XFS_BB_ALL_BITS         ((1 << XFS_BB_NUM_BITS) - 1)
-#define        XFS_BB_NUM_BITS_CRC     8
+#define        XFS_BB_NUM_BITS_CRC     9
  #define        XFS_BB_ALL_BITS_CRC     ((1 << XFS_BB_NUM_BITS_CRC) - 1)
  
  /*
@@ -442,6 +445,8 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
  int xfs_btree_insert(struct xfs_btree_cur *, int *);
  int xfs_btree_delete(struct xfs_btree_cur *, int *);
  int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
+int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
+                          struct list_head *buffer_list);
  
  /*
   * btree block CRC helpers
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 3a944b198e35a0fbfc758a84fd39a2e0674d360f..88c5ea75ebf66abd175bdf2d71898380f2aca9a8 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -613,13 +613,27 @@ xfs_buf_item_unlock(
                         }
                 }
         }
-       if (clean || aborted) {
-               if (atomic_dec_and_test(&bip->bli_refcount)) {
-                       ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp));
+
+       /*
+        * Clean buffers, by definition, cannot be in the AIL. However, aborted
+        * buffers may be dirty and hence in the AIL. Therefore if we are
+        * aborting a buffer and we've just taken the last reference away, we
+        * have to check if it is in the AIL before freeing it. We need to free
+        * it in this case, because an aborted transaction has already shut the
+        * filesystem down and this is the last chance we will have to do so.
+        */
+       if (atomic_dec_and_test(&bip->bli_refcount)) {
+               if (clean)
+                       xfs_buf_item_relse(bp);
+               else if (aborted) {
+                       ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
+                       if (lip->li_flags & XFS_LI_IN_AIL) {
+                               xfs_trans_ail_delete(lip->li_ailp, lip,
+                                                    SHUTDOWN_LOG_IO_ERROR);
+                       }
                         xfs_buf_item_relse(bp);
                 }
-       } else
-               atomic_dec(&bip->bli_refcount);
+       }
  
         if (!(flags & XFS_BLI_HOLD))
                 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c

index d4e59a4ff59ff1600cf8c9a83ce4b36f47ddfcd0..069537c845e5cc424ce02bf3cec7838250357a32 100644 (file)
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -635,6 +635,7 @@ xfs_da3_root_split(
         xfs_trans_log_buf(tp, bp, 0, size - 1);
  
         bp->b_ops = blk1->bp->b_ops;
+       xfs_trans_buf_copy_type(bp, blk1->bp);
         blk1->bp = bp;
         blk1->blkno = blkno;
  
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c

index 08984eeee159c5d790bdce648caf4fe8cb4ea676..1021c8356d0836318e300adefc4a35f170d120c3 100644 (file)
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -180,6 +180,11 @@ xfs_dir3_leaf_check_int(
         return true;
  }
  
+/*
+ * We verify the magic numbers before decoding the leaf header so that on debug
+ * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
+ * to incorrect magic numbers.
+ */
  static bool
  xfs_dir3_leaf_verify(
         struct xfs_buf          *bp,
@@ -191,24 +196,25 @@ xfs_dir3_leaf_verify(
  
         ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
  
-       xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
         if (xfs_sb_version_hascrc(&mp->m_sb)) {
                 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
+               __uint16_t              magic3;
  
-               if ((magic == XFS_DIR2_LEAF1_MAGIC &&
-                    leafhdr.magic != XFS_DIR3_LEAF1_MAGIC) ||
-                   (magic == XFS_DIR2_LEAFN_MAGIC &&
-                    leafhdr.magic != XFS_DIR3_LEAFN_MAGIC))
-                       return false;
+               magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
+                                                        : XFS_DIR3_LEAFN_MAGIC;
  
+               if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
+                       return false;
                 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
                         return false;
                 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
                         return false;
         } else {
-               if (leafhdr.magic != magic)
+               if (leaf->hdr.info.magic != cpu_to_be16(magic))
                         return false;
         }
+
+       xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
         return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
  }
  
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c

index 60c6e1f126952acc43e1bbe2a1d065f304ed484d..e838d84b4e85697917c8623418c06ae28140a8b7 100644 (file)
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -142,7 +142,8 @@ xfs_qm_dqunpin_wait(
  STATIC uint
  xfs_qm_dquot_logitem_push(
         struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+       struct list_head        *buffer_list) __releases(&lip->li_ailp->xa_lock)
+                                             __acquires(&lip->li_ailp->xa_lock)
  {
         struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
         struct xfs_buf          *bp = NULL;
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c

index 86f559f6e5d3c3f825df5aeffbf6967a7654f055..e43708e2f0806d4daae241ee32e18abcac1a017f 100644 (file)
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -160,7 +160,8 @@ xfs_extent_busy_update_extent(
         struct xfs_extent_busy  *busyp,
         xfs_agblock_t           fbno,
         xfs_extlen_t            flen,
-       bool                    userdata)
+       bool                    userdata) __releases(&pag->pagb_lock)
+                                         __acquires(&pag->pagb_lock)
  {
         xfs_agblock_t           fend = fbno + flen;
         xfs_agblock_t           bbno = busyp->bno;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index 73b62a24ceacaccfcd98f9accc10df40f319137e..193206ba43582c0aecbea7afd1d23220bfff554f 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -48,7 +48,7 @@ STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
  /*
   * Allocate and initialise an xfs_inode.
   */
-STATIC struct xfs_inode *
+struct xfs_inode *
  xfs_inode_alloc(
         struct xfs_mount        *mp,
         xfs_ino_t               ino)
@@ -98,7 +98,7 @@ xfs_inode_free_callback(
         kmem_zone_free(xfs_inode_zone, ip);
  }
  
-STATIC void
+void
  xfs_inode_free(
         struct xfs_inode        *ip)
  {
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h

index 456f0144e1b6f3f189e0117cad3989e2c286f50c..9ed68bb750f50871a1ba17a4a67e24a8b4d4829b 100644 (file)
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -42,6 +42,10 @@ struct xfs_eofblocks {
  int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
              uint flags, uint lock_flags, xfs_inode_t **ipp);
  
+/* recovery needs direct inode allocation capability */
+struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino);
+void xfs_inode_free(struct xfs_inode *ip);
+
  void xfs_reclaim_worker(struct work_struct *work);
  
  int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c

index e011d597f12f6c99250a8e6394cdc48d9b6ef66d..63382d37f5658c8ee774668df7a50a87eeec5834 100644 (file)
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -53,9 +53,8 @@ xfs_inobp_check(
                                         i * mp->m_sb.sb_inodesize);
                 if (!dip->di_next_unlinked)  {
                         xfs_alert(mp,
-       "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
-                               bp);
-                       ASSERT(dip->di_next_unlinked);
+       "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
+                               i, (long long)bp->b_bn);
                 }
         }
  }
@@ -106,11 +105,10 @@ xfs_inode_buf_verify(
                         XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
                                              mp, dip);
  #ifdef DEBUG
-                       xfs_emerg(mp,
+                       xfs_alert(mp,
                                 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
                                 (unsigned long long)bp->b_bn, i,
                                 be16_to_cpu(dip->di_magic));
-                       ASSERT(0);
  #endif
                 }
         }
@@ -196,7 +194,7 @@ xfs_imap_to_bp(
         return 0;
  }
  
-STATIC void
+void
  xfs_dinode_from_disk(
         xfs_icdinode_t          *to,
         xfs_dinode_t            *from)
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h

index 599e6c0ca2a95a75ebccf92374026a8bb15dbbb6..abba0ae8cf2da2b4012445bc2cc5cbf63b8c9a66 100644 (file)
--- a/fs/xfs/xfs_inode_buf.h
+++ b/fs/xfs/xfs_inode_buf.h
@@ -32,17 +32,17 @@ struct xfs_imap {
         ushort          im_boffset;     /* inode offset in block in bytes */
  };
  
-int            xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
-                              struct xfs_imap *, struct xfs_dinode **,
-                              struct xfs_buf **, uint, uint);
-int            xfs_iread(struct xfs_mount *, struct xfs_trans *,
-                         struct xfs_inode *, uint);
-void           xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
-void           xfs_dinode_to_disk(struct xfs_dinode *,
-                                  struct xfs_icdinode *);
+int    xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
+                      struct xfs_imap *, struct xfs_dinode **,
+                      struct xfs_buf **, uint, uint);
+int    xfs_iread(struct xfs_mount *, struct xfs_trans *,
+                 struct xfs_inode *, uint);
+void   xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
+void   xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from);
+void   xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from);
  
  #if defined(DEBUG)
-void           xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
+void   xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
  #else
  #define        xfs_inobp_check(mp, bp)
  #endif /* DEBUG */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index bdebc21078d7e83bac4347ad13a5f569ed4d6a0f..668e8f4ccf5e7201e8e6a360cd26668484f6d515 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -71,7 +71,7 @@ xfs_find_handle(
         int                     hsize;
         xfs_handle_t            handle;
         struct inode            *inode;
-       struct fd               f = {0};
+       struct fd               f = {NULL};
         struct path             path;
         int                     error;
         struct xfs_inode        *ip;
@@ -456,12 +456,9 @@ xfs_attrlist_by_handle(
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
  
-       kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
-       if (!kbuf) {
-               kbuf = kmem_zalloc_large(al_hreq.buflen);
-               if (!kbuf)
-                       goto out_dput;
-       }
+       kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
+       if (!kbuf)
+               goto out_dput;
  
         cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
         error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
@@ -472,12 +469,9 @@ xfs_attrlist_by_handle(
         if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
                 error = -EFAULT;
  
- out_kfree:
-       if (is_vmalloc_addr(kbuf))
-               kmem_free_large(kbuf);
-       else
-               kmem_free(kbuf);
- out_dput:
+out_kfree:
+       kmem_free(kbuf);
+out_dput:
         dput(dentry);
         return error;
  }
@@ -495,12 +489,9 @@ xfs_attrmulti_attr_get(
  
         if (*len > XATTR_SIZE_MAX)
                 return EINVAL;
-       kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL);
-       if (!kbuf) {
-               kbuf = kmem_zalloc_large(*len);
-               if (!kbuf)
-                       return ENOMEM;
-       }
+       kbuf = kmem_zalloc_large(*len, KM_SLEEP);
+       if (!kbuf)
+               return ENOMEM;
  
         error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
         if (error)
@@ -509,11 +500,8 @@ xfs_attrmulti_attr_get(
         if (copy_to_user(ubuf, kbuf, *len))
                 error = EFAULT;
  
- out_kfree:
-       if (is_vmalloc_addr(kbuf))
-               kmem_free_large(kbuf);
-       else
-               kmem_free(kbuf);
+out_kfree:
+       kmem_free(kbuf);
         return error;
  }
  
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c

index d3ab9534307fcaa7e863be8965f80311090ce499..f671f7e472ac008511ca4df2068c9d4fb91d169c 100644 (file)
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -371,12 +371,9 @@ xfs_compat_attrlist_by_handle(
                 return PTR_ERR(dentry);
  
         error = -ENOMEM;
-       kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
-       if (!kbuf) {
-               kbuf = kmem_zalloc_large(al_hreq.buflen);
-               if (!kbuf)
-                       goto out_dput;
-       }
+       kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
+       if (!kbuf)
+               goto out_dput;
  
         cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
         error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
@@ -387,12 +384,9 @@ xfs_compat_attrlist_by_handle(
         if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
                 error = -EFAULT;
  
- out_kfree:
-       if (is_vmalloc_addr(kbuf))
-               kmem_free_large(kbuf);
-       else
-               kmem_free(kbuf);
- out_dput:
+out_kfree:
+       kmem_free(kbuf);
+out_dput:
         dput(dentry);
         return error;
  }
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c

index b93e14b86754a6cf6b12e4e9952e911e10a008ad..084b3e1741fd0346cac1d8279ed8085553ee7486 100644 (file)
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -495,7 +495,7 @@ xfs_bulkstat(
         /*
          * Done, we're either out of filesystem or space to put the data.
          */
-       kmem_free_large(irbuf);
+       kmem_free(irbuf);
         *ubcountp = ubelem;
         /*
          * Found some inodes, return them now and return the error next time.
@@ -541,8 +541,9 @@ xfs_bulkstat_single(
          * at the expense of the error case.
          */
  
-       ino = (xfs_ino_t)*lastinop;
-       error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
+       ino = *lastinop;
+       error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
+                                NULL, &res);
         if (error) {
                 /*
                  * Special case way failed, do it the "long" way
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index 5372d58ef93a26220f0d916763e7f37de63d0050..a2dea108071ae6e81d0e683a98a4a011b74f23ee 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -257,7 +257,8 @@ xlog_grant_head_wait(
         struct xlog             *log,
         struct xlog_grant_head  *head,
         struct xlog_ticket      *tic,
-       int                     need_bytes)
+       int                     need_bytes) __releases(&head->lock)
+                                           __acquires(&head->lock)
  {
         list_add_tail(&tic->t_queue, &head->waiters);
  
diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h

index 31e3a06c4644d22a93c404b2159db0944fd2a5a7..ca7e28a8ed31d996f7e56862e38af43b449235cc 100644 (file)
--- a/fs/xfs/xfs_log_format.h
+++ b/fs/xfs/xfs_log_format.h
@@ -474,6 +474,8 @@ typedef struct xfs_inode_log_format_64 {
  #define        XFS_ILOG_ADATA  0x040   /* log i_af.if_data */
  #define        XFS_ILOG_AEXT   0x080   /* log i_af.if_extents */
  #define        XFS_ILOG_ABROOT 0x100   /* log i_af.i_broot */
+#define XFS_ILOG_DOWNER        0x200   /* change the data fork owner on replay */
+#define XFS_ILOG_AOWNER        0x400   /* change the attr fork owner on replay */
  
  
  /*
@@ -487,7 +489,8 @@ typedef struct xfs_inode_log_format_64 {
  #define        XFS_ILOG_NONCORE        (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
                                  XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
                                  XFS_ILOG_UUID | XFS_ILOG_ADATA | \
-                                XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
+                                XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
+                                XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
  
  #define        XFS_ILOG_DFORK          (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
                                  XFS_ILOG_DBROOT)
@@ -499,7 +502,8 @@ typedef struct xfs_inode_log_format_64 {
                                  XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
                                  XFS_ILOG_DEV | XFS_ILOG_UUID | \
                                  XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-                                XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
+                                XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \
+                                XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
  
  static inline int xfs_ilog_fbroot(int w)
  {
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 7c0c1fdc728b4ff6e18da1a4f0b46edde2337e4e..dabda9521b4becc2ded846307aa062a093a7a3d3 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2014,7 +2014,7 @@ xlog_recover_get_buf_lsn(
         case XFS_ATTR3_RMT_MAGIC:
                 return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
         case XFS_SB_MAGIC:
-               return be64_to_cpu(((struct xfs_sb *)blk)->sb_lsn);
+               return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
         default:
                 break;
         }
@@ -2629,6 +2629,82 @@ out_release:
         return error;
  }
  
+/*
+ * Inode fork owner changes
+ *
+ * If we have been told that we have to reparent the inode fork, it's because an
+ * extent swap operation on a CRC enabled filesystem has been done and we are
+ * replaying it. We need to walk the BMBT of the appropriate fork and change the
+ * owners of it.
+ *
+ * The complexity here is that we don't have an inode context to work with, so
+ * after we've replayed the inode we need to instantiate one.  This is where the
+ * fun begins.
+ *
+ * We are in the middle of log recovery, so we can't run transactions. That
+ * means we cannot use cache coherent inode instantiation via xfs_iget(), as
+ * that will result in the corresponding iput() running the inode through
+ * xfs_inactive(). If we've just replayed an inode core that changes the link
+ * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
+ * transactions (bad!).
+ *
+ * So, to avoid this, we instantiate an inode directly from the inode core we've
+ * just recovered. We have the buffer still locked, and all we really need to
+ * instantiate is the inode core and the forks being modified. We can do this
+ * manually, then run the inode btree owner change, and then tear down the
+ * xfs_inode without having to run any transactions at all.
+ *
+ * Also, because we don't have a transaction context available here but still
+ * need to gather all the buffers we modify for writeback, we pass the
+ * buffer_list to the operation instead.
+ */
+
+STATIC int
+xfs_recover_inode_owner_change(
+       struct xfs_mount        *mp,
+       struct xfs_dinode       *dip,
+       struct xfs_inode_log_format *in_f,
+       struct list_head        *buffer_list)
+{
+       struct xfs_inode        *ip;
+       int                     error;
+
+       ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
+
+       ip = xfs_inode_alloc(mp, in_f->ilf_ino);
+       if (!ip)
+               return ENOMEM;
+
+       /* instantiate the inode */
+       xfs_dinode_from_disk(&ip->i_d, dip);
+       ASSERT(ip->i_d.di_version >= 3);
+
+       error = xfs_iformat_fork(ip, dip);
+       if (error)
+               goto out_free_ip;
+
+
+       if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
+               ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
+               error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
+                                             ip->i_ino, buffer_list);
+               if (error)
+                       goto out_free_ip;
+       }
+
+       if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
+               ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
+               error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
+                                             ip->i_ino, buffer_list);
+               if (error)
+                       goto out_free_ip;
+       }
+
+out_free_ip:
+       xfs_inode_free(ip);
+       return error;
+}
+
  STATIC int
  xlog_recover_inode_pass2(
         struct xlog                     *log,
@@ -2681,8 +2757,7 @@ xlog_recover_inode_pass2(
         error = bp->b_error;
         if (error) {
                 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
-               xfs_buf_relse(bp);
-               goto error;
+               goto out_release;
         }
         ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
         dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
@@ -2692,30 +2767,31 @@ xlog_recover_inode_pass2(
          * like an inode!
          */
         if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
-               xfs_buf_relse(bp);
                 xfs_alert(mp,
         "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
                         __func__, dip, bp, in_f->ilf_ino);
                 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
                                  XFS_ERRLEVEL_LOW, mp);
                 error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
         }
         dicp = item->ri_buf[1].i_addr;
         if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
-               xfs_buf_relse(bp);
                 xfs_alert(mp,
                         "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
                         __func__, item, in_f->ilf_ino);
                 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
                                  XFS_ERRLEVEL_LOW, mp);
                 error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
         }
  
         /*
          * If the inode has an LSN in it, recover the inode only if it's less
-        * than the lsn of the transaction we are replaying.
+        * than the lsn of the transaction we are replaying. Note: we still
+        * need to replay an owner change even though the inode is more recent
+        * than the transaction as there is no guarantee that all the btree
+        * blocks are more recent than this transaction, too.
          */
         if (dip->di_version >= 3) {
                 xfs_lsn_t       lsn = be64_to_cpu(dip->di_lsn);
@@ -2723,7 +2799,7 @@ xlog_recover_inode_pass2(
                 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
                         trace_xfs_log_recover_inode_skip(log, in_f);
                         error = 0;
-                       goto out_release;
+                       goto out_owner_change;
                 }
         }
  
@@ -2745,10 +2821,9 @@ xlog_recover_inode_pass2(
                     dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
                         /* do nothing */
                 } else {
-                       xfs_buf_relse(bp);
                         trace_xfs_log_recover_inode_skip(log, in_f);
                         error = 0;
-                       goto error;
+                       goto out_release;
                 }
         }
  
@@ -2760,13 +2835,12 @@ xlog_recover_inode_pass2(
                     (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
                         XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
                                          XFS_ERRLEVEL_LOW, mp, dicp);
-                       xfs_buf_relse(bp);
                         xfs_alert(mp,
                 "%s: Bad regular inode log record, rec ptr 0x%p, "
                 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
                                 __func__, item, dip, bp, in_f->ilf_ino);
                         error = EFSCORRUPTED;
-                       goto error;
+                       goto out_release;
                 }
         } else if (unlikely(S_ISDIR(dicp->di_mode))) {
                 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -2774,19 +2848,17 @@ xlog_recover_inode_pass2(
                     (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
                         XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
                                              XFS_ERRLEVEL_LOW, mp, dicp);
-                       xfs_buf_relse(bp);
                         xfs_alert(mp,
                 "%s: Bad dir inode log record, rec ptr 0x%p, "
                 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
                                 __func__, item, dip, bp, in_f->ilf_ino);
                         error = EFSCORRUPTED;
-                       goto error;
+                       goto out_release;
                 }
         }
         if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
                 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
                                      XFS_ERRLEVEL_LOW, mp, dicp);
-               xfs_buf_relse(bp);
                 xfs_alert(mp,
         "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
         "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
@@ -2794,29 +2866,27 @@ xlog_recover_inode_pass2(
                         dicp->di_nextents + dicp->di_anextents,
                         dicp->di_nblocks);
                 error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
         }
         if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
                 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
                                      XFS_ERRLEVEL_LOW, mp, dicp);
-               xfs_buf_relse(bp);
                 xfs_alert(mp,
         "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
         "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
                         item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
                 error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
         }
         isize = xfs_icdinode_size(dicp->di_version);
         if (unlikely(item->ri_buf[1].i_len > isize)) {
                 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
                                      XFS_ERRLEVEL_LOW, mp, dicp);
-               xfs_buf_relse(bp);
                 xfs_alert(mp,
                         "%s: Bad inode log record length %d, rec ptr 0x%p",
                         __func__, item->ri_buf[1].i_len, item);
                 error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
         }
  
         /* The core is in in-core format */
@@ -2842,7 +2912,7 @@ xlog_recover_inode_pass2(
         }
  
         if (in_f->ilf_size == 2)
-               goto write_inode_buffer;
+               goto out_owner_change;
         len = item->ri_buf[2].i_len;
         src = item->ri_buf[2].i_addr;
         ASSERT(in_f->ilf_size <= 4);
@@ -2903,13 +2973,15 @@ xlog_recover_inode_pass2(
                 default:
                         xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
                         ASSERT(0);
-                       xfs_buf_relse(bp);
                         error = EIO;
-                       goto error;
+                       goto out_release;
                 }
         }
  
-write_inode_buffer:
+out_owner_change:
+       if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER))
+               error = xfs_recover_inode_owner_change(mp, dip, in_f,
+                                                      buffer_list);
         /* re-generate the checksum. */
         xfs_dinode_calc_crc(log->l_mp, dip);
  
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c

index 2f2a7c005be2d32219fd9c580bb2050f2f4e0050..f622a97a7e3383d287d85a3787c2feecc8a36b3f 100644 (file)
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -41,6 +41,7 @@
  #include "xfs_trans_space.h"
  #include "xfs_trace.h"
  #include "xfs_symlink.h"
+#include "xfs_buf_item.h"
  
  /* ----- Kernel only functions below ----- */
  STATIC int
@@ -363,6 +364,7 @@ xfs_symlink(
                         pathlen -= byte_cnt;
                         offset += byte_cnt;
  
+                       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF);
                         xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
                                                         (char *)bp->b_addr);
                 }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Sep 2013 23:13:41 +0000 (16:13 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Sep 2013 23:13:41 +0000 (16:13 -0700)
fs/xfs/kmem.c		patch \| blob \| history
fs/xfs/kmem.h		patch \| blob \| history
fs/xfs/xfs_acl.c		patch \| blob \| history
fs/xfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/xfs_bmap_btree.c		patch \| blob \| history
fs/xfs/xfs_bmap_btree.h		patch \| blob \| history
fs/xfs/xfs_bmap_util.c		patch \| blob \| history
fs/xfs/xfs_btree.c		patch \| blob \| history
fs/xfs/xfs_btree.h		patch \| blob \| history
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_da_btree.c		patch \| blob \| history
fs/xfs/xfs_dir2_leaf.c		patch \| blob \| history
fs/xfs/xfs_dquot_item.c		patch \| blob \| history
fs/xfs/xfs_extent_busy.c		patch \| blob \| history
fs/xfs/xfs_icache.c		patch \| blob \| history
fs/xfs/xfs_icache.h		patch \| blob \| history
fs/xfs/xfs_inode_buf.c		patch \| blob \| history
fs/xfs/xfs_inode_buf.h		patch \| blob \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| history
fs/xfs/xfs_ioctl32.c		patch \| blob \| history
fs/xfs/xfs_itable.c		patch \| blob \| history
fs/xfs/xfs_log.c		patch \| blob \| history
fs/xfs/xfs_log_format.h		patch \| blob \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| history
fs/xfs/xfs_symlink.c		patch \| blob \| history