xfs: decouple inode and bmap btree header files

diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 0903960410a255c171a7b663ffb3ba4af9137074..9adaae4f3e2fd21c647c9e7fa023a120e5012272 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
 #include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_trans.h"
 #include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
 #include "xfs_btree.h"
@@ -510,7 +508,7 @@ xfs_btree_ptr_addr(
 }
 
 /*
- * Get the root block which is stored in the inode.
+ * Get the root block which is stored in the inode.
  *
  * For now this btree implementation assumes the btree root is always
  * stored in the if_broot field of an inode fork.
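As background for the comment above: the root of an inode-rooted btree is never read from disk, it lives in the in-core inode fork. A minimal sketch of how such a root might be fetched, assuming the XFS_IFORK_PTR() helper of this era; the function name is hypothetical and only illustrates the comment, it is not part of this patch:

STATIC struct xfs_btree_block *
example_btree_get_iroot(
        struct xfs_btree_cur    *cur)
{
        struct xfs_ifork        *ifp;

        /* find the fork (data or attr) this cursor was initialised for */
        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);

        /* the root block lives in the in-core fork, not in a disk buffer */
        return (struct xfs_btree_block *)ifp->if_broot;
}
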
@@ -855,6 +853,41 @@ xfs_btree_readahead(
        return xfs_btree_readahead_sblock(cur, lr, block);
 }
 
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+
+               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+       } else {
+               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
+
+               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+                                       be32_to_cpu(ptr->s));
+       }
+}
+
+/*
+ * Readahead @count btree blocks at the given @ptr location.
+ *
+ * We don't need to care about long or short form btrees here as we have a
+ * helper that converts the ptr directly to a daddr for us.
+ */
+STATIC void
+xfs_btree_readahead_ptr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       xfs_extlen_t            count)
+{
+       xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
+                         xfs_btree_ptr_to_daddr(cur, ptr),
+                         cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+}
+
 /*
  * Set the buffer for level "lev" in the cursor to bp, releasing
  * any previous buffer.
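
The xfs_btree_readahead_ptr() helper added above accepts either pointer format, converts it to a disk address via xfs_btree_ptr_to_daddr() and issues readahead for @count blocks. A hedged sketch of how a top-down walk might use it to prime the leftmost child of the current block before descending; this mirrors the owner-change walk added at the end of this patch, and the function name is hypothetical:

STATIC void
example_readahead_first_child(
        struct xfs_btree_cur    *cur,
        struct xfs_btree_block  *block)
{
        union xfs_btree_ptr     *ptr;

        /* block pointers in an interior block are indexed from 1 */
        ptr = xfs_btree_ptr_addr(cur, 1, block);

        /* start the read for the next level while we work on this one */
        xfs_btree_readahead_ptr(cur, ptr, 1);
}
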
@@ -978,6 +1011,7 @@ xfs_btree_init_block_int(
                        buf->bb_u.l.bb_owner = cpu_to_be64(owner);
                        uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
                        buf->bb_u.l.bb_pad = 0;
+                       buf->bb_u.l.bb_lsn = 0;
                }
        } else {
                /* owner is a 32 bit value on short blocks */
@@ -989,6 +1023,7 @@ xfs_btree_init_block_int(
                        buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
                        buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
                        uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+                       buf->bb_u.s.bb_lsn = 0;
                }
        }
 }
@@ -1071,24 +1106,6 @@ xfs_btree_buf_to_ptr(
        }
 }
 
-STATIC xfs_daddr_t
-xfs_btree_ptr_to_daddr(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
-
-               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
-       } else {
-               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
-               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
-
-               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
-                                       be32_to_cpu(ptr->s));
-       }
-}
-
 STATIC void
 xfs_btree_set_refs(
        struct xfs_btree_cur    *cur,
@@ -1684,7 +1701,7 @@ xfs_lookup_get_search_key(
 
 /*
  * Lookup the record.  The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
+ * stat is set to 0 if we cannot find any such record, 1 for success.
  */
 int                                    /* error */
 xfs_btree_lookup(
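
The updated comment reflects the calling convention: the return value of xfs_btree_lookup() only reports errors, while the found/not-found result comes back through *stat. A sketch of the usual caller pattern, assuming the cursor's lookup key (cur->bc_rec) has already been filled in; the wrapper name is hypothetical, and the positive error returns follow the convention of this era:

STATIC int
example_find_record(
        struct xfs_btree_cur    *cur)
{
        int                     stat = 0;
        int                     error;

        error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);
        if (error)
                return error;           /* real failure: I/O, corruption, ... */
        if (!stat)
                return ENOENT;          /* lookup worked, no matching record */

        /* the cursor now points at the matching record */
        return 0;
}
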
@@ -2756,7 +2773,6 @@ xfs_btree_make_block_unfull(
 
                if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
                        /* A root block that can be made bigger. */
-
                        xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
                } else {
                        /* A root block that needs replacing */
@@ -3868,3 +3884,120 @@ xfs_btree_get_rec(
        *stat = 1;
        return 0;
 }
+
+/*
+ * Change the owner of a btree.
+ *
+ * The mechanism we use here is ordered buffer logging. Because we don't know
+ * how many buffers we are going to need to modify, we don't really want to
+ * have to make transaction reservations for the worst case of every buffer in
+ * a full-size btree, as that may be more space than we can fit in the log.
+ *
+ * We do the btree walk in the most efficient manner possible - we have sibling
+ * pointers so we can just walk all the blocks on each level from left to right
+ * in a single pass, and then move to the next level and do the same. We can
+ * also do readahead on the sibling pointers to get IO moving more quickly,
+ * though for slow disks this is unlikely to make much difference to performance
+ * as the amount of CPU work we have to do before moving to the next block is
+ * relatively small.
+ *
+ * For each btree block that we load, modify the owner appropriately, set the
+ * buffer as an ordered buffer and log it appropriately. We need to ensure that
+ * we mark the region we change dirty so that if the buffer is relogged in
+ * a subsequent transaction the changes we make here as an ordered buffer are
+ * correctly relogged in that transaction.  If we are in recovery context, then
+ * just queue the modified buffer as a delayed write buffer so the transaction
+ * recovery completion writes the changes to disk.
+ */
+static int
+xfs_btree_block_change_owner(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+       union xfs_btree_ptr     rptr;
+
+       /* do right sibling readahead */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+       /* modify the owner */
+       block = xfs_btree_get_block(cur, level, &bp);
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
+       else
+               block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
+
+       /*
+        * If the block is a root block hosted in an inode, we might not have a
+        * buffer pointer here and we shouldn't attempt to log the change as the
+        * information is already held in the inode and discarded when the root
+        * block is formatted into the on-disk inode fork. We still change it,
+        * though, so everything is consistent in memory.
+        */
+       if (bp) {
+               if (cur->bc_tp) {
+                       xfs_trans_ordered_buf(cur->bc_tp, bp);
+                       xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
+               } else {
+                       xfs_buf_delwri_queue(bp, buffer_list);
+               }
+       } else {
+               ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+               ASSERT(level == cur->bc_nlevels - 1);
+       }
+
+       /* now read the right-hand sibling block for the next iteration */
+       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+       if (xfs_btree_ptr_is_null(cur, &rptr))
+               return ENOENT;
+
+       return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
+}
+
+int
+xfs_btree_change_owner(
+       struct xfs_btree_cur    *cur,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       union xfs_btree_ptr     lptr;
+       int                     level;
+       struct xfs_btree_block  *block = NULL;
+       int                     error = 0;
+
+       cur->bc_ops->init_ptr_from_cur(cur, &lptr);
+
+       /* for each level */
+       for (level = cur->bc_nlevels - 1; level >= 0; level--) {
+               /* grab the left hand block */
+               error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
+               if (error)
+                       return error;
+
+               /* readahead the left most block for the next level down */
+               if (level > 0) {
+                       union xfs_btree_ptr     *ptr;
+
+                       ptr = xfs_btree_ptr_addr(cur, 1, block);
+                       xfs_btree_readahead_ptr(cur, ptr, 1);
+
+                       /* save for the next iteration of the loop */
+                       lptr = *ptr;
+               }
+
+               /* for each buffer in the level */
+               do {
+                       error = xfs_btree_block_change_owner(cur, level,
+                                                            new_owner,
+                                                            buffer_list);
+               } while (!error);
+
+               if (error != ENOENT)
+                       return error;
+       }
+
+       return 0;
+}
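
For completeness, a sketch of how a per-btree wrapper might drive xfs_btree_change_owner(): set up a cursor for the inode's bmap btree, pass either a transaction (live owner change) or a delwri buffer list (log recovery), and tear the cursor down afterwards. The name example_bmbt_change_owner is hypothetical; xfs_bmbt_init_cursor() and xfs_btree_del_cursor() are existing helpers, and the positive error returns follow the convention of this era.

STATIC int
example_bmbt_change_owner(
        struct xfs_trans        *tp,            /* NULL in recovery context */
        struct xfs_inode        *ip,
        int                     whichfork,
        xfs_ino_t               new_owner,
        struct list_head        *buffer_list)   /* NULL outside recovery */
{
        struct xfs_btree_cur    *cur;
        int                     error;

        /* exactly one of the two mechanisms drives the owner change */
        ASSERT(tp || buffer_list);
        ASSERT(!(tp && buffer_list));

        cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
        if (!cur)
                return ENOMEM;

        error = xfs_btree_change_owner(cur, new_owner, buffer_list);
        xfs_btree_del_cursor(cur, error ?
                        XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
        return error;
}

The two ASSERTs encode the rule from the comment above xfs_btree_change_owner(): with a transaction the walk logs ordered buffers, while in recovery it queues them on the delwri list for writeout at recovery completion.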