]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/xfs/xfs_btree.c
Merge tag 'squashfs-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/pkl...
[karo-tx-linux.git] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_mount.h"
27 #include "xfs_bmap_btree.h"
28 #include "xfs_alloc_btree.h"
29 #include "xfs_ialloc_btree.h"
30 #include "xfs_dinode.h"
31 #include "xfs_inode.h"
32 #include "xfs_inode_item.h"
33 #include "xfs_buf_item.h"
34 #include "xfs_btree.h"
35 #include "xfs_error.h"
36 #include "xfs_trace.h"
37 #include "xfs_cksum.h"
38
39 /*
40  * Cursor allocation zone.
41  */
42 kmem_zone_t     *xfs_btree_cur_zone;
43
44 /*
45  * Btree magic numbers.
46  */
47 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
48         { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
49         { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
50           XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
51 };
52 #define xfs_btree_magic(cur) \
53         xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
54
55
56 STATIC int                              /* error (0 or EFSCORRUPTED) */
57 xfs_btree_check_lblock(
58         struct xfs_btree_cur    *cur,   /* btree cursor */
59         struct xfs_btree_block  *block, /* btree long form block pointer */
60         int                     level,  /* level of the btree block */
61         struct xfs_buf          *bp)    /* buffer for block, if any */
62 {
63         int                     lblock_ok = 1; /* block passes checks */
64         struct xfs_mount        *mp;    /* file system mount point */
65
66         mp = cur->bc_mp;
67
68         if (xfs_sb_version_hascrc(&mp->m_sb)) {
69                 lblock_ok = lblock_ok &&
70                         uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
71                         block->bb_u.l.bb_blkno == cpu_to_be64(
72                                 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
73         }
74
75         lblock_ok = lblock_ok &&
76                 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
77                 be16_to_cpu(block->bb_level) == level &&
78                 be16_to_cpu(block->bb_numrecs) <=
79                         cur->bc_ops->get_maxrecs(cur, level) &&
80                 block->bb_u.l.bb_leftsib &&
81                 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
82                  XFS_FSB_SANITY_CHECK(mp,
83                         be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
84                 block->bb_u.l.bb_rightsib &&
85                 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
86                  XFS_FSB_SANITY_CHECK(mp,
87                         be64_to_cpu(block->bb_u.l.bb_rightsib)));
88
89         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
90                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
91                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
92                 if (bp)
93                         trace_xfs_btree_corrupt(bp, _RET_IP_);
94                 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
95                 return XFS_ERROR(EFSCORRUPTED);
96         }
97         return 0;
98 }
99
100 STATIC int                              /* error (0 or EFSCORRUPTED) */
101 xfs_btree_check_sblock(
102         struct xfs_btree_cur    *cur,   /* btree cursor */
103         struct xfs_btree_block  *block, /* btree short form block pointer */
104         int                     level,  /* level of the btree block */
105         struct xfs_buf          *bp)    /* buffer containing block */
106 {
107         struct xfs_mount        *mp;    /* file system mount point */
108         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
109         struct xfs_agf          *agf;   /* ag. freespace structure */
110         xfs_agblock_t           agflen; /* native ag. freespace length */
111         int                     sblock_ok = 1; /* block passes checks */
112
113         mp = cur->bc_mp;
114         agbp = cur->bc_private.a.agbp;
115         agf = XFS_BUF_TO_AGF(agbp);
116         agflen = be32_to_cpu(agf->agf_length);
117
118         if (xfs_sb_version_hascrc(&mp->m_sb)) {
119                 sblock_ok = sblock_ok &&
120                         uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
121                         block->bb_u.s.bb_blkno == cpu_to_be64(
122                                 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
123         }
124
125         sblock_ok = sblock_ok &&
126                 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
127                 be16_to_cpu(block->bb_level) == level &&
128                 be16_to_cpu(block->bb_numrecs) <=
129                         cur->bc_ops->get_maxrecs(cur, level) &&
130                 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
131                  be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
132                 block->bb_u.s.bb_leftsib &&
133                 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
134                  be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
135                 block->bb_u.s.bb_rightsib;
136
137         if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
138                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
139                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
140                 if (bp)
141                         trace_xfs_btree_corrupt(bp, _RET_IP_);
142                 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
143                 return XFS_ERROR(EFSCORRUPTED);
144         }
145         return 0;
146 }
147
148 /*
149  * Debug routine: check that block header is ok.
150  */
151 int
152 xfs_btree_check_block(
153         struct xfs_btree_cur    *cur,   /* btree cursor */
154         struct xfs_btree_block  *block, /* generic btree block pointer */
155         int                     level,  /* level of the btree block */
156         struct xfs_buf          *bp)    /* buffer containing block, if any */
157 {
158         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
159                 return xfs_btree_check_lblock(cur, block, level, bp);
160         else
161                 return xfs_btree_check_sblock(cur, block, level, bp);
162 }
163
164 /*
165  * Check that (long) pointer is ok.
166  */
167 int                                     /* error (0 or EFSCORRUPTED) */
168 xfs_btree_check_lptr(
169         struct xfs_btree_cur    *cur,   /* btree cursor */
170         xfs_dfsbno_t            bno,    /* btree block disk address */
171         int                     level)  /* btree block level */
172 {
173         XFS_WANT_CORRUPTED_RETURN(
174                 level > 0 &&
175                 bno != NULLDFSBNO &&
176                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
177         return 0;
178 }
179
180 #ifdef DEBUG
181 /*
182  * Check that (short) pointer is ok.
183  */
184 STATIC int                              /* error (0 or EFSCORRUPTED) */
185 xfs_btree_check_sptr(
186         struct xfs_btree_cur    *cur,   /* btree cursor */
187         xfs_agblock_t           bno,    /* btree block disk address */
188         int                     level)  /* btree block level */
189 {
190         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
191
192         XFS_WANT_CORRUPTED_RETURN(
193                 level > 0 &&
194                 bno != NULLAGBLOCK &&
195                 bno != 0 &&
196                 bno < agblocks);
197         return 0;
198 }
199
200 /*
201  * Check that block ptr is ok.
202  */
203 STATIC int                              /* error (0 or EFSCORRUPTED) */
204 xfs_btree_check_ptr(
205         struct xfs_btree_cur    *cur,   /* btree cursor */
206         union xfs_btree_ptr     *ptr,   /* btree block disk address */
207         int                     index,  /* offset from ptr to check */
208         int                     level)  /* btree block level */
209 {
210         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
211                 return xfs_btree_check_lptr(cur,
212                                 be64_to_cpu((&ptr->l)[index]), level);
213         } else {
214                 return xfs_btree_check_sptr(cur,
215                                 be32_to_cpu((&ptr->s)[index]), level);
216         }
217 }
218 #endif
219
220 /*
221  * Calculate CRC on the whole btree block and stuff it into the
222  * long-form btree header.
223  *
224  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
225  * it into the buffer so recovery knows what the last modifcation was that made
226  * it to disk.
227  */
228 void
229 xfs_btree_lblock_calc_crc(
230         struct xfs_buf          *bp)
231 {
232         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
233         struct xfs_buf_log_item *bip = bp->b_fspriv;
234
235         if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
236                 return;
237         if (bip)
238                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
239         xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
240                          XFS_BTREE_LBLOCK_CRC_OFF);
241 }
242
243 bool
244 xfs_btree_lblock_verify_crc(
245         struct xfs_buf          *bp)
246 {
247         if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
248                 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
249                                         XFS_BTREE_LBLOCK_CRC_OFF);
250         return true;
251 }
252
253 /*
254  * Calculate CRC on the whole btree block and stuff it into the
255  * short-form btree header.
256  *
257  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
258  * it into the buffer so recovery knows what the last modifcation was that made
259  * it to disk.
260  */
261 void
262 xfs_btree_sblock_calc_crc(
263         struct xfs_buf          *bp)
264 {
265         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
266         struct xfs_buf_log_item *bip = bp->b_fspriv;
267
268         if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
269                 return;
270         if (bip)
271                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
272         xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
273                          XFS_BTREE_SBLOCK_CRC_OFF);
274 }
275
276 bool
277 xfs_btree_sblock_verify_crc(
278         struct xfs_buf          *bp)
279 {
280         if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
281                 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
282                                         XFS_BTREE_SBLOCK_CRC_OFF);
283         return true;
284 }
285
286 /*
287  * Delete the btree cursor.
288  */
289 void
290 xfs_btree_del_cursor(
291         xfs_btree_cur_t *cur,           /* btree cursor */
292         int             error)          /* del because of error */
293 {
294         int             i;              /* btree level */
295
296         /*
297          * Clear the buffer pointers, and release the buffers.
298          * If we're doing this in the face of an error, we
299          * need to make sure to inspect all of the entries
300          * in the bc_bufs array for buffers to be unlocked.
301          * This is because some of the btree code works from
302          * level n down to 0, and if we get an error along
303          * the way we won't have initialized all the entries
304          * down to 0.
305          */
306         for (i = 0; i < cur->bc_nlevels; i++) {
307                 if (cur->bc_bufs[i])
308                         xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
309                 else if (!error)
310                         break;
311         }
312         /*
313          * Can't free a bmap cursor without having dealt with the
314          * allocated indirect blocks' accounting.
315          */
316         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
317                cur->bc_private.b.allocated == 0);
318         /*
319          * Free the cursor.
320          */
321         kmem_zone_free(xfs_btree_cur_zone, cur);
322 }
323
324 /*
325  * Duplicate the btree cursor.
326  * Allocate a new one, copy the record, re-get the buffers.
327  */
328 int                                     /* error */
329 xfs_btree_dup_cursor(
330         xfs_btree_cur_t *cur,           /* input cursor */
331         xfs_btree_cur_t **ncur)         /* output cursor */
332 {
333         xfs_buf_t       *bp;            /* btree block's buffer pointer */
334         int             error;          /* error return value */
335         int             i;              /* level number of btree block */
336         xfs_mount_t     *mp;            /* mount structure for filesystem */
337         xfs_btree_cur_t *new;           /* new cursor value */
338         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
339
340         tp = cur->bc_tp;
341         mp = cur->bc_mp;
342
343         /*
344          * Allocate a new cursor like the old one.
345          */
346         new = cur->bc_ops->dup_cursor(cur);
347
348         /*
349          * Copy the record currently in the cursor.
350          */
351         new->bc_rec = cur->bc_rec;
352
353         /*
354          * For each level current, re-get the buffer and copy the ptr value.
355          */
356         for (i = 0; i < new->bc_nlevels; i++) {
357                 new->bc_ptrs[i] = cur->bc_ptrs[i];
358                 new->bc_ra[i] = cur->bc_ra[i];
359                 bp = cur->bc_bufs[i];
360                 if (bp) {
361                         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
362                                                    XFS_BUF_ADDR(bp), mp->m_bsize,
363                                                    0, &bp,
364                                                    cur->bc_ops->buf_ops);
365                         if (error) {
366                                 xfs_btree_del_cursor(new, error);
367                                 *ncur = NULL;
368                                 return error;
369                         }
370                 }
371                 new->bc_bufs[i] = bp;
372         }
373         *ncur = new;
374         return 0;
375 }
376
377 /*
378  * XFS btree block layout and addressing:
379  *
380  * There are two types of blocks in the btree: leaf and non-leaf blocks.
381  *
382  * The leaf record start with a header then followed by records containing
383  * the values.  A non-leaf block also starts with the same header, and
384  * then first contains lookup keys followed by an equal number of pointers
385  * to the btree blocks at the previous level.
386  *
387  *              +--------+-------+-------+-------+-------+-------+-------+
388  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
389  *              +--------+-------+-------+-------+-------+-------+-------+
390  *
391  *              +--------+-------+-------+-------+-------+-------+-------+
392  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
393  *              +--------+-------+-------+-------+-------+-------+-------+
394  *
395  * The header is called struct xfs_btree_block for reasons better left unknown
396  * and comes in different versions for short (32bit) and long (64bit) block
397  * pointers.  The record and key structures are defined by the btree instances
398  * and opaque to the btree core.  The block pointers are simple disk endian
399  * integers, available in a short (32bit) and long (64bit) variant.
400  *
401  * The helpers below calculate the offset of a given record, key or pointer
402  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
403  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
404  * inside the btree block is done using indices starting at one, not zero!
405  */
406
407 /*
408  * Return size of the btree block header for this btree instance.
409  */
410 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
411 {
412         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
413                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
414                         return XFS_BTREE_LBLOCK_CRC_LEN;
415                 return XFS_BTREE_LBLOCK_LEN;
416         }
417         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
418                 return XFS_BTREE_SBLOCK_CRC_LEN;
419         return XFS_BTREE_SBLOCK_LEN;
420 }
421
422 /*
423  * Return size of btree block pointers for this btree instance.
424  */
425 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
426 {
427         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
428                 sizeof(__be64) : sizeof(__be32);
429 }
430
431 /*
432  * Calculate offset of the n-th record in a btree block.
433  */
434 STATIC size_t
435 xfs_btree_rec_offset(
436         struct xfs_btree_cur    *cur,
437         int                     n)
438 {
439         return xfs_btree_block_len(cur) +
440                 (n - 1) * cur->bc_ops->rec_len;
441 }
442
443 /*
444  * Calculate offset of the n-th key in a btree block.
445  */
446 STATIC size_t
447 xfs_btree_key_offset(
448         struct xfs_btree_cur    *cur,
449         int                     n)
450 {
451         return xfs_btree_block_len(cur) +
452                 (n - 1) * cur->bc_ops->key_len;
453 }
454
455 /*
456  * Calculate offset of the n-th block pointer in a btree block.
457  */
458 STATIC size_t
459 xfs_btree_ptr_offset(
460         struct xfs_btree_cur    *cur,
461         int                     n,
462         int                     level)
463 {
464         return xfs_btree_block_len(cur) +
465                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
466                 (n - 1) * xfs_btree_ptr_len(cur);
467 }
468
469 /*
470  * Return a pointer to the n-th record in the btree block.
471  */
472 STATIC union xfs_btree_rec *
473 xfs_btree_rec_addr(
474         struct xfs_btree_cur    *cur,
475         int                     n,
476         struct xfs_btree_block  *block)
477 {
478         return (union xfs_btree_rec *)
479                 ((char *)block + xfs_btree_rec_offset(cur, n));
480 }
481
482 /*
483  * Return a pointer to the n-th key in the btree block.
484  */
485 STATIC union xfs_btree_key *
486 xfs_btree_key_addr(
487         struct xfs_btree_cur    *cur,
488         int                     n,
489         struct xfs_btree_block  *block)
490 {
491         return (union xfs_btree_key *)
492                 ((char *)block + xfs_btree_key_offset(cur, n));
493 }
494
495 /*
496  * Return a pointer to the n-th block pointer in the btree block.
497  */
498 STATIC union xfs_btree_ptr *
499 xfs_btree_ptr_addr(
500         struct xfs_btree_cur    *cur,
501         int                     n,
502         struct xfs_btree_block  *block)
503 {
504         int                     level = xfs_btree_get_level(block);
505
506         ASSERT(block->bb_level != 0);
507
508         return (union xfs_btree_ptr *)
509                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
510 }
511
512 /*
513  * Get the root block which is stored in the inode.
514  *
515  * For now this btree implementation assumes the btree root is always
516  * stored in the if_broot field of an inode fork.
517  */
518 STATIC struct xfs_btree_block *
519 xfs_btree_get_iroot(
520        struct xfs_btree_cur    *cur)
521 {
522        struct xfs_ifork        *ifp;
523
524        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
525        return (struct xfs_btree_block *)ifp->if_broot;
526 }
527
528 /*
529  * Retrieve the block pointer from the cursor at the given level.
530  * This may be an inode btree root or from a buffer.
531  */
532 STATIC struct xfs_btree_block *         /* generic btree block pointer */
533 xfs_btree_get_block(
534         struct xfs_btree_cur    *cur,   /* btree cursor */
535         int                     level,  /* level in btree */
536         struct xfs_buf          **bpp)  /* buffer containing the block */
537 {
538         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
539             (level == cur->bc_nlevels - 1)) {
540                 *bpp = NULL;
541                 return xfs_btree_get_iroot(cur);
542         }
543
544         *bpp = cur->bc_bufs[level];
545         return XFS_BUF_TO_BLOCK(*bpp);
546 }
547
548 /*
549  * Get a buffer for the block, return it with no data read.
550  * Long-form addressing.
551  */
552 xfs_buf_t *                             /* buffer for fsbno */
553 xfs_btree_get_bufl(
554         xfs_mount_t     *mp,            /* file system mount point */
555         xfs_trans_t     *tp,            /* transaction pointer */
556         xfs_fsblock_t   fsbno,          /* file system block number */
557         uint            lock)           /* lock flags for get_buf */
558 {
559         xfs_buf_t       *bp;            /* buffer pointer (return value) */
560         xfs_daddr_t             d;              /* real disk block address */
561
562         ASSERT(fsbno != NULLFSBLOCK);
563         d = XFS_FSB_TO_DADDR(mp, fsbno);
564         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
565         ASSERT(!xfs_buf_geterror(bp));
566         return bp;
567 }
568
569 /*
570  * Get a buffer for the block, return it with no data read.
571  * Short-form addressing.
572  */
573 xfs_buf_t *                             /* buffer for agno/agbno */
574 xfs_btree_get_bufs(
575         xfs_mount_t     *mp,            /* file system mount point */
576         xfs_trans_t     *tp,            /* transaction pointer */
577         xfs_agnumber_t  agno,           /* allocation group number */
578         xfs_agblock_t   agbno,          /* allocation group block number */
579         uint            lock)           /* lock flags for get_buf */
580 {
581         xfs_buf_t       *bp;            /* buffer pointer (return value) */
582         xfs_daddr_t             d;              /* real disk block address */
583
584         ASSERT(agno != NULLAGNUMBER);
585         ASSERT(agbno != NULLAGBLOCK);
586         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
587         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
588         ASSERT(!xfs_buf_geterror(bp));
589         return bp;
590 }
591
592 /*
593  * Check for the cursor referring to the last block at the given level.
594  */
595 int                                     /* 1=is last block, 0=not last block */
596 xfs_btree_islastblock(
597         xfs_btree_cur_t         *cur,   /* btree cursor */
598         int                     level)  /* level to check */
599 {
600         struct xfs_btree_block  *block; /* generic btree block pointer */
601         xfs_buf_t               *bp;    /* buffer containing block */
602
603         block = xfs_btree_get_block(cur, level, &bp);
604         xfs_btree_check_block(cur, block, level, bp);
605         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
606                 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
607         else
608                 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
609 }
610
611 /*
612  * Change the cursor to point to the first record at the given level.
613  * Other levels are unaffected.
614  */
615 STATIC int                              /* success=1, failure=0 */
616 xfs_btree_firstrec(
617         xfs_btree_cur_t         *cur,   /* btree cursor */
618         int                     level)  /* level to change */
619 {
620         struct xfs_btree_block  *block; /* generic btree block pointer */
621         xfs_buf_t               *bp;    /* buffer containing block */
622
623         /*
624          * Get the block pointer for this level.
625          */
626         block = xfs_btree_get_block(cur, level, &bp);
627         xfs_btree_check_block(cur, block, level, bp);
628         /*
629          * It's empty, there is no such record.
630          */
631         if (!block->bb_numrecs)
632                 return 0;
633         /*
634          * Set the ptr value to 1, that's the first record/key.
635          */
636         cur->bc_ptrs[level] = 1;
637         return 1;
638 }
639
640 /*
641  * Change the cursor to point to the last record in the current block
642  * at the given level.  Other levels are unaffected.
643  */
644 STATIC int                              /* success=1, failure=0 */
645 xfs_btree_lastrec(
646         xfs_btree_cur_t         *cur,   /* btree cursor */
647         int                     level)  /* level to change */
648 {
649         struct xfs_btree_block  *block; /* generic btree block pointer */
650         xfs_buf_t               *bp;    /* buffer containing block */
651
652         /*
653          * Get the block pointer for this level.
654          */
655         block = xfs_btree_get_block(cur, level, &bp);
656         xfs_btree_check_block(cur, block, level, bp);
657         /*
658          * It's empty, there is no such record.
659          */
660         if (!block->bb_numrecs)
661                 return 0;
662         /*
663          * Set the ptr value to numrecs, that's the last record/key.
664          */
665         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
666         return 1;
667 }
668
669 /*
670  * Compute first and last byte offsets for the fields given.
671  * Interprets the offsets table, which contains struct field offsets.
672  */
673 void
674 xfs_btree_offsets(
675         __int64_t       fields,         /* bitmask of fields */
676         const short     *offsets,       /* table of field offsets */
677         int             nbits,          /* number of bits to inspect */
678         int             *first,         /* output: first byte offset */
679         int             *last)          /* output: last byte offset */
680 {
681         int             i;              /* current bit number */
682         __int64_t       imask;          /* mask for current bit number */
683
684         ASSERT(fields != 0);
685         /*
686          * Find the lowest bit, so the first byte offset.
687          */
688         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
689                 if (imask & fields) {
690                         *first = offsets[i];
691                         break;
692                 }
693         }
694         /*
695          * Find the highest bit, so the last byte offset.
696          */
697         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
698                 if (imask & fields) {
699                         *last = offsets[i + 1] - 1;
700                         break;
701                 }
702         }
703 }
704
705 /*
706  * Get a buffer for the block, return it read in.
707  * Long-form addressing.
708  */
709 int
710 xfs_btree_read_bufl(
711         struct xfs_mount        *mp,            /* file system mount point */
712         struct xfs_trans        *tp,            /* transaction pointer */
713         xfs_fsblock_t           fsbno,          /* file system block number */
714         uint                    lock,           /* lock flags for read_buf */
715         struct xfs_buf          **bpp,          /* buffer for fsbno */
716         int                     refval,         /* ref count value for buffer */
717         const struct xfs_buf_ops *ops)
718 {
719         struct xfs_buf          *bp;            /* return value */
720         xfs_daddr_t             d;              /* real disk block address */
721         int                     error;
722
723         ASSERT(fsbno != NULLFSBLOCK);
724         d = XFS_FSB_TO_DADDR(mp, fsbno);
725         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
726                                    mp->m_bsize, lock, &bp, ops);
727         if (error)
728                 return error;
729         ASSERT(!xfs_buf_geterror(bp));
730         if (bp)
731                 xfs_buf_set_ref(bp, refval);
732         *bpp = bp;
733         return 0;
734 }
735
736 /*
737  * Read-ahead the block, don't wait for it, don't return a buffer.
738  * Long-form addressing.
739  */
740 /* ARGSUSED */
741 void
742 xfs_btree_reada_bufl(
743         struct xfs_mount        *mp,            /* file system mount point */
744         xfs_fsblock_t           fsbno,          /* file system block number */
745         xfs_extlen_t            count,          /* count of filesystem blocks */
746         const struct xfs_buf_ops *ops)
747 {
748         xfs_daddr_t             d;
749
750         ASSERT(fsbno != NULLFSBLOCK);
751         d = XFS_FSB_TO_DADDR(mp, fsbno);
752         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
753 }
754
755 /*
756  * Read-ahead the block, don't wait for it, don't return a buffer.
757  * Short-form addressing.
758  */
759 /* ARGSUSED */
760 void
761 xfs_btree_reada_bufs(
762         struct xfs_mount        *mp,            /* file system mount point */
763         xfs_agnumber_t          agno,           /* allocation group number */
764         xfs_agblock_t           agbno,          /* allocation group block number */
765         xfs_extlen_t            count,          /* count of filesystem blocks */
766         const struct xfs_buf_ops *ops)
767 {
768         xfs_daddr_t             d;
769
770         ASSERT(agno != NULLAGNUMBER);
771         ASSERT(agbno != NULLAGBLOCK);
772         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
773         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
774 }
775
776 STATIC int
777 xfs_btree_readahead_lblock(
778         struct xfs_btree_cur    *cur,
779         int                     lr,
780         struct xfs_btree_block  *block)
781 {
782         int                     rval = 0;
783         xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
784         xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
785
786         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
787                 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
788                                      cur->bc_ops->buf_ops);
789                 rval++;
790         }
791
792         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
793                 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
794                                      cur->bc_ops->buf_ops);
795                 rval++;
796         }
797
798         return rval;
799 }
800
801 STATIC int
802 xfs_btree_readahead_sblock(
803         struct xfs_btree_cur    *cur,
804         int                     lr,
805         struct xfs_btree_block *block)
806 {
807         int                     rval = 0;
808         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
809         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
810
811
812         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
813                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
814                                      left, 1, cur->bc_ops->buf_ops);
815                 rval++;
816         }
817
818         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
819                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
820                                      right, 1, cur->bc_ops->buf_ops);
821                 rval++;
822         }
823
824         return rval;
825 }
826
827 /*
828  * Read-ahead btree blocks, at the given level.
829  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
830  */
831 STATIC int
832 xfs_btree_readahead(
833         struct xfs_btree_cur    *cur,           /* btree cursor */
834         int                     lev,            /* level in btree */
835         int                     lr)             /* left/right bits */
836 {
837         struct xfs_btree_block  *block;
838
839         /*
840          * No readahead needed if we are at the root level and the
841          * btree root is stored in the inode.
842          */
843         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
844             (lev == cur->bc_nlevels - 1))
845                 return 0;
846
847         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
848                 return 0;
849
850         cur->bc_ra[lev] |= lr;
851         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
852
853         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
854                 return xfs_btree_readahead_lblock(cur, lr, block);
855         return xfs_btree_readahead_sblock(cur, lr, block);
856 }
857
858 /*
859  * Set the buffer for level "lev" in the cursor to bp, releasing
860  * any previous buffer.
861  */
862 STATIC void
863 xfs_btree_setbuf(
864         xfs_btree_cur_t         *cur,   /* btree cursor */
865         int                     lev,    /* level in btree */
866         xfs_buf_t               *bp)    /* new buffer to set */
867 {
868         struct xfs_btree_block  *b;     /* btree block */
869
870         if (cur->bc_bufs[lev])
871                 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
872         cur->bc_bufs[lev] = bp;
873         cur->bc_ra[lev] = 0;
874
875         b = XFS_BUF_TO_BLOCK(bp);
876         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
877                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
878                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
879                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
880                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
881         } else {
882                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
883                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
884                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
885                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
886         }
887 }
888
889 STATIC int
890 xfs_btree_ptr_is_null(
891         struct xfs_btree_cur    *cur,
892         union xfs_btree_ptr     *ptr)
893 {
894         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
895                 return ptr->l == cpu_to_be64(NULLDFSBNO);
896         else
897                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
898 }
899
900 STATIC void
901 xfs_btree_set_ptr_null(
902         struct xfs_btree_cur    *cur,
903         union xfs_btree_ptr     *ptr)
904 {
905         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
906                 ptr->l = cpu_to_be64(NULLDFSBNO);
907         else
908                 ptr->s = cpu_to_be32(NULLAGBLOCK);
909 }
910
911 /*
912  * Get/set/init sibling pointers
913  */
914 STATIC void
915 xfs_btree_get_sibling(
916         struct xfs_btree_cur    *cur,
917         struct xfs_btree_block  *block,
918         union xfs_btree_ptr     *ptr,
919         int                     lr)
920 {
921         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
922
923         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
924                 if (lr == XFS_BB_RIGHTSIB)
925                         ptr->l = block->bb_u.l.bb_rightsib;
926                 else
927                         ptr->l = block->bb_u.l.bb_leftsib;
928         } else {
929                 if (lr == XFS_BB_RIGHTSIB)
930                         ptr->s = block->bb_u.s.bb_rightsib;
931                 else
932                         ptr->s = block->bb_u.s.bb_leftsib;
933         }
934 }
935
936 STATIC void
937 xfs_btree_set_sibling(
938         struct xfs_btree_cur    *cur,
939         struct xfs_btree_block  *block,
940         union xfs_btree_ptr     *ptr,
941         int                     lr)
942 {
943         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
944
945         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
946                 if (lr == XFS_BB_RIGHTSIB)
947                         block->bb_u.l.bb_rightsib = ptr->l;
948                 else
949                         block->bb_u.l.bb_leftsib = ptr->l;
950         } else {
951                 if (lr == XFS_BB_RIGHTSIB)
952                         block->bb_u.s.bb_rightsib = ptr->s;
953                 else
954                         block->bb_u.s.bb_leftsib = ptr->s;
955         }
956 }
957
958 void
959 xfs_btree_init_block_int(
960         struct xfs_mount        *mp,
961         struct xfs_btree_block  *buf,
962         xfs_daddr_t             blkno,
963         __u32                   magic,
964         __u16                   level,
965         __u16                   numrecs,
966         __u64                   owner,
967         unsigned int            flags)
968 {
969         buf->bb_magic = cpu_to_be32(magic);
970         buf->bb_level = cpu_to_be16(level);
971         buf->bb_numrecs = cpu_to_be16(numrecs);
972
973         if (flags & XFS_BTREE_LONG_PTRS) {
974                 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
975                 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
976                 if (flags & XFS_BTREE_CRC_BLOCKS) {
977                         buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
978                         buf->bb_u.l.bb_owner = cpu_to_be64(owner);
979                         uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
980                         buf->bb_u.l.bb_pad = 0;
981                         buf->bb_u.l.bb_lsn = 0;
982                 }
983         } else {
984                 /* owner is a 32 bit value on short blocks */
985                 __u32 __owner = (__u32)owner;
986
987                 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
988                 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
989                 if (flags & XFS_BTREE_CRC_BLOCKS) {
990                         buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
991                         buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
992                         uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
993                         buf->bb_u.s.bb_lsn = 0;
994                 }
995         }
996 }
997
998 void
999 xfs_btree_init_block(
1000         struct xfs_mount *mp,
1001         struct xfs_buf  *bp,
1002         __u32           magic,
1003         __u16           level,
1004         __u16           numrecs,
1005         __u64           owner,
1006         unsigned int    flags)
1007 {
1008         xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1009                                  magic, level, numrecs, owner, flags);
1010 }
1011
1012 STATIC void
1013 xfs_btree_init_block_cur(
1014         struct xfs_btree_cur    *cur,
1015         struct xfs_buf          *bp,
1016         int                     level,
1017         int                     numrecs)
1018 {
1019         __u64 owner;
1020
1021         /*
1022          * we can pull the owner from the cursor right now as the different
1023          * owners align directly with the pointer size of the btree. This may
1024          * change in future, but is safe for current users of the generic btree
1025          * code.
1026          */
1027         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1028                 owner = cur->bc_private.b.ip->i_ino;
1029         else
1030                 owner = cur->bc_private.a.agno;
1031
1032         xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1033                                  xfs_btree_magic(cur), level, numrecs,
1034                                  owner, cur->bc_flags);
1035 }
1036
1037 /*
1038  * Return true if ptr is the last record in the btree and
1039  * we need to track updates to this record.  The decision
1040  * will be further refined in the update_lastrec method.
1041  */
1042 STATIC int
1043 xfs_btree_is_lastrec(
1044         struct xfs_btree_cur    *cur,
1045         struct xfs_btree_block  *block,
1046         int                     level)
1047 {
1048         union xfs_btree_ptr     ptr;
1049
1050         if (level > 0)
1051                 return 0;
1052         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1053                 return 0;
1054
1055         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1056         if (!xfs_btree_ptr_is_null(cur, &ptr))
1057                 return 0;
1058         return 1;
1059 }
1060
1061 STATIC void
1062 xfs_btree_buf_to_ptr(
1063         struct xfs_btree_cur    *cur,
1064         struct xfs_buf          *bp,
1065         union xfs_btree_ptr     *ptr)
1066 {
1067         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1068                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1069                                         XFS_BUF_ADDR(bp)));
1070         else {
1071                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1072                                         XFS_BUF_ADDR(bp)));
1073         }
1074 }
1075
1076 STATIC xfs_daddr_t
1077 xfs_btree_ptr_to_daddr(
1078         struct xfs_btree_cur    *cur,
1079         union xfs_btree_ptr     *ptr)
1080 {
1081         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1082                 ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
1083
1084                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
1085         } else {
1086                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1087                 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
1088
1089                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1090                                         be32_to_cpu(ptr->s));
1091         }
1092 }
1093
1094 STATIC void
1095 xfs_btree_set_refs(
1096         struct xfs_btree_cur    *cur,
1097         struct xfs_buf          *bp)
1098 {
1099         switch (cur->bc_btnum) {
1100         case XFS_BTNUM_BNO:
1101         case XFS_BTNUM_CNT:
1102                 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1103                 break;
1104         case XFS_BTNUM_INO:
1105                 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1106                 break;
1107         case XFS_BTNUM_BMAP:
1108                 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1109                 break;
1110         default:
1111                 ASSERT(0);
1112         }
1113 }
1114
1115 STATIC int
1116 xfs_btree_get_buf_block(
1117         struct xfs_btree_cur    *cur,
1118         union xfs_btree_ptr     *ptr,
1119         int                     flags,
1120         struct xfs_btree_block  **block,
1121         struct xfs_buf          **bpp)
1122 {
1123         struct xfs_mount        *mp = cur->bc_mp;
1124         xfs_daddr_t             d;
1125
1126         /* need to sort out how callers deal with failures first */
1127         ASSERT(!(flags & XBF_TRYLOCK));
1128
1129         d = xfs_btree_ptr_to_daddr(cur, ptr);
1130         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1131                                  mp->m_bsize, flags);
1132
1133         if (!*bpp)
1134                 return ENOMEM;
1135
1136         (*bpp)->b_ops = cur->bc_ops->buf_ops;
1137         *block = XFS_BUF_TO_BLOCK(*bpp);
1138         return 0;
1139 }
1140
1141 /*
1142  * Read in the buffer at the given ptr and return the buffer and
1143  * the block pointer within the buffer.
1144  */
1145 STATIC int
1146 xfs_btree_read_buf_block(
1147         struct xfs_btree_cur    *cur,
1148         union xfs_btree_ptr     *ptr,
1149         int                     level,
1150         int                     flags,
1151         struct xfs_btree_block  **block,
1152         struct xfs_buf          **bpp)
1153 {
1154         struct xfs_mount        *mp = cur->bc_mp;
1155         xfs_daddr_t             d;
1156         int                     error;
1157
1158         /* need to sort out how callers deal with failures first */
1159         ASSERT(!(flags & XBF_TRYLOCK));
1160
1161         d = xfs_btree_ptr_to_daddr(cur, ptr);
1162         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1163                                    mp->m_bsize, flags, bpp,
1164                                    cur->bc_ops->buf_ops);
1165         if (error)
1166                 return error;
1167
1168         ASSERT(!xfs_buf_geterror(*bpp));
1169         xfs_btree_set_refs(cur, *bpp);
1170         *block = XFS_BUF_TO_BLOCK(*bpp);
1171         return 0;
1172 }
1173
1174 /*
1175  * Copy keys from one btree block to another.
1176  */
1177 STATIC void
1178 xfs_btree_copy_keys(
1179         struct xfs_btree_cur    *cur,
1180         union xfs_btree_key     *dst_key,
1181         union xfs_btree_key     *src_key,
1182         int                     numkeys)
1183 {
1184         ASSERT(numkeys >= 0);
1185         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1186 }
1187
1188 /*
1189  * Copy records from one btree block to another.
1190  */
1191 STATIC void
1192 xfs_btree_copy_recs(
1193         struct xfs_btree_cur    *cur,
1194         union xfs_btree_rec     *dst_rec,
1195         union xfs_btree_rec     *src_rec,
1196         int                     numrecs)
1197 {
1198         ASSERT(numrecs >= 0);
1199         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1200 }
1201
1202 /*
1203  * Copy block pointers from one btree block to another.
1204  */
1205 STATIC void
1206 xfs_btree_copy_ptrs(
1207         struct xfs_btree_cur    *cur,
1208         union xfs_btree_ptr     *dst_ptr,
1209         union xfs_btree_ptr     *src_ptr,
1210         int                     numptrs)
1211 {
1212         ASSERT(numptrs >= 0);
1213         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1214 }
1215
1216 /*
1217  * Shift keys one index left/right inside a single btree block.
1218  */
1219 STATIC void
1220 xfs_btree_shift_keys(
1221         struct xfs_btree_cur    *cur,
1222         union xfs_btree_key     *key,
1223         int                     dir,
1224         int                     numkeys)
1225 {
1226         char                    *dst_key;
1227
1228         ASSERT(numkeys >= 0);
1229         ASSERT(dir == 1 || dir == -1);
1230
1231         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1232         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1233 }
1234
1235 /*
1236  * Shift records one index left/right inside a single btree block.
1237  */
1238 STATIC void
1239 xfs_btree_shift_recs(
1240         struct xfs_btree_cur    *cur,
1241         union xfs_btree_rec     *rec,
1242         int                     dir,
1243         int                     numrecs)
1244 {
1245         char                    *dst_rec;
1246
1247         ASSERT(numrecs >= 0);
1248         ASSERT(dir == 1 || dir == -1);
1249
1250         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1251         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1252 }
1253
1254 /*
1255  * Shift block pointers one index left/right inside a single btree block.
1256  */
1257 STATIC void
1258 xfs_btree_shift_ptrs(
1259         struct xfs_btree_cur    *cur,
1260         union xfs_btree_ptr     *ptr,
1261         int                     dir,
1262         int                     numptrs)
1263 {
1264         char                    *dst_ptr;
1265
1266         ASSERT(numptrs >= 0);
1267         ASSERT(dir == 1 || dir == -1);
1268
1269         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1270         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1271 }
1272
1273 /*
1274  * Log key values from the btree block.
1275  */
1276 STATIC void
1277 xfs_btree_log_keys(
1278         struct xfs_btree_cur    *cur,
1279         struct xfs_buf          *bp,
1280         int                     first,
1281         int                     last)
1282 {
1283         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1284         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1285
1286         if (bp) {
1287                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1288                 xfs_trans_log_buf(cur->bc_tp, bp,
1289                                   xfs_btree_key_offset(cur, first),
1290                                   xfs_btree_key_offset(cur, last + 1) - 1);
1291         } else {
1292                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1293                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1294         }
1295
1296         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1297 }
1298
1299 /*
1300  * Log record values from the btree block.
1301  */
1302 void
1303 xfs_btree_log_recs(
1304         struct xfs_btree_cur    *cur,
1305         struct xfs_buf          *bp,
1306         int                     first,
1307         int                     last)
1308 {
1309         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1310         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1311
1312         xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1313         xfs_trans_log_buf(cur->bc_tp, bp,
1314                           xfs_btree_rec_offset(cur, first),
1315                           xfs_btree_rec_offset(cur, last + 1) - 1);
1316
1317         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1318 }
1319
1320 /*
1321  * Log block pointer fields from a btree block (nonleaf).
1322  */
1323 STATIC void
1324 xfs_btree_log_ptrs(
1325         struct xfs_btree_cur    *cur,   /* btree cursor */
1326         struct xfs_buf          *bp,    /* buffer containing btree block */
1327         int                     first,  /* index of first pointer to log */
1328         int                     last)   /* index of last pointer to log */
1329 {
1330         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1331         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1332
1333         if (bp) {
1334                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1335                 int                     level = xfs_btree_get_level(block);
1336
1337                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1338                 xfs_trans_log_buf(cur->bc_tp, bp,
1339                                 xfs_btree_ptr_offset(cur, first, level),
1340                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1341         } else {
1342                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1343                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1344         }
1345
1346         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1347 }
1348
1349 /*
1350  * Log fields from a btree block header.
1351  */
1352 void
1353 xfs_btree_log_block(
1354         struct xfs_btree_cur    *cur,   /* btree cursor */
1355         struct xfs_buf          *bp,    /* buffer containing btree block */
1356         int                     fields) /* mask of fields: XFS_BB_... */
1357 {
1358         int                     first;  /* first byte offset logged */
1359         int                     last;   /* last byte offset logged */
1360         static const short      soffsets[] = {  /* table of offsets (short) */
1361                 offsetof(struct xfs_btree_block, bb_magic),
1362                 offsetof(struct xfs_btree_block, bb_level),
1363                 offsetof(struct xfs_btree_block, bb_numrecs),
1364                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1365                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1366                 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1367                 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1368                 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1369                 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1370                 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1371                 XFS_BTREE_SBLOCK_CRC_LEN
1372         };
1373         static const short      loffsets[] = {  /* table of offsets (long) */
1374                 offsetof(struct xfs_btree_block, bb_magic),
1375                 offsetof(struct xfs_btree_block, bb_level),
1376                 offsetof(struct xfs_btree_block, bb_numrecs),
1377                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1378                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1379                 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1380                 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1381                 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1382                 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1383                 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1384                 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1385                 XFS_BTREE_LBLOCK_CRC_LEN
1386         };
1387
1388         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1389         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1390
1391         if (bp) {
1392                 int nbits;
1393
1394                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1395                         /*
1396                          * We don't log the CRC when updating a btree
1397                          * block but instead recreate it during log
1398                          * recovery.  As the log buffers have checksums
1399                          * of their own this is safe and avoids logging a crc
1400                          * update in a lot of places.
1401                          */
1402                         if (fields == XFS_BB_ALL_BITS)
1403                                 fields = XFS_BB_ALL_BITS_CRC;
1404                         nbits = XFS_BB_NUM_BITS_CRC;
1405                 } else {
1406                         nbits = XFS_BB_NUM_BITS;
1407                 }
1408                 xfs_btree_offsets(fields,
1409                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1410                                         loffsets : soffsets,
1411                                   nbits, &first, &last);
1412                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1413                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1414         } else {
1415                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1416                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1417         }
1418
1419         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1420 }
1421
1422 /*
1423  * Increment cursor by one record at the level.
1424  * For nonzero levels the leaf-ward information is untouched.
1425  */
1426 int                                             /* error */
1427 xfs_btree_increment(
1428         struct xfs_btree_cur    *cur,
1429         int                     level,
1430         int                     *stat)          /* success/failure */
1431 {
1432         struct xfs_btree_block  *block;
1433         union xfs_btree_ptr     ptr;
1434         struct xfs_buf          *bp;
1435         int                     error;          /* error return value */
1436         int                     lev;
1437
1438         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1439         XFS_BTREE_TRACE_ARGI(cur, level);
1440
1441         ASSERT(level < cur->bc_nlevels);
1442
1443         /* Read-ahead to the right at this level. */
1444         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1445
1446         /* Get a pointer to the btree block. */
1447         block = xfs_btree_get_block(cur, level, &bp);
1448
1449 #ifdef DEBUG
1450         error = xfs_btree_check_block(cur, block, level, bp);
1451         if (error)
1452                 goto error0;
1453 #endif
1454
1455         /* We're done if we remain in the block after the increment. */
1456         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1457                 goto out1;
1458
1459         /* Fail if we just went off the right edge of the tree. */
1460         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1461         if (xfs_btree_ptr_is_null(cur, &ptr))
1462                 goto out0;
1463
1464         XFS_BTREE_STATS_INC(cur, increment);
1465
1466         /*
1467          * March up the tree incrementing pointers.
1468          * Stop when we don't go off the right edge of a block.
1469          */
1470         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1471                 block = xfs_btree_get_block(cur, lev, &bp);
1472
1473 #ifdef DEBUG
1474                 error = xfs_btree_check_block(cur, block, lev, bp);
1475                 if (error)
1476                         goto error0;
1477 #endif
1478
1479                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1480                         break;
1481
1482                 /* Read-ahead the right block for the next loop. */
1483                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1484         }
1485
1486         /*
1487          * If we went off the root then we are either seriously
1488          * confused or have the tree root in an inode.
1489          */
1490         if (lev == cur->bc_nlevels) {
1491                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1492                         goto out0;
1493                 ASSERT(0);
1494                 error = EFSCORRUPTED;
1495                 goto error0;
1496         }
1497         ASSERT(lev < cur->bc_nlevels);
1498
1499         /*
1500          * Now walk back down the tree, fixing up the cursor's buffer
1501          * pointers and key numbers.
1502          */
1503         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1504                 union xfs_btree_ptr     *ptrp;
1505
1506                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1507                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1508                                                         0, &block, &bp);
1509                 if (error)
1510                         goto error0;
1511
1512                 xfs_btree_setbuf(cur, lev, bp);
1513                 cur->bc_ptrs[lev] = 1;
1514         }
1515 out1:
1516         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1517         *stat = 1;
1518         return 0;
1519
1520 out0:
1521         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1522         *stat = 0;
1523         return 0;
1524
1525 error0:
1526         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1527         return error;
1528 }
1529
1530 /*
1531  * Decrement cursor by one record at the level.
1532  * For nonzero levels the leaf-ward information is untouched.
1533  */
1534 int                                             /* error */
1535 xfs_btree_decrement(
1536         struct xfs_btree_cur    *cur,
1537         int                     level,
1538         int                     *stat)          /* success/failure */
1539 {
1540         struct xfs_btree_block  *block;
1541         xfs_buf_t               *bp;
1542         int                     error;          /* error return value */
1543         int                     lev;
1544         union xfs_btree_ptr     ptr;
1545
1546         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1547         XFS_BTREE_TRACE_ARGI(cur, level);
1548
1549         ASSERT(level < cur->bc_nlevels);
1550
1551         /* Read-ahead to the left at this level. */
1552         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1553
1554         /* We're done if we remain in the block after the decrement. */
1555         if (--cur->bc_ptrs[level] > 0)
1556                 goto out1;
1557
1558         /* Get a pointer to the btree block. */
1559         block = xfs_btree_get_block(cur, level, &bp);
1560
1561 #ifdef DEBUG
1562         error = xfs_btree_check_block(cur, block, level, bp);
1563         if (error)
1564                 goto error0;
1565 #endif
1566
1567         /* Fail if we just went off the left edge of the tree. */
1568         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1569         if (xfs_btree_ptr_is_null(cur, &ptr))
1570                 goto out0;
1571
1572         XFS_BTREE_STATS_INC(cur, decrement);
1573
1574         /*
1575          * March up the tree decrementing pointers.
1576          * Stop when we don't go off the left edge of a block.
1577          */
1578         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1579                 if (--cur->bc_ptrs[lev] > 0)
1580                         break;
1581                 /* Read-ahead the left block for the next loop. */
1582                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1583         }
1584
1585         /*
1586          * If we went off the root then we are seriously confused.
1587          * or the root of the tree is in an inode.
1588          */
1589         if (lev == cur->bc_nlevels) {
1590                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1591                         goto out0;
1592                 ASSERT(0);
1593                 error = EFSCORRUPTED;
1594                 goto error0;
1595         }
1596         ASSERT(lev < cur->bc_nlevels);
1597
1598         /*
1599          * Now walk back down the tree, fixing up the cursor's buffer
1600          * pointers and key numbers.
1601          */
1602         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1603                 union xfs_btree_ptr     *ptrp;
1604
1605                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1606                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1607                                                         0, &block, &bp);
1608                 if (error)
1609                         goto error0;
1610                 xfs_btree_setbuf(cur, lev, bp);
1611                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1612         }
1613 out1:
1614         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1615         *stat = 1;
1616         return 0;
1617
1618 out0:
1619         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1620         *stat = 0;
1621         return 0;
1622
1623 error0:
1624         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1625         return error;
1626 }
1627
1628 STATIC int
1629 xfs_btree_lookup_get_block(
1630         struct xfs_btree_cur    *cur,   /* btree cursor */
1631         int                     level,  /* level in the btree */
1632         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1633         struct xfs_btree_block  **blkp) /* return btree block */
1634 {
1635         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1636         int                     error = 0;
1637
1638         /* special case the root block if in an inode */
1639         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1640             (level == cur->bc_nlevels - 1)) {
1641                 *blkp = xfs_btree_get_iroot(cur);
1642                 return 0;
1643         }
1644
1645         /*
1646          * If the old buffer at this level for the disk address we are
1647          * looking for re-use it.
1648          *
1649          * Otherwise throw it away and get a new one.
1650          */
1651         bp = cur->bc_bufs[level];
1652         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1653                 *blkp = XFS_BUF_TO_BLOCK(bp);
1654                 return 0;
1655         }
1656
1657         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1658         if (error)
1659                 return error;
1660
1661         xfs_btree_setbuf(cur, level, bp);
1662         return 0;
1663 }
1664
1665 /*
1666  * Get current search key.  For level 0 we don't actually have a key
1667  * structure so we make one up from the record.  For all other levels
1668  * we just return the right key.
1669  */
1670 STATIC union xfs_btree_key *
1671 xfs_lookup_get_search_key(
1672         struct xfs_btree_cur    *cur,
1673         int                     level,
1674         int                     keyno,
1675         struct xfs_btree_block  *block,
1676         union xfs_btree_key     *kp)
1677 {
1678         if (level == 0) {
1679                 cur->bc_ops->init_key_from_rec(kp,
1680                                 xfs_btree_rec_addr(cur, keyno, block));
1681                 return kp;
1682         }
1683
1684         return xfs_btree_key_addr(cur, keyno, block);
1685 }
1686
1687 /*
1688  * Lookup the record.  The cursor is made to point to it, based on dir.
1689  * stat is set to 0 if can't find any such record, 1 for success.
1690  */
1691 int                                     /* error */
1692 xfs_btree_lookup(
1693         struct xfs_btree_cur    *cur,   /* btree cursor */
1694         xfs_lookup_t            dir,    /* <=, ==, or >= */
1695         int                     *stat)  /* success/failure */
1696 {
1697         struct xfs_btree_block  *block; /* current btree block */
1698         __int64_t               diff;   /* difference for the current key */
1699         int                     error;  /* error return value */
1700         int                     keyno;  /* current key number */
1701         int                     level;  /* level in the btree */
1702         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1703         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1704
1705         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1706         XFS_BTREE_TRACE_ARGI(cur, dir);
1707
1708         XFS_BTREE_STATS_INC(cur, lookup);
1709
1710         block = NULL;
1711         keyno = 0;
1712
1713         /* initialise start pointer from cursor */
1714         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1715         pp = &ptr;
1716
1717         /*
1718          * Iterate over each level in the btree, starting at the root.
1719          * For each level above the leaves, find the key we need, based
1720          * on the lookup record, then follow the corresponding block
1721          * pointer down to the next level.
1722          */
1723         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1724                 /* Get the block we need to do the lookup on. */
1725                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1726                 if (error)
1727                         goto error0;
1728
1729                 if (diff == 0) {
1730                         /*
1731                          * If we already had a key match at a higher level, we
1732                          * know we need to use the first entry in this block.
1733                          */
1734                         keyno = 1;
1735                 } else {
1736                         /* Otherwise search this block. Do a binary search. */
1737
1738                         int     high;   /* high entry number */
1739                         int     low;    /* low entry number */
1740
1741                         /* Set low and high entry numbers, 1-based. */
1742                         low = 1;
1743                         high = xfs_btree_get_numrecs(block);
1744                         if (!high) {
1745                                 /* Block is empty, must be an empty leaf. */
1746                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1747
1748                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1749                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1750                                 *stat = 0;
1751                                 return 0;
1752                         }
1753
1754                         /* Binary search the block. */
1755                         while (low <= high) {
1756                                 union xfs_btree_key     key;
1757                                 union xfs_btree_key     *kp;
1758
1759                                 XFS_BTREE_STATS_INC(cur, compare);
1760
1761                                 /* keyno is average of low and high. */
1762                                 keyno = (low + high) >> 1;
1763
1764                                 /* Get current search key */
1765                                 kp = xfs_lookup_get_search_key(cur, level,
1766                                                 keyno, block, &key);
1767
1768                                 /*
1769                                  * Compute difference to get next direction:
1770                                  *  - less than, move right
1771                                  *  - greater than, move left
1772                                  *  - equal, we're done
1773                                  */
1774                                 diff = cur->bc_ops->key_diff(cur, kp);
1775                                 if (diff < 0)
1776                                         low = keyno + 1;
1777                                 else if (diff > 0)
1778                                         high = keyno - 1;
1779                                 else
1780                                         break;
1781                         }
1782                 }
1783
1784                 /*
1785                  * If there are more levels, set up for the next level
1786                  * by getting the block number and filling in the cursor.
1787                  */
1788                 if (level > 0) {
1789                         /*
1790                          * If we moved left, need the previous key number,
1791                          * unless there isn't one.
1792                          */
1793                         if (diff > 0 && --keyno < 1)
1794                                 keyno = 1;
1795                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1796
1797 #ifdef DEBUG
1798                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1799                         if (error)
1800                                 goto error0;
1801 #endif
1802                         cur->bc_ptrs[level] = keyno;
1803                 }
1804         }
1805
1806         /* Done with the search. See if we need to adjust the results. */
1807         if (dir != XFS_LOOKUP_LE && diff < 0) {
1808                 keyno++;
1809                 /*
1810                  * If ge search and we went off the end of the block, but it's
1811                  * not the last block, we're in the wrong block.
1812                  */
1813                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1814                 if (dir == XFS_LOOKUP_GE &&
1815                     keyno > xfs_btree_get_numrecs(block) &&
1816                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1817                         int     i;
1818
1819                         cur->bc_ptrs[0] = keyno;
1820                         error = xfs_btree_increment(cur, 0, &i);
1821                         if (error)
1822                                 goto error0;
1823                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1824                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1825                         *stat = 1;
1826                         return 0;
1827                 }
1828         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1829                 keyno--;
1830         cur->bc_ptrs[0] = keyno;
1831
1832         /* Return if we succeeded or not. */
1833         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1834                 *stat = 0;
1835         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1836                 *stat = 1;
1837         else
1838                 *stat = 0;
1839         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1840         return 0;
1841
1842 error0:
1843         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1844         return error;
1845 }
1846
1847 /*
1848  * Update keys at all levels from here to the root along the cursor's path.
1849  */
1850 STATIC int
1851 xfs_btree_updkey(
1852         struct xfs_btree_cur    *cur,
1853         union xfs_btree_key     *keyp,
1854         int                     level)
1855 {
1856         struct xfs_btree_block  *block;
1857         struct xfs_buf          *bp;
1858         union xfs_btree_key     *kp;
1859         int                     ptr;
1860
1861         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1862         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1863
1864         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1865
1866         /*
1867          * Go up the tree from this level toward the root.
1868          * At each level, update the key value to the value input.
1869          * Stop when we reach a level where the cursor isn't pointing
1870          * at the first entry in the block.
1871          */
1872         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1873 #ifdef DEBUG
1874                 int             error;
1875 #endif
1876                 block = xfs_btree_get_block(cur, level, &bp);
1877 #ifdef DEBUG
1878                 error = xfs_btree_check_block(cur, block, level, bp);
1879                 if (error) {
1880                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1881                         return error;
1882                 }
1883 #endif
1884                 ptr = cur->bc_ptrs[level];
1885                 kp = xfs_btree_key_addr(cur, ptr, block);
1886                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1887                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1888         }
1889
1890         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1891         return 0;
1892 }
1893
1894 /*
1895  * Update the record referred to by cur to the value in the
1896  * given record. This either works (return 0) or gets an
1897  * EFSCORRUPTED error.
1898  */
1899 int
1900 xfs_btree_update(
1901         struct xfs_btree_cur    *cur,
1902         union xfs_btree_rec     *rec)
1903 {
1904         struct xfs_btree_block  *block;
1905         struct xfs_buf          *bp;
1906         int                     error;
1907         int                     ptr;
1908         union xfs_btree_rec     *rp;
1909
1910         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1911         XFS_BTREE_TRACE_ARGR(cur, rec);
1912
1913         /* Pick up the current block. */
1914         block = xfs_btree_get_block(cur, 0, &bp);
1915
1916 #ifdef DEBUG
1917         error = xfs_btree_check_block(cur, block, 0, bp);
1918         if (error)
1919                 goto error0;
1920 #endif
1921         /* Get the address of the rec to be updated. */
1922         ptr = cur->bc_ptrs[0];
1923         rp = xfs_btree_rec_addr(cur, ptr, block);
1924
1925         /* Fill in the new contents and log them. */
1926         xfs_btree_copy_recs(cur, rp, rec, 1);
1927         xfs_btree_log_recs(cur, bp, ptr, ptr);
1928
1929         /*
1930          * If we are tracking the last record in the tree and
1931          * we are at the far right edge of the tree, update it.
1932          */
1933         if (xfs_btree_is_lastrec(cur, block, 0)) {
1934                 cur->bc_ops->update_lastrec(cur, block, rec,
1935                                             ptr, LASTREC_UPDATE);
1936         }
1937
1938         /* Updating first rec in leaf. Pass new key value up to our parent. */
1939         if (ptr == 1) {
1940                 union xfs_btree_key     key;
1941
1942                 cur->bc_ops->init_key_from_rec(&key, rec);
1943                 error = xfs_btree_updkey(cur, &key, 1);
1944                 if (error)
1945                         goto error0;
1946         }
1947
1948         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1949         return 0;
1950
1951 error0:
1952         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1953         return error;
1954 }
1955
1956 /*
1957  * Move 1 record left from cur/level if possible.
1958  * Update cur to reflect the new path.
1959  */
1960 STATIC int                                      /* error */
1961 xfs_btree_lshift(
1962         struct xfs_btree_cur    *cur,
1963         int                     level,
1964         int                     *stat)          /* success/failure */
1965 {
1966         union xfs_btree_key     key;            /* btree key */
1967         struct xfs_buf          *lbp;           /* left buffer pointer */
1968         struct xfs_btree_block  *left;          /* left btree block */
1969         int                     lrecs;          /* left record count */
1970         struct xfs_buf          *rbp;           /* right buffer pointer */
1971         struct xfs_btree_block  *right;         /* right btree block */
1972         int                     rrecs;          /* right record count */
1973         union xfs_btree_ptr     lptr;           /* left btree pointer */
1974         union xfs_btree_key     *rkp = NULL;    /* right btree key */
1975         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1976         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1977         int                     error;          /* error return value */
1978
1979         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1980         XFS_BTREE_TRACE_ARGI(cur, level);
1981
1982         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1983             level == cur->bc_nlevels - 1)
1984                 goto out0;
1985
1986         /* Set up variables for this block as "right". */
1987         right = xfs_btree_get_block(cur, level, &rbp);
1988
1989 #ifdef DEBUG
1990         error = xfs_btree_check_block(cur, right, level, rbp);
1991         if (error)
1992                 goto error0;
1993 #endif
1994
1995         /* If we've got no left sibling then we can't shift an entry left. */
1996         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1997         if (xfs_btree_ptr_is_null(cur, &lptr))
1998                 goto out0;
1999
2000         /*
2001          * If the cursor entry is the one that would be moved, don't
2002          * do it... it's too complicated.
2003          */
2004         if (cur->bc_ptrs[level] <= 1)
2005                 goto out0;
2006
2007         /* Set up the left neighbor as "left". */
2008         error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
2009         if (error)
2010                 goto error0;
2011
2012         /* If it's full, it can't take another entry. */
2013         lrecs = xfs_btree_get_numrecs(left);
2014         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2015                 goto out0;
2016
2017         rrecs = xfs_btree_get_numrecs(right);
2018
2019         /*
2020          * We add one entry to the left side and remove one for the right side.
2021          * Account for it here, the changes will be updated on disk and logged
2022          * later.
2023          */
2024         lrecs++;
2025         rrecs--;
2026
2027         XFS_BTREE_STATS_INC(cur, lshift);
2028         XFS_BTREE_STATS_ADD(cur, moves, 1);
2029
2030         /*
2031          * If non-leaf, copy a key and a ptr to the left block.
2032          * Log the changes to the left block.
2033          */
2034         if (level > 0) {
2035                 /* It's a non-leaf.  Move keys and pointers. */
2036                 union xfs_btree_key     *lkp;   /* left btree key */
2037                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2038
2039                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2040                 rkp = xfs_btree_key_addr(cur, 1, right);
2041
2042                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2043                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2044 #ifdef DEBUG
2045                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
2046                 if (error)
2047                         goto error0;
2048 #endif
2049                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
2050                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2051
2052                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2053                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2054
2055                 ASSERT(cur->bc_ops->keys_inorder(cur,
2056                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2057         } else {
2058                 /* It's a leaf.  Move records.  */
2059                 union xfs_btree_rec     *lrp;   /* left record pointer */
2060
2061                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2062                 rrp = xfs_btree_rec_addr(cur, 1, right);
2063
2064                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
2065                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2066
2067                 ASSERT(cur->bc_ops->recs_inorder(cur,
2068                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2069         }
2070
2071         xfs_btree_set_numrecs(left, lrecs);
2072         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2073
2074         xfs_btree_set_numrecs(right, rrecs);
2075         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2076
2077         /*
2078          * Slide the contents of right down one entry.
2079          */
2080         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2081         if (level > 0) {
2082                 /* It's a nonleaf. operate on keys and ptrs */
2083 #ifdef DEBUG
2084                 int                     i;              /* loop index */
2085
2086                 for (i = 0; i < rrecs; i++) {
2087                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
2088                         if (error)
2089                                 goto error0;
2090                 }
2091 #endif
2092                 xfs_btree_shift_keys(cur,
2093                                 xfs_btree_key_addr(cur, 2, right),
2094                                 -1, rrecs);
2095                 xfs_btree_shift_ptrs(cur,
2096                                 xfs_btree_ptr_addr(cur, 2, right),
2097                                 -1, rrecs);
2098
2099                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2100                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2101         } else {
2102                 /* It's a leaf. operate on records */
2103                 xfs_btree_shift_recs(cur,
2104                         xfs_btree_rec_addr(cur, 2, right),
2105                         -1, rrecs);
2106                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2107
2108                 /*
2109                  * If it's the first record in the block, we'll need a key
2110                  * structure to pass up to the next level (updkey).
2111                  */
2112                 cur->bc_ops->init_key_from_rec(&key,
2113                         xfs_btree_rec_addr(cur, 1, right));
2114                 rkp = &key;
2115         }
2116
2117         /* Update the parent key values of right. */
2118         error = xfs_btree_updkey(cur, rkp, level + 1);
2119         if (error)
2120                 goto error0;
2121
2122         /* Slide the cursor value left one. */
2123         cur->bc_ptrs[level]--;
2124
2125         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2126         *stat = 1;
2127         return 0;
2128
2129 out0:
2130         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2131         *stat = 0;
2132         return 0;
2133
2134 error0:
2135         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2136         return error;
2137 }
2138
2139 /*
2140  * Move 1 record right from cur/level if possible.
2141  * Update cur to reflect the new path.
2142  */
2143 STATIC int                                      /* error */
2144 xfs_btree_rshift(
2145         struct xfs_btree_cur    *cur,
2146         int                     level,
2147         int                     *stat)          /* success/failure */
2148 {
2149         union xfs_btree_key     key;            /* btree key */
2150         struct xfs_buf          *lbp;           /* left buffer pointer */
2151         struct xfs_btree_block  *left;          /* left btree block */
2152         struct xfs_buf          *rbp;           /* right buffer pointer */
2153         struct xfs_btree_block  *right;         /* right btree block */
2154         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2155         union xfs_btree_ptr     rptr;           /* right block pointer */
2156         union xfs_btree_key     *rkp;           /* right btree key */
2157         int                     rrecs;          /* right record count */
2158         int                     lrecs;          /* left record count */
2159         int                     error;          /* error return value */
2160         int                     i;              /* loop counter */
2161
2162         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2163         XFS_BTREE_TRACE_ARGI(cur, level);
2164
2165         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2166             (level == cur->bc_nlevels - 1))
2167                 goto out0;
2168
2169         /* Set up variables for this block as "left". */
2170         left = xfs_btree_get_block(cur, level, &lbp);
2171
2172 #ifdef DEBUG
2173         error = xfs_btree_check_block(cur, left, level, lbp);
2174         if (error)
2175                 goto error0;
2176 #endif
2177
2178         /* If we've got no right sibling then we can't shift an entry right. */
2179         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2180         if (xfs_btree_ptr_is_null(cur, &rptr))
2181                 goto out0;
2182
2183         /*
2184          * If the cursor entry is the one that would be moved, don't
2185          * do it... it's too complicated.
2186          */
2187         lrecs = xfs_btree_get_numrecs(left);
2188         if (cur->bc_ptrs[level] >= lrecs)
2189                 goto out0;
2190
2191         /* Set up the right neighbor as "right". */
2192         error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2193         if (error)
2194                 goto error0;
2195
2196         /* If it's full, it can't take another entry. */
2197         rrecs = xfs_btree_get_numrecs(right);
2198         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2199                 goto out0;
2200
2201         XFS_BTREE_STATS_INC(cur, rshift);
2202         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2203
2204         /*
2205          * Make a hole at the start of the right neighbor block, then
2206          * copy the last left block entry to the hole.
2207          */
2208         if (level > 0) {
2209                 /* It's a nonleaf. make a hole in the keys and ptrs */
2210                 union xfs_btree_key     *lkp;
2211                 union xfs_btree_ptr     *lpp;
2212                 union xfs_btree_ptr     *rpp;
2213
2214                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2215                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2216                 rkp = xfs_btree_key_addr(cur, 1, right);
2217                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2218
2219 #ifdef DEBUG
2220                 for (i = rrecs - 1; i >= 0; i--) {
2221                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2222                         if (error)
2223                                 goto error0;
2224                 }
2225 #endif
2226
2227                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2228                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2229
2230 #ifdef DEBUG
2231                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2232                 if (error)
2233                         goto error0;
2234 #endif
2235
2236                 /* Now put the new data in, and log it. */
2237                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2238                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2239
2240                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2241                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2242
2243                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2244                         xfs_btree_key_addr(cur, 2, right)));
2245         } else {
2246                 /* It's a leaf. make a hole in the records */
2247                 union xfs_btree_rec     *lrp;
2248                 union xfs_btree_rec     *rrp;
2249
2250                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2251                 rrp = xfs_btree_rec_addr(cur, 1, right);
2252
2253                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2254
2255                 /* Now put the new data in, and log it. */
2256                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2257                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2258
2259                 cur->bc_ops->init_key_from_rec(&key, rrp);
2260                 rkp = &key;
2261
2262                 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2263                         xfs_btree_rec_addr(cur, 2, right)));
2264         }
2265
2266         /*
2267          * Decrement and log left's numrecs, bump and log right's numrecs.
2268          */
2269         xfs_btree_set_numrecs(left, --lrecs);
2270         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2271
2272         xfs_btree_set_numrecs(right, ++rrecs);
2273         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2274
2275         /*
2276          * Using a temporary cursor, update the parent key values of the
2277          * block on the right.
2278          */
2279         error = xfs_btree_dup_cursor(cur, &tcur);
2280         if (error)
2281                 goto error0;
2282         i = xfs_btree_lastrec(tcur, level);
2283         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2284
2285         error = xfs_btree_increment(tcur, level, &i);
2286         if (error)
2287                 goto error1;
2288
2289         error = xfs_btree_updkey(tcur, rkp, level + 1);
2290         if (error)
2291                 goto error1;
2292
2293         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2294
2295         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2296         *stat = 1;
2297         return 0;
2298
2299 out0:
2300         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2301         *stat = 0;
2302         return 0;
2303
2304 error0:
2305         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2306         return error;
2307
2308 error1:
2309         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2310         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2311         return error;
2312 }
2313
2314 /*
2315  * Split cur/level block in half.
2316  * Return new block number and the key to its first
2317  * record (to be inserted into parent).
2318  */
2319 STATIC int                                      /* error */
2320 xfs_btree_split(
2321         struct xfs_btree_cur    *cur,
2322         int                     level,
2323         union xfs_btree_ptr     *ptrp,
2324         union xfs_btree_key     *key,
2325         struct xfs_btree_cur    **curp,
2326         int                     *stat)          /* success/failure */
2327 {
2328         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2329         struct xfs_buf          *lbp;           /* left buffer pointer */
2330         struct xfs_btree_block  *left;          /* left btree block */
2331         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2332         struct xfs_buf          *rbp;           /* right buffer pointer */
2333         struct xfs_btree_block  *right;         /* right btree block */
2334         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2335         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2336         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2337         int                     lrecs;
2338         int                     rrecs;
2339         int                     src_index;
2340         int                     error;          /* error return value */
2341 #ifdef DEBUG
2342         int                     i;
2343 #endif
2344
2345         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2346         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2347
2348         XFS_BTREE_STATS_INC(cur, split);
2349
2350         /* Set up left block (current one). */
2351         left = xfs_btree_get_block(cur, level, &lbp);
2352
2353 #ifdef DEBUG
2354         error = xfs_btree_check_block(cur, left, level, lbp);
2355         if (error)
2356                 goto error0;
2357 #endif
2358
2359         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2360
2361         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2362         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2363         if (error)
2364                 goto error0;
2365         if (*stat == 0)
2366                 goto out0;
2367         XFS_BTREE_STATS_INC(cur, alloc);
2368
2369         /* Set up the new block as "right". */
2370         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2371         if (error)
2372                 goto error0;
2373
2374         /* Fill in the btree header for the new right block. */
2375         xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2376
2377         /*
2378          * Split the entries between the old and the new block evenly.
2379          * Make sure that if there's an odd number of entries now, that
2380          * each new block will have the same number of entries.
2381          */
2382         lrecs = xfs_btree_get_numrecs(left);
2383         rrecs = lrecs / 2;
2384         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2385                 rrecs++;
2386         src_index = (lrecs - rrecs + 1);
2387
2388         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2389
2390         /*
2391          * Copy btree block entries from the left block over to the
2392          * new block, the right. Update the right block and log the
2393          * changes.
2394          */
2395         if (level > 0) {
2396                 /* It's a non-leaf.  Move keys and pointers. */
2397                 union xfs_btree_key     *lkp;   /* left btree key */
2398                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2399                 union xfs_btree_key     *rkp;   /* right btree key */
2400                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2401
2402                 lkp = xfs_btree_key_addr(cur, src_index, left);
2403                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2404                 rkp = xfs_btree_key_addr(cur, 1, right);
2405                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2406
2407 #ifdef DEBUG
2408                 for (i = src_index; i < rrecs; i++) {
2409                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2410                         if (error)
2411                                 goto error0;
2412                 }
2413 #endif
2414
2415                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2416                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2417
2418                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2419                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2420
2421                 /* Grab the keys to the entries moved to the right block */
2422                 xfs_btree_copy_keys(cur, key, rkp, 1);
2423         } else {
2424                 /* It's a leaf.  Move records.  */
2425                 union xfs_btree_rec     *lrp;   /* left record pointer */
2426                 union xfs_btree_rec     *rrp;   /* right record pointer */
2427
2428                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2429                 rrp = xfs_btree_rec_addr(cur, 1, right);
2430
2431                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2432                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2433
2434                 cur->bc_ops->init_key_from_rec(key,
2435                         xfs_btree_rec_addr(cur, 1, right));
2436         }
2437
2438
2439         /*
2440          * Find the left block number by looking in the buffer.
2441          * Adjust numrecs, sibling pointers.
2442          */
2443         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2444         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2445         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2446         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2447
2448         lrecs -= rrecs;
2449         xfs_btree_set_numrecs(left, lrecs);
2450         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2451
2452         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2453         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2454
2455         /*
2456          * If there's a block to the new block's right, make that block
2457          * point back to right instead of to left.
2458          */
2459         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2460                 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2461                                                         0, &rrblock, &rrbp);
2462                 if (error)
2463                         goto error0;
2464                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2465                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2466         }
2467         /*
2468          * If the cursor is really in the right block, move it there.
2469          * If it's just pointing past the last entry in left, then we'll
2470          * insert there, so don't change anything in that case.
2471          */
2472         if (cur->bc_ptrs[level] > lrecs + 1) {
2473                 xfs_btree_setbuf(cur, level, rbp);
2474                 cur->bc_ptrs[level] -= lrecs;
2475         }
2476         /*
2477          * If there are more levels, we'll need another cursor which refers
2478          * the right block, no matter where this cursor was.
2479          */
2480         if (level + 1 < cur->bc_nlevels) {
2481                 error = xfs_btree_dup_cursor(cur, curp);
2482                 if (error)
2483                         goto error0;
2484                 (*curp)->bc_ptrs[level + 1]++;
2485         }
2486         *ptrp = rptr;
2487         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2488         *stat = 1;
2489         return 0;
2490 out0:
2491         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2492         *stat = 0;
2493         return 0;
2494
2495 error0:
2496         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2497         return error;
2498 }
2499
2500 /*
2501  * Copy the old inode root contents into a real block and make the
2502  * broot point to it.
2503  */
2504 int                                             /* error */
2505 xfs_btree_new_iroot(
2506         struct xfs_btree_cur    *cur,           /* btree cursor */
2507         int                     *logflags,      /* logging flags for inode */
2508         int                     *stat)          /* return status - 0 fail */
2509 {
2510         struct xfs_buf          *cbp;           /* buffer for cblock */
2511         struct xfs_btree_block  *block;         /* btree block */
2512         struct xfs_btree_block  *cblock;        /* child btree block */
2513         union xfs_btree_key     *ckp;           /* child key pointer */
2514         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2515         union xfs_btree_key     *kp;            /* pointer to btree key */
2516         union xfs_btree_ptr     *pp;            /* pointer to block addr */
2517         union xfs_btree_ptr     nptr;           /* new block addr */
2518         int                     level;          /* btree level */
2519         int                     error;          /* error return code */
2520 #ifdef DEBUG
2521         int                     i;              /* loop counter */
2522 #endif
2523
2524         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2525         XFS_BTREE_STATS_INC(cur, newroot);
2526
2527         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2528
2529         level = cur->bc_nlevels - 1;
2530
2531         block = xfs_btree_get_iroot(cur);
2532         pp = xfs_btree_ptr_addr(cur, 1, block);
2533
2534         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2535         error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2536         if (error)
2537                 goto error0;
2538         if (*stat == 0) {
2539                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2540                 return 0;
2541         }
2542         XFS_BTREE_STATS_INC(cur, alloc);
2543
2544         /* Copy the root into a real block. */
2545         error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2546         if (error)
2547                 goto error0;
2548
2549         /*
2550          * we can't just memcpy() the root in for CRC enabled btree blocks.
2551          * In that case have to also ensure the blkno remains correct
2552          */
2553         memcpy(cblock, block, xfs_btree_block_len(cur));
2554         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
2555                 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
2556                         cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
2557                 else
2558                         cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
2559         }
2560
2561         be16_add_cpu(&block->bb_level, 1);
2562         xfs_btree_set_numrecs(block, 1);
2563         cur->bc_nlevels++;
2564         cur->bc_ptrs[level + 1] = 1;
2565
2566         kp = xfs_btree_key_addr(cur, 1, block);
2567         ckp = xfs_btree_key_addr(cur, 1, cblock);
2568         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2569
2570         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2571 #ifdef DEBUG
2572         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2573                 error = xfs_btree_check_ptr(cur, pp, i, level);
2574                 if (error)
2575                         goto error0;
2576         }
2577 #endif
2578         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2579
2580 #ifdef DEBUG
2581         error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2582         if (error)
2583                 goto error0;
2584 #endif
2585         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2586
2587         xfs_iroot_realloc(cur->bc_private.b.ip,
2588                           1 - xfs_btree_get_numrecs(cblock),
2589                           cur->bc_private.b.whichfork);
2590
2591         xfs_btree_setbuf(cur, level, cbp);
2592
2593         /*
2594          * Do all this logging at the end so that
2595          * the root is at the right level.
2596          */
2597         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2598         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2599         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2600
2601         *logflags |=
2602                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2603         *stat = 1;
2604         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2605         return 0;
2606 error0:
2607         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2608         return error;
2609 }
2610
2611 /*
2612  * Allocate a new root block, fill it in.
2613  */
2614 STATIC int                              /* error */
2615 xfs_btree_new_root(
2616         struct xfs_btree_cur    *cur,   /* btree cursor */
2617         int                     *stat)  /* success/failure */
2618 {
2619         struct xfs_btree_block  *block; /* one half of the old root block */
2620         struct xfs_buf          *bp;    /* buffer containing block */
2621         int                     error;  /* error return value */
2622         struct xfs_buf          *lbp;   /* left buffer pointer */
2623         struct xfs_btree_block  *left;  /* left btree block */
2624         struct xfs_buf          *nbp;   /* new (root) buffer */
2625         struct xfs_btree_block  *new;   /* new (root) btree block */
2626         int                     nptr;   /* new value for key index, 1 or 2 */
2627         struct xfs_buf          *rbp;   /* right buffer pointer */
2628         struct xfs_btree_block  *right; /* right btree block */
2629         union xfs_btree_ptr     rptr;
2630         union xfs_btree_ptr     lptr;
2631
2632         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2633         XFS_BTREE_STATS_INC(cur, newroot);
2634
2635         /* initialise our start point from the cursor */
2636         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2637
2638         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2639         error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2640         if (error)
2641                 goto error0;
2642         if (*stat == 0)
2643                 goto out0;
2644         XFS_BTREE_STATS_INC(cur, alloc);
2645
2646         /* Set up the new block. */
2647         error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2648         if (error)
2649                 goto error0;
2650
2651         /* Set the root in the holding structure  increasing the level by 1. */
2652         cur->bc_ops->set_root(cur, &lptr, 1);
2653
2654         /*
2655          * At the previous root level there are now two blocks: the old root,
2656          * and the new block generated when it was split.  We don't know which
2657          * one the cursor is pointing at, so we set up variables "left" and
2658          * "right" for each case.
2659          */
2660         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2661
2662 #ifdef DEBUG
2663         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2664         if (error)
2665                 goto error0;
2666 #endif
2667
2668         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2669         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2670                 /* Our block is left, pick up the right block. */
2671                 lbp = bp;
2672                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2673                 left = block;
2674                 error = xfs_btree_read_buf_block(cur, &rptr,
2675                                         cur->bc_nlevels - 1, 0, &right, &rbp);
2676                 if (error)
2677                         goto error0;
2678                 bp = rbp;
2679                 nptr = 1;
2680         } else {
2681                 /* Our block is right, pick up the left block. */
2682                 rbp = bp;
2683                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2684                 right = block;
2685                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2686                 error = xfs_btree_read_buf_block(cur, &lptr,
2687                                         cur->bc_nlevels - 1, 0, &left, &lbp);
2688                 if (error)
2689                         goto error0;
2690                 bp = lbp;
2691                 nptr = 2;
2692         }
2693         /* Fill in the new block's btree header and log it. */
2694         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
2695         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2696         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2697                         !xfs_btree_ptr_is_null(cur, &rptr));
2698
2699         /* Fill in the key data in the new root. */
2700         if (xfs_btree_get_level(left) > 0) {
2701                 xfs_btree_copy_keys(cur,
2702                                 xfs_btree_key_addr(cur, 1, new),
2703                                 xfs_btree_key_addr(cur, 1, left), 1);
2704                 xfs_btree_copy_keys(cur,
2705                                 xfs_btree_key_addr(cur, 2, new),
2706                                 xfs_btree_key_addr(cur, 1, right), 1);
2707         } else {
2708                 cur->bc_ops->init_key_from_rec(
2709                                 xfs_btree_key_addr(cur, 1, new),
2710                                 xfs_btree_rec_addr(cur, 1, left));
2711                 cur->bc_ops->init_key_from_rec(
2712                                 xfs_btree_key_addr(cur, 2, new),
2713                                 xfs_btree_rec_addr(cur, 1, right));
2714         }
2715         xfs_btree_log_keys(cur, nbp, 1, 2);
2716
2717         /* Fill in the pointer data in the new root. */
2718         xfs_btree_copy_ptrs(cur,
2719                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2720         xfs_btree_copy_ptrs(cur,
2721                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2722         xfs_btree_log_ptrs(cur, nbp, 1, 2);
2723
2724         /* Fix up the cursor. */
2725         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2726         cur->bc_ptrs[cur->bc_nlevels] = nptr;
2727         cur->bc_nlevels++;
2728         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2729         *stat = 1;
2730         return 0;
2731 error0:
2732         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2733         return error;
2734 out0:
2735         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2736         *stat = 0;
2737         return 0;
2738 }
2739
2740 STATIC int
2741 xfs_btree_make_block_unfull(
2742         struct xfs_btree_cur    *cur,   /* btree cursor */
2743         int                     level,  /* btree level */
2744         int                     numrecs,/* # of recs in block */
2745         int                     *oindex,/* old tree index */
2746         int                     *index, /* new tree index */
2747         union xfs_btree_ptr     *nptr,  /* new btree ptr */
2748         struct xfs_btree_cur    **ncur, /* new btree cursor */
2749         union xfs_btree_rec     *nrec,  /* new record */
2750         int                     *stat)
2751 {
2752         union xfs_btree_key     key;    /* new btree key value */
2753         int                     error = 0;
2754
2755         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2756             level == cur->bc_nlevels - 1) {
2757                 struct xfs_inode *ip = cur->bc_private.b.ip;
2758
2759                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2760                         /* A root block that can be made bigger. */
2761                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2762                 } else {
2763                         /* A root block that needs replacing */
2764                         int     logflags = 0;
2765
2766                         error = xfs_btree_new_iroot(cur, &logflags, stat);
2767                         if (error || *stat == 0)
2768                                 return error;
2769
2770                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2771                 }
2772
2773                 return 0;
2774         }
2775
2776         /* First, try shifting an entry to the right neighbor. */
2777         error = xfs_btree_rshift(cur, level, stat);
2778         if (error || *stat)
2779                 return error;
2780
2781         /* Next, try shifting an entry to the left neighbor. */
2782         error = xfs_btree_lshift(cur, level, stat);
2783         if (error)
2784                 return error;
2785
2786         if (*stat) {
2787                 *oindex = *index = cur->bc_ptrs[level];
2788                 return 0;
2789         }
2790
2791         /*
2792          * Next, try splitting the current block in half.
2793          *
2794          * If this works we have to re-set our variables because we
2795          * could be in a different block now.
2796          */
2797         error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2798         if (error || *stat == 0)
2799                 return error;
2800
2801
2802         *index = cur->bc_ptrs[level];
2803         cur->bc_ops->init_rec_from_key(&key, nrec);
2804         return 0;
2805 }
2806
2807 /*
2808  * Insert one record/level.  Return information to the caller
2809  * allowing the next level up to proceed if necessary.
2810  */
2811 STATIC int
2812 xfs_btree_insrec(
2813         struct xfs_btree_cur    *cur,   /* btree cursor */
2814         int                     level,  /* level to insert record at */
2815         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2816         union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2817         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2818         int                     *stat)  /* success/failure */
2819 {
2820         struct xfs_btree_block  *block; /* btree block */
2821         struct xfs_buf          *bp;    /* buffer for block */
2822         union xfs_btree_key     key;    /* btree key */
2823         union xfs_btree_ptr     nptr;   /* new block ptr */
2824         struct xfs_btree_cur    *ncur;  /* new btree cursor */
2825         union xfs_btree_rec     nrec;   /* new record count */
2826         int                     optr;   /* old key/record index */
2827         int                     ptr;    /* key/record index */
2828         int                     numrecs;/* number of records */
2829         int                     error;  /* error return value */
2830 #ifdef DEBUG
2831         int                     i;
2832 #endif
2833
2834         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2835         XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2836
2837         ncur = NULL;
2838
2839         /*
2840          * If we have an external root pointer, and we've made it to the
2841          * root level, allocate a new root block and we're done.
2842          */
2843         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2844             (level >= cur->bc_nlevels)) {
2845                 error = xfs_btree_new_root(cur, stat);
2846                 xfs_btree_set_ptr_null(cur, ptrp);
2847
2848                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2849                 return error;
2850         }
2851
2852         /* If we're off the left edge, return failure. */
2853         ptr = cur->bc_ptrs[level];
2854         if (ptr == 0) {
2855                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2856                 *stat = 0;
2857                 return 0;
2858         }
2859
2860         /* Make a key out of the record data to be inserted, and save it. */
2861         cur->bc_ops->init_key_from_rec(&key, recp);
2862
2863         optr = ptr;
2864
2865         XFS_BTREE_STATS_INC(cur, insrec);
2866
2867         /* Get pointers to the btree buffer and block. */
2868         block = xfs_btree_get_block(cur, level, &bp);
2869         numrecs = xfs_btree_get_numrecs(block);
2870
2871 #ifdef DEBUG
2872         error = xfs_btree_check_block(cur, block, level, bp);
2873         if (error)
2874                 goto error0;
2875
2876         /* Check that the new entry is being inserted in the right place. */
2877         if (ptr <= numrecs) {
2878                 if (level == 0) {
2879                         ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2880                                 xfs_btree_rec_addr(cur, ptr, block)));
2881                 } else {
2882                         ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2883                                 xfs_btree_key_addr(cur, ptr, block)));
2884                 }
2885         }
2886 #endif
2887
2888         /*
2889          * If the block is full, we can't insert the new entry until we
2890          * make the block un-full.
2891          */
2892         xfs_btree_set_ptr_null(cur, &nptr);
2893         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2894                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2895                                         &optr, &ptr, &nptr, &ncur, &nrec, stat);
2896                 if (error || *stat == 0)
2897                         goto error0;
2898         }
2899
2900         /*
2901          * The current block may have changed if the block was
2902          * previously full and we have just made space in it.
2903          */
2904         block = xfs_btree_get_block(cur, level, &bp);
2905         numrecs = xfs_btree_get_numrecs(block);
2906
2907 #ifdef DEBUG
2908         error = xfs_btree_check_block(cur, block, level, bp);
2909         if (error)
2910                 return error;
2911 #endif
2912
2913         /*
2914          * At this point we know there's room for our new entry in the block
2915          * we're pointing at.
2916          */
2917         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2918
2919         if (level > 0) {
2920                 /* It's a nonleaf. make a hole in the keys and ptrs */
2921                 union xfs_btree_key     *kp;
2922                 union xfs_btree_ptr     *pp;
2923
2924                 kp = xfs_btree_key_addr(cur, ptr, block);
2925                 pp = xfs_btree_ptr_addr(cur, ptr, block);
2926
2927 #ifdef DEBUG
2928                 for (i = numrecs - ptr; i >= 0; i--) {
2929                         error = xfs_btree_check_ptr(cur, pp, i, level);
2930                         if (error)
2931                                 return error;
2932                 }
2933 #endif
2934
2935                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2936                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2937
2938 #ifdef DEBUG
2939                 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2940                 if (error)
2941                         goto error0;
2942 #endif
2943
2944                 /* Now put the new data in, bump numrecs and log it. */
2945                 xfs_btree_copy_keys(cur, kp, &key, 1);
2946                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2947                 numrecs++;
2948                 xfs_btree_set_numrecs(block, numrecs);
2949                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2950                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
2951 #ifdef DEBUG
2952                 if (ptr < numrecs) {
2953                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2954                                 xfs_btree_key_addr(cur, ptr + 1, block)));
2955                 }
2956 #endif
2957         } else {
2958                 /* It's a leaf. make a hole in the records */
2959                 union xfs_btree_rec             *rp;
2960
2961                 rp = xfs_btree_rec_addr(cur, ptr, block);
2962
2963                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2964
2965                 /* Now put the new data in, bump numrecs and log it. */
2966                 xfs_btree_copy_recs(cur, rp, recp, 1);
2967                 xfs_btree_set_numrecs(block, ++numrecs);
2968                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
2969 #ifdef DEBUG
2970                 if (ptr < numrecs) {
2971                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2972                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
2973                 }
2974 #endif
2975         }
2976
2977         /* Log the new number of records in the btree header. */
2978         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2979
2980         /* If we inserted at the start of a block, update the parents' keys. */
2981         if (optr == 1) {
2982                 error = xfs_btree_updkey(cur, &key, level + 1);
2983                 if (error)
2984                         goto error0;
2985         }
2986
2987         /*
2988          * If we are tracking the last record in the tree and
2989          * we are at the far right edge of the tree, update it.
2990          */
2991         if (xfs_btree_is_lastrec(cur, block, level)) {
2992                 cur->bc_ops->update_lastrec(cur, block, recp,
2993                                             ptr, LASTREC_INSREC);
2994         }
2995
2996         /*
2997          * Return the new block number, if any.
2998          * If there is one, give back a record value and a cursor too.
2999          */
3000         *ptrp = nptr;
3001         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3002                 *recp = nrec;
3003                 *curp = ncur;
3004         }
3005
3006         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3007         *stat = 1;
3008         return 0;
3009
3010 error0:
3011         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3012         return error;
3013 }
3014
3015 /*
3016  * Insert the record at the point referenced by cur.
3017  *
3018  * A multi-level split of the tree on insert will invalidate the original
3019  * cursor.  All callers of this function should assume that the cursor is
3020  * no longer valid and revalidate it.
3021  */
3022 int
3023 xfs_btree_insert(
3024         struct xfs_btree_cur    *cur,
3025         int                     *stat)
3026 {
3027         int                     error;  /* error return value */
3028         int                     i;      /* result value, 0 for failure */
3029         int                     level;  /* current level number in btree */
3030         union xfs_btree_ptr     nptr;   /* new block number (split result) */
3031         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
3032         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
3033         union xfs_btree_rec     rec;    /* record to insert */
3034
3035         level = 0;
3036         ncur = NULL;
3037         pcur = cur;
3038
3039         xfs_btree_set_ptr_null(cur, &nptr);
3040         cur->bc_ops->init_rec_from_cur(cur, &rec);
3041
3042         /*
3043          * Loop going up the tree, starting at the leaf level.
3044          * Stop when we don't get a split block, that must mean that
3045          * the insert is finished with this level.
3046          */
3047         do {
3048                 /*
3049                  * Insert nrec/nptr into this level of the tree.
3050                  * Note if we fail, nptr will be null.
3051                  */
3052                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
3053                 if (error) {
3054                         if (pcur != cur)
3055                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3056                         goto error0;
3057                 }
3058
3059                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3060                 level++;
3061
3062                 /*
3063                  * See if the cursor we just used is trash.
3064                  * Can't trash the caller's cursor, but otherwise we should
3065                  * if ncur is a new cursor or we're about to be done.
3066                  */
3067                 if (pcur != cur &&
3068                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3069                         /* Save the state from the cursor before we trash it */
3070                         if (cur->bc_ops->update_cursor)
3071                                 cur->bc_ops->update_cursor(pcur, cur);
3072                         cur->bc_nlevels = pcur->bc_nlevels;
3073                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3074                 }
3075                 /* If we got a new cursor, switch to it. */
3076                 if (ncur) {
3077                         pcur = ncur;
3078                         ncur = NULL;
3079                 }
3080         } while (!xfs_btree_ptr_is_null(cur, &nptr));
3081
3082         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3083         *stat = i;
3084         return 0;
3085 error0:
3086         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3087         return error;
3088 }
3089
3090 /*
3091  * Try to merge a non-leaf block back into the inode root.
3092  *
3093  * Note: the killroot names comes from the fact that we're effectively
3094  * killing the old root block.  But because we can't just delete the
3095  * inode we have to copy the single block it was pointing to into the
3096  * inode.
3097  */
3098 STATIC int
3099 xfs_btree_kill_iroot(
3100         struct xfs_btree_cur    *cur)
3101 {
3102         int                     whichfork = cur->bc_private.b.whichfork;
3103         struct xfs_inode        *ip = cur->bc_private.b.ip;
3104         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3105         struct xfs_btree_block  *block;
3106         struct xfs_btree_block  *cblock;
3107         union xfs_btree_key     *kp;
3108         union xfs_btree_key     *ckp;
3109         union xfs_btree_ptr     *pp;
3110         union xfs_btree_ptr     *cpp;
3111         struct xfs_buf          *cbp;
3112         int                     level;
3113         int                     index;
3114         int                     numrecs;
3115 #ifdef DEBUG
3116         union xfs_btree_ptr     ptr;
3117         int                     i;
3118 #endif
3119
3120         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3121
3122         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3123         ASSERT(cur->bc_nlevels > 1);
3124
3125         /*
3126          * Don't deal with the root block needs to be a leaf case.
3127          * We're just going to turn the thing back into extents anyway.
3128          */
3129         level = cur->bc_nlevels - 1;
3130         if (level == 1)
3131                 goto out0;
3132
3133         /*
3134          * Give up if the root has multiple children.
3135          */
3136         block = xfs_btree_get_iroot(cur);
3137         if (xfs_btree_get_numrecs(block) != 1)
3138                 goto out0;
3139
3140         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3141         numrecs = xfs_btree_get_numrecs(cblock);
3142
3143         /*
3144          * Only do this if the next level will fit.
3145          * Then the data must be copied up to the inode,
3146          * instead of freeing the root you free the next level.
3147          */
3148         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3149                 goto out0;
3150
3151         XFS_BTREE_STATS_INC(cur, killroot);
3152
3153 #ifdef DEBUG
3154         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3155         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3156         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3157         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3158 #endif
3159
3160         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3161         if (index) {
3162                 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3163                                   cur->bc_private.b.whichfork);
3164                 block = ifp->if_broot;
3165         }
3166
3167         be16_add_cpu(&block->bb_numrecs, index);
3168         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3169
3170         kp = xfs_btree_key_addr(cur, 1, block);
3171         ckp = xfs_btree_key_addr(cur, 1, cblock);
3172         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3173
3174         pp = xfs_btree_ptr_addr(cur, 1, block);
3175         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3176 #ifdef DEBUG
3177         for (i = 0; i < numrecs; i++) {
3178                 int             error;
3179
3180                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3181                 if (error) {
3182                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3183                         return error;
3184                 }
3185         }
3186 #endif
3187         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3188
3189         cur->bc_ops->free_block(cur, cbp);
3190         XFS_BTREE_STATS_INC(cur, free);
3191
3192         cur->bc_bufs[level - 1] = NULL;
3193         be16_add_cpu(&block->bb_level, -1);
3194         xfs_trans_log_inode(cur->bc_tp, ip,
3195                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3196         cur->bc_nlevels--;
3197 out0:
3198         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3199         return 0;
3200 }
3201
3202 /*
3203  * Kill the current root node, and replace it with it's only child node.
3204  */
3205 STATIC int
3206 xfs_btree_kill_root(
3207         struct xfs_btree_cur    *cur,
3208         struct xfs_buf          *bp,
3209         int                     level,
3210         union xfs_btree_ptr     *newroot)
3211 {
3212         int                     error;
3213
3214         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3215         XFS_BTREE_STATS_INC(cur, killroot);
3216
3217         /*
3218          * Update the root pointer, decreasing the level by 1 and then
3219          * free the old root.
3220          */
3221         cur->bc_ops->set_root(cur, newroot, -1);
3222
3223         error = cur->bc_ops->free_block(cur, bp);
3224         if (error) {
3225                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3226                 return error;
3227         }
3228
3229         XFS_BTREE_STATS_INC(cur, free);
3230
3231         cur->bc_bufs[level] = NULL;
3232         cur->bc_ra[level] = 0;
3233         cur->bc_nlevels--;
3234
3235         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3236         return 0;
3237 }
3238
3239 STATIC int
3240 xfs_btree_dec_cursor(
3241         struct xfs_btree_cur    *cur,
3242         int                     level,
3243         int                     *stat)
3244 {
3245         int                     error;
3246         int                     i;
3247
3248         if (level > 0) {
3249                 error = xfs_btree_decrement(cur, level, &i);
3250                 if (error)
3251                         return error;
3252         }
3253
3254         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3255         *stat = 1;
3256         return 0;
3257 }
3258
3259 /*
3260  * Single level of the btree record deletion routine.
3261  * Delete record pointed to by cur/level.
3262  * Remove the record from its block then rebalance the tree.
3263  * Return 0 for error, 1 for done, 2 to go on to the next level.
3264  */
3265 STATIC int                                      /* error */
3266 xfs_btree_delrec(
3267         struct xfs_btree_cur    *cur,           /* btree cursor */
3268         int                     level,          /* level removing record from */
3269         int                     *stat)          /* fail/done/go-on */
3270 {
3271         struct xfs_btree_block  *block;         /* btree block */
3272         union xfs_btree_ptr     cptr;           /* current block ptr */
3273         struct xfs_buf          *bp;            /* buffer for block */
3274         int                     error;          /* error return value */
3275         int                     i;              /* loop counter */
3276         union xfs_btree_key     key;            /* storage for keyp */
3277         union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3278         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3279         struct xfs_buf          *lbp;           /* left buffer pointer */
3280         struct xfs_btree_block  *left;          /* left btree block */
3281         int                     lrecs = 0;      /* left record count */
3282         int                     ptr;            /* key/record index */
3283         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3284         struct xfs_buf          *rbp;           /* right buffer pointer */
3285         struct xfs_btree_block  *right;         /* right btree block */
3286         struct xfs_btree_block  *rrblock;       /* right-right btree block */
3287         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3288         int                     rrecs = 0;      /* right record count */
3289         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3290         int                     numrecs;        /* temporary numrec count */
3291
3292         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3293         XFS_BTREE_TRACE_ARGI(cur, level);
3294
3295         tcur = NULL;
3296
3297         /* Get the index of the entry being deleted, check for nothing there. */
3298         ptr = cur->bc_ptrs[level];
3299         if (ptr == 0) {
3300                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3301                 *stat = 0;
3302                 return 0;
3303         }
3304
3305         /* Get the buffer & block containing the record or key/ptr. */
3306         block = xfs_btree_get_block(cur, level, &bp);
3307         numrecs = xfs_btree_get_numrecs(block);
3308
3309 #ifdef DEBUG
3310         error = xfs_btree_check_block(cur, block, level, bp);
3311         if (error)
3312                 goto error0;
3313 #endif
3314
3315         /* Fail if we're off the end of the block. */
3316         if (ptr > numrecs) {
3317                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3318                 *stat = 0;
3319                 return 0;
3320         }
3321
3322         XFS_BTREE_STATS_INC(cur, delrec);
3323         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3324
3325         /* Excise the entries being deleted. */
3326         if (level > 0) {
3327                 /* It's a nonleaf. operate on keys and ptrs */
3328                 union xfs_btree_key     *lkp;
3329                 union xfs_btree_ptr     *lpp;
3330
3331                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3332                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3333
3334 #ifdef DEBUG
3335                 for (i = 0; i < numrecs - ptr; i++) {
3336                         error = xfs_btree_check_ptr(cur, lpp, i, level);
3337                         if (error)
3338                                 goto error0;
3339                 }
3340 #endif
3341
3342                 if (ptr < numrecs) {
3343                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3344                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3345                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3346                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3347                 }
3348
3349                 /*
3350                  * If it's the first record in the block, we'll need to pass a
3351                  * key up to the next level (updkey).
3352                  */
3353                 if (ptr == 1)
3354                         keyp = xfs_btree_key_addr(cur, 1, block);
3355         } else {
3356                 /* It's a leaf. operate on records */
3357                 if (ptr < numrecs) {
3358                         xfs_btree_shift_recs(cur,
3359                                 xfs_btree_rec_addr(cur, ptr + 1, block),
3360                                 -1, numrecs - ptr);
3361                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3362                 }
3363
3364                 /*
3365                  * If it's the first record in the block, we'll need a key
3366                  * structure to pass up to the next level (updkey).
3367                  */
3368                 if (ptr == 1) {
3369                         cur->bc_ops->init_key_from_rec(&key,
3370                                         xfs_btree_rec_addr(cur, 1, block));
3371                         keyp = &key;
3372                 }
3373         }
3374
3375         /*
3376          * Decrement and log the number of entries in the block.
3377          */
3378         xfs_btree_set_numrecs(block, --numrecs);
3379         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3380
3381         /*
3382          * If we are tracking the last record in the tree and
3383          * we are at the far right edge of the tree, update it.
3384          */
3385         if (xfs_btree_is_lastrec(cur, block, level)) {
3386                 cur->bc_ops->update_lastrec(cur, block, NULL,
3387                                             ptr, LASTREC_DELREC);
3388         }
3389
3390         /*
3391          * We're at the root level.  First, shrink the root block in-memory.
3392          * Try to get rid of the next level down.  If we can't then there's
3393          * nothing left to do.
3394          */
3395         if (level == cur->bc_nlevels - 1) {
3396                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3397                         xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3398                                           cur->bc_private.b.whichfork);
3399
3400                         error = xfs_btree_kill_iroot(cur);
3401                         if (error)
3402                                 goto error0;
3403
3404                         error = xfs_btree_dec_cursor(cur, level, stat);
3405                         if (error)
3406                                 goto error0;
3407                         *stat = 1;
3408                         return 0;
3409                 }
3410
3411                 /*
3412                  * If this is the root level, and there's only one entry left,
3413                  * and it's NOT the leaf level, then we can get rid of this
3414                  * level.
3415                  */
3416                 if (numrecs == 1 && level > 0) {
3417                         union xfs_btree_ptr     *pp;
3418                         /*
3419                          * pp is still set to the first pointer in the block.
3420                          * Make it the new root of the btree.
3421                          */
3422                         pp = xfs_btree_ptr_addr(cur, 1, block);
3423                         error = xfs_btree_kill_root(cur, bp, level, pp);
3424                         if (error)
3425                                 goto error0;
3426                 } else if (level > 0) {
3427                         error = xfs_btree_dec_cursor(cur, level, stat);
3428                         if (error)
3429                                 goto error0;
3430                 }
3431                 *stat = 1;
3432                 return 0;
3433         }
3434
3435         /*
3436          * If we deleted the leftmost entry in the block, update the
3437          * key values above us in the tree.
3438          */
3439         if (ptr == 1) {
3440                 error = xfs_btree_updkey(cur, keyp, level + 1);
3441                 if (error)
3442                         goto error0;
3443         }
3444
3445         /*
3446          * If the number of records remaining in the block is at least
3447          * the minimum, we're done.
3448          */
3449         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3450                 error = xfs_btree_dec_cursor(cur, level, stat);
3451                 if (error)
3452                         goto error0;
3453                 return 0;
3454         }
3455
3456         /*
3457          * Otherwise, we have to move some records around to keep the
3458          * tree balanced.  Look at the left and right sibling blocks to
3459          * see if we can re-balance by moving only one record.
3460          */
3461         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3462         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3463
3464         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3465                 /*
3466                  * One child of root, need to get a chance to copy its contents
3467                  * into the root and delete it. Can't go up to next level,
3468                  * there's nothing to delete there.
3469                  */
3470                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3471                     xfs_btree_ptr_is_null(cur, &lptr) &&
3472                     level == cur->bc_nlevels - 2) {
3473                         error = xfs_btree_kill_iroot(cur);
3474                         if (!error)
3475                                 error = xfs_btree_dec_cursor(cur, level, stat);
3476                         if (error)
3477                                 goto error0;
3478                         return 0;
3479                 }
3480         }
3481
3482         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3483                !xfs_btree_ptr_is_null(cur, &lptr));
3484
3485         /*
3486          * Duplicate the cursor so our btree manipulations here won't
3487          * disrupt the next level up.
3488          */
3489         error = xfs_btree_dup_cursor(cur, &tcur);
3490         if (error)
3491                 goto error0;
3492
3493         /*
3494          * If there's a right sibling, see if it's ok to shift an entry
3495          * out of it.
3496          */
3497         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3498                 /*
3499                  * Move the temp cursor to the last entry in the next block.
3500                  * Actually any entry but the first would suffice.
3501                  */
3502                 i = xfs_btree_lastrec(tcur, level);
3503                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3504
3505                 error = xfs_btree_increment(tcur, level, &i);
3506                 if (error)
3507                         goto error0;
3508                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3509
3510                 i = xfs_btree_lastrec(tcur, level);
3511                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3512
3513                 /* Grab a pointer to the block. */
3514                 right = xfs_btree_get_block(tcur, level, &rbp);
3515 #ifdef DEBUG
3516                 error = xfs_btree_check_block(tcur, right, level, rbp);
3517                 if (error)
3518                         goto error0;
3519 #endif
3520                 /* Grab the current block number, for future use. */
3521                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3522
3523                 /*
3524                  * If right block is full enough so that removing one entry
3525                  * won't make it too empty, and left-shifting an entry out
3526                  * of right to us works, we're done.
3527                  */
3528                 if (xfs_btree_get_numrecs(right) - 1 >=
3529                     cur->bc_ops->get_minrecs(tcur, level)) {
3530                         error = xfs_btree_lshift(tcur, level, &i);
3531                         if (error)
3532                                 goto error0;
3533                         if (i) {
3534                                 ASSERT(xfs_btree_get_numrecs(block) >=
3535                                        cur->bc_ops->get_minrecs(tcur, level));
3536
3537                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3538                                 tcur = NULL;
3539
3540                                 error = xfs_btree_dec_cursor(cur, level, stat);
3541                                 if (error)
3542                                         goto error0;
3543                                 return 0;
3544                         }
3545                 }
3546
3547                 /*
3548                  * Otherwise, grab the number of records in right for
3549                  * future reference, and fix up the temp cursor to point
3550                  * to our block again (last record).
3551                  */
3552                 rrecs = xfs_btree_get_numrecs(right);
3553                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3554                         i = xfs_btree_firstrec(tcur, level);
3555                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3556
3557                         error = xfs_btree_decrement(tcur, level, &i);
3558                         if (error)
3559                                 goto error0;
3560                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3561                 }
3562         }
3563
3564         /*
3565          * If there's a left sibling, see if it's ok to shift an entry
3566          * out of it.
3567          */
3568         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3569                 /*
3570                  * Move the temp cursor to the first entry in the
3571                  * previous block.
3572                  */
3573                 i = xfs_btree_firstrec(tcur, level);
3574                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3575
3576                 error = xfs_btree_decrement(tcur, level, &i);
3577                 if (error)
3578                         goto error0;
3579                 i = xfs_btree_firstrec(tcur, level);
3580                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3581
3582                 /* Grab a pointer to the block. */
3583                 left = xfs_btree_get_block(tcur, level, &lbp);
3584 #ifdef DEBUG
3585                 error = xfs_btree_check_block(cur, left, level, lbp);
3586                 if (error)
3587                         goto error0;
3588 #endif
3589                 /* Grab the current block number, for future use. */
3590                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3591
3592                 /*
3593                  * If left block is full enough so that removing one entry
3594                  * won't make it too empty, and right-shifting an entry out
3595                  * of left to us works, we're done.
3596                  */
3597                 if (xfs_btree_get_numrecs(left) - 1 >=
3598                     cur->bc_ops->get_minrecs(tcur, level)) {
3599                         error = xfs_btree_rshift(tcur, level, &i);
3600                         if (error)
3601                                 goto error0;
3602                         if (i) {
3603                                 ASSERT(xfs_btree_get_numrecs(block) >=
3604                                        cur->bc_ops->get_minrecs(tcur, level));
3605                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3606                                 tcur = NULL;
3607                                 if (level == 0)
3608                                         cur->bc_ptrs[0]++;
3609                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3610                                 *stat = 1;
3611                                 return 0;
3612                         }
3613                 }
3614
3615                 /*
3616                  * Otherwise, grab the number of records in right for
3617                  * future reference.
3618                  */
3619                 lrecs = xfs_btree_get_numrecs(left);
3620         }
3621
3622         /* Delete the temp cursor, we're done with it. */
3623         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3624         tcur = NULL;
3625
3626         /* If here, we need to do a join to keep the tree balanced. */
3627         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3628
3629         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3630             lrecs + xfs_btree_get_numrecs(block) <=
3631                         cur->bc_ops->get_maxrecs(cur, level)) {
3632                 /*
3633                  * Set "right" to be the starting block,
3634                  * "left" to be the left neighbor.
3635                  */
3636                 rptr = cptr;
3637                 right = block;
3638                 rbp = bp;
3639                 error = xfs_btree_read_buf_block(cur, &lptr, level,
3640                                                         0, &left, &lbp);
3641                 if (error)
3642                         goto error0;
3643
3644         /*
3645          * If that won't work, see if we can join with the right neighbor block.
3646          */
3647         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3648                    rrecs + xfs_btree_get_numrecs(block) <=
3649                         cur->bc_ops->get_maxrecs(cur, level)) {
3650                 /*
3651                  * Set "left" to be the starting block,
3652                  * "right" to be the right neighbor.
3653                  */
3654                 lptr = cptr;
3655                 left = block;
3656                 lbp = bp;
3657                 error = xfs_btree_read_buf_block(cur, &rptr, level,
3658                                                         0, &right, &rbp);
3659                 if (error)
3660                         goto error0;
3661
3662         /*
3663          * Otherwise, we can't fix the imbalance.
3664          * Just return.  This is probably a logic error, but it's not fatal.
3665          */
3666         } else {
3667                 error = xfs_btree_dec_cursor(cur, level, stat);
3668                 if (error)
3669                         goto error0;
3670                 return 0;
3671         }
3672
3673         rrecs = xfs_btree_get_numrecs(right);
3674         lrecs = xfs_btree_get_numrecs(left);
3675
3676         /*
3677          * We're now going to join "left" and "right" by moving all the stuff
3678          * in "right" to "left" and deleting "right".
3679          */
3680         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3681         if (level > 0) {
3682                 /* It's a non-leaf.  Move keys and pointers. */
3683                 union xfs_btree_key     *lkp;   /* left btree key */
3684                 union xfs_btree_ptr     *lpp;   /* left address pointer */
3685                 union xfs_btree_key     *rkp;   /* right btree key */
3686                 union xfs_btree_ptr     *rpp;   /* right address pointer */
3687
3688                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3689                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3690                 rkp = xfs_btree_key_addr(cur, 1, right);
3691                 rpp = xfs_btree_ptr_addr(cur, 1, right);
3692 #ifdef DEBUG
3693                 for (i = 1; i < rrecs; i++) {
3694                         error = xfs_btree_check_ptr(cur, rpp, i, level);
3695                         if (error)
3696                                 goto error0;
3697                 }
3698 #endif
3699                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3700                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3701
3702                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3703                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3704         } else {
3705                 /* It's a leaf.  Move records.  */
3706                 union xfs_btree_rec     *lrp;   /* left record pointer */
3707                 union xfs_btree_rec     *rrp;   /* right record pointer */
3708
3709                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3710                 rrp = xfs_btree_rec_addr(cur, 1, right);
3711
3712                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3713                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3714         }
3715
3716         XFS_BTREE_STATS_INC(cur, join);
3717
3718         /*
3719          * Fix up the number of records and right block pointer in the
3720          * surviving block, and log it.
3721          */
3722         xfs_btree_set_numrecs(left, lrecs + rrecs);
3723         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3724         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3725         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3726
3727         /* If there is a right sibling, point it to the remaining block. */
3728         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3729         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3730                 error = xfs_btree_read_buf_block(cur, &cptr, level,
3731                                                         0, &rrblock, &rrbp);
3732                 if (error)
3733                         goto error0;
3734                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3735                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3736         }
3737
3738         /* Free the deleted block. */
3739         error = cur->bc_ops->free_block(cur, rbp);
3740         if (error)
3741                 goto error0;
3742         XFS_BTREE_STATS_INC(cur, free);
3743
3744         /*
3745          * If we joined with the left neighbor, set the buffer in the
3746          * cursor to the left block, and fix up the index.
3747          */
3748         if (bp != lbp) {
3749                 cur->bc_bufs[level] = lbp;
3750                 cur->bc_ptrs[level] += lrecs;
3751                 cur->bc_ra[level] = 0;
3752         }
3753         /*
3754          * If we joined with the right neighbor and there's a level above
3755          * us, increment the cursor at that level.
3756          */
3757         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3758                    (level + 1 < cur->bc_nlevels)) {
3759                 error = xfs_btree_increment(cur, level + 1, &i);
3760                 if (error)
3761                         goto error0;
3762         }
3763
3764         /*
3765          * Readjust the ptr at this level if it's not a leaf, since it's
3766          * still pointing at the deletion point, which makes the cursor
3767          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3768          * We can't use decrement because it would change the next level up.
3769          */
3770         if (level > 0)
3771                 cur->bc_ptrs[level]--;
3772
3773         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3774         /* Return value means the next level up has something to do. */
3775         *stat = 2;
3776         return 0;
3777
3778 error0:
3779         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3780         if (tcur)
3781                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3782         return error;
3783 }
3784
3785 /*
3786  * Delete the record pointed to by cur.
3787  * The cursor refers to the place where the record was (could be inserted)
3788  * when the operation returns.
3789  */
3790 int                                     /* error */
3791 xfs_btree_delete(
3792         struct xfs_btree_cur    *cur,
3793         int                     *stat)  /* success/failure */
3794 {
3795         int                     error;  /* error return value */
3796         int                     level;
3797         int                     i;
3798
3799         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3800
3801         /*
3802          * Go up the tree, starting at leaf level.
3803          *
3804          * If 2 is returned then a join was done; go to the next level.
3805          * Otherwise we are done.
3806          */
3807         for (level = 0, i = 2; i == 2; level++) {
3808                 error = xfs_btree_delrec(cur, level, &i);
3809                 if (error)
3810                         goto error0;
3811         }
3812
3813         if (i == 0) {
3814                 for (level = 1; level < cur->bc_nlevels; level++) {
3815                         if (cur->bc_ptrs[level] == 0) {
3816                                 error = xfs_btree_decrement(cur, level, &i);
3817                                 if (error)
3818                                         goto error0;
3819                                 break;
3820                         }
3821                 }
3822         }
3823
3824         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3825         *stat = i;
3826         return 0;
3827 error0:
3828         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3829         return error;
3830 }
3831
3832 /*
3833  * Get the data from the pointed-to record.
3834  */
3835 int                                     /* error */
3836 xfs_btree_get_rec(
3837         struct xfs_btree_cur    *cur,   /* btree cursor */
3838         union xfs_btree_rec     **recp, /* output: btree record */
3839         int                     *stat)  /* output: success/failure */
3840 {
3841         struct xfs_btree_block  *block; /* btree block */
3842         struct xfs_buf          *bp;    /* buffer pointer */
3843         int                     ptr;    /* record number */
3844 #ifdef DEBUG
3845         int                     error;  /* error return value */
3846 #endif
3847
3848         ptr = cur->bc_ptrs[0];
3849         block = xfs_btree_get_block(cur, 0, &bp);
3850
3851 #ifdef DEBUG
3852         error = xfs_btree_check_block(cur, block, 0, bp);
3853         if (error)
3854                 return error;
3855 #endif
3856
3857         /*
3858          * Off the right end or left end, return failure.
3859          */
3860         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3861                 *stat = 0;
3862                 return 0;
3863         }
3864
3865         /*
3866          * Point to the record and extract its data.
3867          */
3868         *recp = xfs_btree_rec_addr(cur, ptr, block);
3869         *stat = 1;
3870         return 0;
3871 }