]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/xfs/xfs_da_btree.c
[XFS] Improve buffered read throughput by removing unnecessary timer calls
[karo-tx-linux.git] / fs / xfs / xfs_da_btree.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32
33 #include "xfs.h"
34
35 #include "xfs_macros.h"
36 #include "xfs_types.h"
37 #include "xfs_inum.h"
38 #include "xfs_log.h"
39 #include "xfs_trans.h"
40 #include "xfs_sb.h"
41 #include "xfs_ag.h"
42 #include "xfs_dir.h"
43 #include "xfs_dir2.h"
44 #include "xfs_dmapi.h"
45 #include "xfs_mount.h"
46 #include "xfs_alloc_btree.h"
47 #include "xfs_bmap_btree.h"
48 #include "xfs_ialloc_btree.h"
49 #include "xfs_alloc.h"
50 #include "xfs_btree.h"
51 #include "xfs_attr_sf.h"
52 #include "xfs_dir_sf.h"
53 #include "xfs_dir2_sf.h"
54 #include "xfs_dinode.h"
55 #include "xfs_inode_item.h"
56 #include "xfs_inode.h"
57 #include "xfs_bmap.h"
58 #include "xfs_da_btree.h"
59 #include "xfs_attr.h"
60 #include "xfs_attr_leaf.h"
61 #include "xfs_dir_leaf.h"
62 #include "xfs_dir2_data.h"
63 #include "xfs_dir2_leaf.h"
64 #include "xfs_dir2_block.h"
65 #include "xfs_dir2_node.h"
66 #include "xfs_error.h"
67 #include "xfs_bit.h"
68
69 /*
70  * xfs_da_btree.c
71  *
72  * Routines to implement directories as Btrees of hashed names.
73  */
74
75 /*========================================================================
76  * Function prototypes for the kernel.
77  *========================================================================*/
78
79 /*
80  * Routines used for growing the Btree.
81  */
82 STATIC int xfs_da_root_split(xfs_da_state_t *state,
83                                             xfs_da_state_blk_t *existing_root,
84                                             xfs_da_state_blk_t *new_child);
85 STATIC int xfs_da_node_split(xfs_da_state_t *state,
86                                             xfs_da_state_blk_t *existing_blk,
87                                             xfs_da_state_blk_t *split_blk,
88                                             xfs_da_state_blk_t *blk_to_add,
89                                             int treelevel,
90                                             int *result);
91 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
92                                          xfs_da_state_blk_t *node_blk_1,
93                                          xfs_da_state_blk_t *node_blk_2);
94 STATIC void xfs_da_node_add(xfs_da_state_t *state,
95                                    xfs_da_state_blk_t *old_node_blk,
96                                    xfs_da_state_blk_t *new_node_blk);
97
98 /*
99  * Routines used for shrinking the Btree.
100  */
101 STATIC int xfs_da_root_join(xfs_da_state_t *state,
102                                            xfs_da_state_blk_t *root_blk);
103 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
104 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
105                                               xfs_da_state_blk_t *drop_blk);
106 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
107                                          xfs_da_state_blk_t *src_node_blk,
108                                          xfs_da_state_blk_t *dst_node_blk);
109
110 /*
111  * Utility routines.
112  */
113 STATIC uint     xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
114 STATIC int      xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
115 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
116 STATIC int      xfs_da_blk_unlink(xfs_da_state_t *state,
117                                   xfs_da_state_blk_t *drop_blk,
118                                   xfs_da_state_blk_t *save_blk);
119 STATIC void     xfs_da_state_kill_altpath(xfs_da_state_t *state);
120
121 /*========================================================================
122  * Routines used for growing the Btree.
123  *========================================================================*/
124
125 /*
126  * Create the initial contents of an intermediate node.
127  */
128 int
129 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
130                                  xfs_dabuf_t **bpp, int whichfork)
131 {
132         xfs_da_intnode_t *node;
133         xfs_dabuf_t *bp;
134         int error;
135         xfs_trans_t *tp;
136
137         tp = args->trans;
138         error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
139         if (error)
140                 return(error);
141         ASSERT(bp != NULL);
142         node = bp->data;
143         node->hdr.info.forw = 0;
144         node->hdr.info.back = 0;
145         INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
146         node->hdr.info.pad = 0;
147         node->hdr.count = 0;
148         INT_SET(node->hdr.level, ARCH_CONVERT, level);
149
150         xfs_da_log_buf(tp, bp,
151                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
152
153         *bpp = bp;
154         return(0);
155 }
156
157 /*
158  * Split a leaf node, rebalance, then possibly split
159  * intermediate nodes, rebalance, etc.
160  */
161 int                                                     /* error */
162 xfs_da_split(xfs_da_state_t *state)
163 {
164         xfs_da_state_blk_t *oldblk, *newblk, *addblk;
165         xfs_da_intnode_t *node;
166         xfs_dabuf_t *bp;
167         int max, action, error, i;
168
169         /*
170          * Walk back up the tree splitting/inserting/adjusting as necessary.
171          * If we need to insert and there isn't room, split the node, then
172          * decide which fragment to insert the new block from below into.
173          * Note that we may split the root this way, but we need more fixup.
174          */
175         max = state->path.active - 1;
176         ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
177         ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
178                state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
179
180         addblk = &state->path.blk[max];         /* initial dummy value */
181         for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
182                 oldblk = &state->path.blk[i];
183                 newblk = &state->altpath.blk[i];
184
185                 /*
186                  * If a leaf node then
187                  *     Allocate a new leaf node, then rebalance across them.
188                  * else if an intermediate node then
189                  *     We split on the last layer, must we split the node?
190                  */
191                 switch (oldblk->magic) {
192                 case XFS_ATTR_LEAF_MAGIC:
193                         error = xfs_attr_leaf_split(state, oldblk, newblk);
194                         if ((error != 0) && (error != ENOSPC)) {
195                                 return(error);  /* GROT: attr is inconsistent */
196                         }
197                         if (!error) {
198                                 addblk = newblk;
199                                 break;
200                         }
201                         /*
202                          * Entry wouldn't fit, split the leaf again.
203                          */
204                         state->extravalid = 1;
205                         if (state->inleaf) {
206                                 state->extraafter = 0;  /* before newblk */
207                                 error = xfs_attr_leaf_split(state, oldblk,
208                                                             &state->extrablk);
209                         } else {
210                                 state->extraafter = 1;  /* after newblk */
211                                 error = xfs_attr_leaf_split(state, newblk,
212                                                             &state->extrablk);
213                         }
214                         if (error)
215                                 return(error);  /* GROT: attr inconsistent */
216                         addblk = newblk;
217                         break;
218                 case XFS_DIR_LEAF_MAGIC:
219                         ASSERT(XFS_DIR_IS_V1(state->mp));
220                         error = xfs_dir_leaf_split(state, oldblk, newblk);
221                         if ((error != 0) && (error != ENOSPC)) {
222                                 return(error);  /* GROT: dir is inconsistent */
223                         }
224                         if (!error) {
225                                 addblk = newblk;
226                                 break;
227                         }
228                         /*
229                          * Entry wouldn't fit, split the leaf again.
230                          */
231                         state->extravalid = 1;
232                         if (state->inleaf) {
233                                 state->extraafter = 0;  /* before newblk */
234                                 error = xfs_dir_leaf_split(state, oldblk,
235                                                            &state->extrablk);
236                                 if (error)
237                                         return(error);  /* GROT: dir incon. */
238                                 addblk = newblk;
239                         } else {
240                                 state->extraafter = 1;  /* after newblk */
241                                 error = xfs_dir_leaf_split(state, newblk,
242                                                            &state->extrablk);
243                                 if (error)
244                                         return(error);  /* GROT: dir incon. */
245                                 addblk = newblk;
246                         }
247                         break;
248                 case XFS_DIR2_LEAFN_MAGIC:
249                         ASSERT(XFS_DIR_IS_V2(state->mp));
250                         error = xfs_dir2_leafn_split(state, oldblk, newblk);
251                         if (error)
252                                 return error;
253                         addblk = newblk;
254                         break;
255                 case XFS_DA_NODE_MAGIC:
256                         error = xfs_da_node_split(state, oldblk, newblk, addblk,
257                                                          max - i, &action);
258                         xfs_da_buf_done(addblk->bp);
259                         addblk->bp = NULL;
260                         if (error)
261                                 return(error);  /* GROT: dir is inconsistent */
262                         /*
263                          * Record the newly split block for the next time thru?
264                          */
265                         if (action)
266                                 addblk = newblk;
267                         else
268                                 addblk = NULL;
269                         break;
270                 }
271
272                 /*
273                  * Update the btree to show the new hashval for this child.
274                  */
275                 xfs_da_fixhashpath(state, &state->path);
276                 /*
277                  * If we won't need this block again, it's getting dropped
278                  * from the active path by the loop control, so we need
279                  * to mark it done now.
280                  */
281                 if (i > 0 || !addblk)
282                         xfs_da_buf_done(oldblk->bp);
283         }
284         if (!addblk)
285                 return(0);
286
287         /*
288          * Split the root node.
289          */
290         ASSERT(state->path.active == 0);
291         oldblk = &state->path.blk[0];
292         error = xfs_da_root_split(state, oldblk, addblk);
293         if (error) {
294                 xfs_da_buf_done(oldblk->bp);
295                 xfs_da_buf_done(addblk->bp);
296                 addblk->bp = NULL;
297                 return(error);  /* GROT: dir is inconsistent */
298         }
299
300         /*
301          * Update pointers to the node which used to be block 0 and
302          * just got bumped because of the addition of a new root node.
303          * There might be three blocks involved if a double split occurred,
304          * and the original block 0 could be at any position in the list.
305          */
306
307         node = oldblk->bp->data;
308         if (node->hdr.info.forw) {
309                 if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
310                         bp = addblk->bp;
311                 } else {
312                         ASSERT(state->extravalid);
313                         bp = state->extrablk.bp;
314                 }
315                 node = bp->data;
316                 INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
317                 xfs_da_log_buf(state->args->trans, bp,
318                     XFS_DA_LOGRANGE(node, &node->hdr.info,
319                     sizeof(node->hdr.info)));
320         }
321         node = oldblk->bp->data;
322         if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
323                 if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
324                         bp = addblk->bp;
325                 } else {
326                         ASSERT(state->extravalid);
327                         bp = state->extrablk.bp;
328                 }
329                 node = bp->data;
330                 INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
331                 xfs_da_log_buf(state->args->trans, bp,
332                     XFS_DA_LOGRANGE(node, &node->hdr.info,
333                     sizeof(node->hdr.info)));
334         }
335         xfs_da_buf_done(oldblk->bp);
336         xfs_da_buf_done(addblk->bp);
337         addblk->bp = NULL;
338         return(0);
339 }
340
341 /*
342  * Split the root.  We have to create a new root and point to the two
343  * parts (the split old root) that we just created.  Copy block zero to
344  * the EOF, extending the inode in process.
345  */
346 STATIC int                                              /* error */
347 xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
348                                  xfs_da_state_blk_t *blk2)
349 {
350         xfs_da_intnode_t *node, *oldroot;
351         xfs_da_args_t *args;
352         xfs_dablk_t blkno;
353         xfs_dabuf_t *bp;
354         int error, size;
355         xfs_inode_t *dp;
356         xfs_trans_t *tp;
357         xfs_mount_t *mp;
358         xfs_dir2_leaf_t *leaf;
359
360         /*
361          * Copy the existing (incorrect) block from the root node position
362          * to a free space somewhere.
363          */
364         args = state->args;
365         ASSERT(args != NULL);
366         error = xfs_da_grow_inode(args, &blkno);
367         if (error)
368                 return(error);
369         dp = args->dp;
370         tp = args->trans;
371         mp = state->mp;
372         error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
373         if (error)
374                 return(error);
375         ASSERT(bp != NULL);
376         node = bp->data;
377         oldroot = blk1->bp->data;
378         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
379                 size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
380                              (char *)oldroot);
381         } else {
382                 ASSERT(XFS_DIR_IS_V2(mp));
383                 ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
384                 leaf = (xfs_dir2_leaf_t *)oldroot;
385                 size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
386                              (char *)leaf);
387         }
388         memcpy(node, oldroot, size);
389         xfs_da_log_buf(tp, bp, 0, size - 1);
390         xfs_da_buf_done(blk1->bp);
391         blk1->bp = bp;
392         blk1->blkno = blkno;
393
394         /*
395          * Set up the new root node.
396          */
397         error = xfs_da_node_create(args,
398                 args->whichfork == XFS_DATA_FORK &&
399                 XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
400                 INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
401         if (error)
402                 return(error);
403         node = bp->data;
404         INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
405         INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
406         INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
407         INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
408         INT_SET(node->hdr.count, ARCH_CONVERT, 2);
409
410 #ifdef DEBUG
411         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
412                 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
413                        blk1->blkno < mp->m_dirfreeblk);
414                 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
415                        blk2->blkno < mp->m_dirfreeblk);
416         }
417 #endif
418
419         /* Header is already logged by xfs_da_node_create */
420         xfs_da_log_buf(tp, bp,
421                 XFS_DA_LOGRANGE(node, node->btree,
422                         sizeof(xfs_da_node_entry_t) * 2));
423         xfs_da_buf_done(bp);
424
425         return(0);
426 }
427
428 /*
429  * Split the node, rebalance, then add the new entry.
430  */
431 STATIC int                                              /* error */
432 xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
433                                  xfs_da_state_blk_t *newblk,
434                                  xfs_da_state_blk_t *addblk,
435                                  int treelevel, int *result)
436 {
437         xfs_da_intnode_t *node;
438         xfs_dablk_t blkno;
439         int newcount, error;
440         int useextra;
441
442         node = oldblk->bp->data;
443         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
444
445         /*
446          * With V2 the extra block is data or freespace.
447          */
448         useextra = state->extravalid && XFS_DIR_IS_V1(state->mp);
449         newcount = 1 + useextra;
450         /*
451          * Do we have to split the node?
452          */
453         if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) {
454                 /*
455                  * Allocate a new node, add to the doubly linked chain of
456                  * nodes, then move some of our excess entries into it.
457                  */
458                 error = xfs_da_grow_inode(state->args, &blkno);
459                 if (error)
460                         return(error);  /* GROT: dir is inconsistent */
461
462                 error = xfs_da_node_create(state->args, blkno, treelevel,
463                                            &newblk->bp, state->args->whichfork);
464                 if (error)
465                         return(error);  /* GROT: dir is inconsistent */
466                 newblk->blkno = blkno;
467                 newblk->magic = XFS_DA_NODE_MAGIC;
468                 xfs_da_node_rebalance(state, oldblk, newblk);
469                 error = xfs_da_blk_link(state, oldblk, newblk);
470                 if (error)
471                         return(error);
472                 *result = 1;
473         } else {
474                 *result = 0;
475         }
476
477         /*
478          * Insert the new entry(s) into the correct block
479          * (updating last hashval in the process).
480          *
481          * xfs_da_node_add() inserts BEFORE the given index,
482          * and as a result of using node_lookup_int() we always
483          * point to a valid entry (not after one), but a split
484          * operation always results in a new block whose hashvals
485          * FOLLOW the current block.
486          *
487          * If we had double-split op below us, then add the extra block too.
488          */
489         node = oldblk->bp->data;
490         if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
491                 oldblk->index++;
492                 xfs_da_node_add(state, oldblk, addblk);
493                 if (useextra) {
494                         if (state->extraafter)
495                                 oldblk->index++;
496                         xfs_da_node_add(state, oldblk, &state->extrablk);
497                         state->extravalid = 0;
498                 }
499         } else {
500                 newblk->index++;
501                 xfs_da_node_add(state, newblk, addblk);
502                 if (useextra) {
503                         if (state->extraafter)
504                                 newblk->index++;
505                         xfs_da_node_add(state, newblk, &state->extrablk);
506                         state->extravalid = 0;
507                 }
508         }
509
510         return(0);
511 }
512
513 /*
514  * Balance the btree elements between two intermediate nodes,
515  * usually one full and one empty.
516  *
517  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
518  */
519 STATIC void
520 xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
521                                      xfs_da_state_blk_t *blk2)
522 {
523         xfs_da_intnode_t *node1, *node2, *tmpnode;
524         xfs_da_node_entry_t *btree_s, *btree_d;
525         int count, tmp;
526         xfs_trans_t *tp;
527
528         node1 = blk1->bp->data;
529         node2 = blk2->bp->data;
530         /*
531          * Figure out how many entries need to move, and in which direction.
532          * Swap the nodes around if that makes it simpler.
533          */
534         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
535             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
536              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
537               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
538                 tmpnode = node1;
539                 node1 = node2;
540                 node2 = tmpnode;
541         }
542         ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
543         ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
544         count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
545         if (count == 0)
546                 return;
547         tp = state->args->trans;
548         /*
549          * Two cases: high-to-low and low-to-high.
550          */
551         if (count > 0) {
552                 /*
553                  * Move elements in node2 up to make a hole.
554                  */
555                 if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
556                         tmp *= (uint)sizeof(xfs_da_node_entry_t);
557                         btree_s = &node2->btree[0];
558                         btree_d = &node2->btree[count];
559                         memmove(btree_d, btree_s, tmp);
560                 }
561
562                 /*
563                  * Move the req'd B-tree elements from high in node1 to
564                  * low in node2.
565                  */
566                 INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
567                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
568                 btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
569                 btree_d = &node2->btree[0];
570                 memcpy(btree_d, btree_s, tmp);
571                 INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
572
573         } else {
574                 /*
575                  * Move the req'd B-tree elements from low in node2 to
576                  * high in node1.
577                  */
578                 count = -count;
579                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
580                 btree_s = &node2->btree[0];
581                 btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
582                 memcpy(btree_d, btree_s, tmp);
583                 INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
584                 xfs_da_log_buf(tp, blk1->bp,
585                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
586
587                 /*
588                  * Move elements in node2 down to fill the hole.
589                  */
590                 tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
591                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
592                 btree_s = &node2->btree[count];
593                 btree_d = &node2->btree[0];
594                 memmove(btree_d, btree_s, tmp);
595                 INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
596         }
597
598         /*
599          * Log header of node 1 and all current bits of node 2.
600          */
601         xfs_da_log_buf(tp, blk1->bp,
602                 XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
603         xfs_da_log_buf(tp, blk2->bp,
604                 XFS_DA_LOGRANGE(node2, &node2->hdr,
605                         sizeof(node2->hdr) +
606                         sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));
607
608         /*
609          * Record the last hashval from each block for upward propagation.
610          * (note: don't use the swapped node pointers)
611          */
612         node1 = blk1->bp->data;
613         node2 = blk2->bp->data;
614         blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
615         blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
616
617         /*
618          * Adjust the expected index for insertion.
619          */
620         if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
621                 blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
622                 blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;      /* make it invalid */
623         }
624 }
625
626 /*
627  * Add a new entry to an intermediate node.
628  */
629 STATIC void
630 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
631                                xfs_da_state_blk_t *newblk)
632 {
633         xfs_da_intnode_t *node;
634         xfs_da_node_entry_t *btree;
635         int tmp;
636         xfs_mount_t *mp;
637
638         node = oldblk->bp->data;
639         mp = state->mp;
640         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
641         ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
642         ASSERT(newblk->blkno != 0);
643         if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
644                 ASSERT(newblk->blkno >= mp->m_dirleafblk &&
645                        newblk->blkno < mp->m_dirfreeblk);
646
647         /*
648          * We may need to make some room before we insert the new node.
649          */
650         tmp = 0;
651         btree = &node->btree[ oldblk->index ];
652         if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
653                 tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
654                 memmove(btree + 1, btree, tmp);
655         }
656         INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
657         INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
658         xfs_da_log_buf(state->args->trans, oldblk->bp,
659                 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
660         INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
661         xfs_da_log_buf(state->args->trans, oldblk->bp,
662                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
663
664         /*
665          * Copy the last hash value from the oldblk to propagate upwards.
666          */
667         oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
668 }
669
670 /*========================================================================
671  * Routines used for shrinking the Btree.
672  *========================================================================*/
673
674 /*
675  * Deallocate an empty leaf node, remove it from its parent,
676  * possibly deallocating that block, etc...
677  */
678 int
679 xfs_da_join(xfs_da_state_t *state)
680 {
681         xfs_da_state_blk_t *drop_blk, *save_blk;
682         int action, error;
683
684         action = 0;
685         drop_blk = &state->path.blk[ state->path.active-1 ];
686         save_blk = &state->altpath.blk[ state->path.active-1 ];
687         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
688         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
689                drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
690
691         /*
692          * Walk back up the tree joining/deallocating as necessary.
693          * When we stop dropping blocks, break out.
694          */
695         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
696                  state->path.active--) {
697                 /*
698                  * See if we can combine the block with a neighbor.
699                  *   (action == 0) => no options, just leave
700                  *   (action == 1) => coalesce, then unlink
701                  *   (action == 2) => block empty, unlink it
702                  */
703                 switch (drop_blk->magic) {
704                 case XFS_ATTR_LEAF_MAGIC:
705                         error = xfs_attr_leaf_toosmall(state, &action);
706                         if (error)
707                                 return(error);
708                         if (action == 0)
709                                 return(0);
710                         xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
711                         break;
712                 case XFS_DIR_LEAF_MAGIC:
713                         ASSERT(XFS_DIR_IS_V1(state->mp));
714                         error = xfs_dir_leaf_toosmall(state, &action);
715                         if (error)
716                                 return(error);
717                         if (action == 0)
718                                 return(0);
719                         xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
720                         break;
721                 case XFS_DIR2_LEAFN_MAGIC:
722                         ASSERT(XFS_DIR_IS_V2(state->mp));
723                         error = xfs_dir2_leafn_toosmall(state, &action);
724                         if (error)
725                                 return error;
726                         if (action == 0)
727                                 return 0;
728                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
729                         break;
730                 case XFS_DA_NODE_MAGIC:
731                         /*
732                          * Remove the offending node, fixup hashvals,
733                          * check for a toosmall neighbor.
734                          */
735                         xfs_da_node_remove(state, drop_blk);
736                         xfs_da_fixhashpath(state, &state->path);
737                         error = xfs_da_node_toosmall(state, &action);
738                         if (error)
739                                 return(error);
740                         if (action == 0)
741                                 return 0;
742                         xfs_da_node_unbalance(state, drop_blk, save_blk);
743                         break;
744                 }
745                 xfs_da_fixhashpath(state, &state->altpath);
746                 error = xfs_da_blk_unlink(state, drop_blk, save_blk);
747                 xfs_da_state_kill_altpath(state);
748                 if (error)
749                         return(error);
750                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
751                                                          drop_blk->bp);
752                 drop_blk->bp = NULL;
753                 if (error)
754                         return(error);
755         }
756         /*
757          * We joined all the way to the top.  If it turns out that
758          * we only have one entry in the root, make the child block
759          * the new root.
760          */
761         xfs_da_node_remove(state, drop_blk);
762         xfs_da_fixhashpath(state, &state->path);
763         error = xfs_da_root_join(state, &state->path.blk[0]);
764         return(error);
765 }
766
767 /*
768  * We have only one entry in the root.  Copy the only remaining child of
769  * the old root to block 0 as the new root node.
770  */
771 STATIC int
772 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
773 {
774         xfs_da_intnode_t *oldroot;
775         /* REFERENCED */
776         xfs_da_blkinfo_t *blkinfo;
777         xfs_da_args_t *args;
778         xfs_dablk_t child;
779         xfs_dabuf_t *bp;
780         int error;
781
782         args = state->args;
783         ASSERT(args != NULL);
784         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
785         oldroot = root_blk->bp->data;
786         ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
787         ASSERT(!oldroot->hdr.info.forw);
788         ASSERT(!oldroot->hdr.info.back);
789
790         /*
791          * If the root has more than one child, then don't do anything.
792          */
793         if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
794                 return(0);
795
796         /*
797          * Read in the (only) child block, then copy those bytes into
798          * the root block's buffer and free the original child block.
799          */
800         child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
801         ASSERT(child != 0);
802         error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
803                                              args->whichfork);
804         if (error)
805                 return(error);
806         ASSERT(bp != NULL);
807         blkinfo = bp->data;
808         if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
809                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
810                        INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
811         } else {
812                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
813         }
814         ASSERT(!blkinfo->forw);
815         ASSERT(!blkinfo->back);
816         memcpy(root_blk->bp->data, bp->data, state->blocksize);
817         xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
818         error = xfs_da_shrink_inode(args, child, bp);
819         return(error);
820 }
821
822 /*
823  * Check a node block and its neighbors to see if the block should be
824  * collapsed into one or the other neighbor.  Always keep the block
825  * with the smaller block number.
826  * If the current block is over 50% full, don't try to join it, return 0.
827  * If the block is empty, fill in the state structure and return 2.
828  * If it can be collapsed, fill in the state structure and return 1.
829  * If nothing can be done, return 0.
830  */
831 STATIC int
832 xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
833 {
834         xfs_da_intnode_t *node;
835         xfs_da_state_blk_t *blk;
836         xfs_da_blkinfo_t *info;
837         int count, forward, error, retval, i;
838         xfs_dablk_t blkno;
839         xfs_dabuf_t *bp;
840
841         /*
842          * Check for the degenerate case of the block being over 50% full.
843          * If so, it's not worth even looking to see if we might be able
844          * to coalesce with a sibling.
845          */
846         blk = &state->path.blk[ state->path.active-1 ];
847         info = blk->bp->data;
848         ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
849         node = (xfs_da_intnode_t *)info;
850         count = INT_GET(node->hdr.count, ARCH_CONVERT);
851         if (count > (state->node_ents >> 1)) {
852                 *action = 0;    /* blk over 50%, don't try to join */
853                 return(0);      /* blk over 50%, don't try to join */
854         }
855
856         /*
857          * Check for the degenerate case of the block being empty.
858          * If the block is empty, we'll simply delete it, no need to
859          * coalesce it with a sibling block.  We choose (aribtrarily)
860          * to merge with the forward block unless it is NULL.
861          */
862         if (count == 0) {
863                 /*
864                  * Make altpath point to the block we want to keep and
865                  * path point to the block we want to drop (this one).
866                  */
867                 forward = info->forw;
868                 memcpy(&state->altpath, &state->path, sizeof(state->path));
869                 error = xfs_da_path_shift(state, &state->altpath, forward,
870                                                  0, &retval);
871                 if (error)
872                         return(error);
873                 if (retval) {
874                         *action = 0;
875                 } else {
876                         *action = 2;
877                 }
878                 return(0);
879         }
880
881         /*
882          * Examine each sibling block to see if we can coalesce with
883          * at least 25% free space to spare.  We need to figure out
884          * whether to merge with the forward or the backward block.
885          * We prefer coalescing with the lower numbered sibling so as
886          * to shrink a directory over time.
887          */
888         /* start with smaller blk num */
889         forward = (INT_GET(info->forw, ARCH_CONVERT)
890                                 < INT_GET(info->back, ARCH_CONVERT));
891         for (i = 0; i < 2; forward = !forward, i++) {
892                 if (forward)
893                         blkno = INT_GET(info->forw, ARCH_CONVERT);
894                 else
895                         blkno = INT_GET(info->back, ARCH_CONVERT);
896                 if (blkno == 0)
897                         continue;
898                 error = xfs_da_read_buf(state->args->trans, state->args->dp,
899                                         blkno, -1, &bp, state->args->whichfork);
900                 if (error)
901                         return(error);
902                 ASSERT(bp != NULL);
903
904                 node = (xfs_da_intnode_t *)info;
905                 count  = state->node_ents;
906                 count -= state->node_ents >> 2;
907                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
908                 node = bp->data;
909                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
910                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
911                 xfs_da_brelse(state->args->trans, bp);
912                 if (count >= 0)
913                         break;  /* fits with at least 25% to spare */
914         }
915         if (i >= 2) {
916                 *action = 0;
917                 return(0);
918         }
919
920         /*
921          * Make altpath point to the block we want to keep (the lower
922          * numbered block) and path point to the block we want to drop.
923          */
924         memcpy(&state->altpath, &state->path, sizeof(state->path));
925         if (blkno < blk->blkno) {
926                 error = xfs_da_path_shift(state, &state->altpath, forward,
927                                                  0, &retval);
928                 if (error) {
929                         return(error);
930                 }
931                 if (retval) {
932                         *action = 0;
933                         return(0);
934                 }
935         } else {
936                 error = xfs_da_path_shift(state, &state->path, forward,
937                                                  0, &retval);
938                 if (error) {
939                         return(error);
940                 }
941                 if (retval) {
942                         *action = 0;
943                         return(0);
944                 }
945         }
946         *action = 1;
947         return(0);
948 }
949
950 /*
951  * Walk back up the tree adjusting hash values as necessary,
952  * when we stop making changes, return.
953  */
954 void
955 xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
956 {
957         xfs_da_state_blk_t *blk;
958         xfs_da_intnode_t *node;
959         xfs_da_node_entry_t *btree;
960         xfs_dahash_t lasthash=0;
961         int level, count;
962
963         level = path->active-1;
964         blk = &path->blk[ level ];
965         switch (blk->magic) {
966         case XFS_ATTR_LEAF_MAGIC:
967                 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
968                 if (count == 0)
969                         return;
970                 break;
971         case XFS_DIR_LEAF_MAGIC:
972                 ASSERT(XFS_DIR_IS_V1(state->mp));
973                 lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
974                 if (count == 0)
975                         return;
976                 break;
977         case XFS_DIR2_LEAFN_MAGIC:
978                 ASSERT(XFS_DIR_IS_V2(state->mp));
979                 lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
980                 if (count == 0)
981                         return;
982                 break;
983         case XFS_DA_NODE_MAGIC:
984                 lasthash = xfs_da_node_lasthash(blk->bp, &count);
985                 if (count == 0)
986                         return;
987                 break;
988         }
989         for (blk--, level--; level >= 0; blk--, level--) {
990                 node = blk->bp->data;
991                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
992                 btree = &node->btree[ blk->index ];
993                 if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
994                         break;
995                 blk->hashval = lasthash;
996                 INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
997                 xfs_da_log_buf(state->args->trans, blk->bp,
998                                   XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
999
1000                 lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1001         }
1002 }
1003
1004 /*
1005  * Remove an entry from an intermediate node.
1006  */
1007 STATIC void
1008 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
1009 {
1010         xfs_da_intnode_t *node;
1011         xfs_da_node_entry_t *btree;
1012         int tmp;
1013
1014         node = drop_blk->bp->data;
1015         ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
1016         ASSERT(drop_blk->index >= 0);
1017
1018         /*
1019          * Copy over the offending entry, or just zero it out.
1020          */
1021         btree = &node->btree[drop_blk->index];
1022         if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1023                 tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
1024                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1025                 memmove(btree, btree + 1, tmp);
1026                 xfs_da_log_buf(state->args->trans, drop_blk->bp,
1027                     XFS_DA_LOGRANGE(node, btree, tmp));
1028                 btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
1029         }
1030         memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
1031         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1032             XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
1033         INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
1034         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1035             XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
1036
1037         /*
1038          * Copy the last hash value from the block to propagate upwards.
1039          */
1040         btree--;
1041         drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
1042 }
1043
1044 /*
1045  * Unbalance the btree elements between two intermediate nodes,
1046  * move all Btree elements from one node into another.
1047  */
1048 STATIC void
1049 xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1050                                      xfs_da_state_blk_t *save_blk)
1051 {
1052         xfs_da_intnode_t *drop_node, *save_node;
1053         xfs_da_node_entry_t *btree;
1054         int tmp;
1055         xfs_trans_t *tp;
1056
1057         drop_node = drop_blk->bp->data;
1058         save_node = save_blk->bp->data;
1059         ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1060         ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1061         tp = state->args->trans;
1062
1063         /*
1064          * If the dying block has lower hashvals, then move all the
1065          * elements in the remaining block up to make a hole.
1066          */
1067         if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1068             (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1069              INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
1070         {
1071                 btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
1072                 tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1073                 memmove(btree, &save_node->btree[0], tmp);
1074                 btree = &save_node->btree[0];
1075                 xfs_da_log_buf(tp, save_blk->bp,
1076                         XFS_DA_LOGRANGE(save_node, btree,
1077                                 (INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
1078                                 sizeof(xfs_da_node_entry_t)));
1079         } else {
1080                 btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
1081                 xfs_da_log_buf(tp, save_blk->bp,
1082                         XFS_DA_LOGRANGE(save_node, btree,
1083                                 INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
1084                                 sizeof(xfs_da_node_entry_t)));
1085         }
1086
1087         /*
1088          * Move all the B-tree elements from drop_blk to save_blk.
1089          */
1090         tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1091         memcpy(btree, &drop_node->btree[0], tmp);
1092         INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
1093
1094         xfs_da_log_buf(tp, save_blk->bp,
1095                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1096                         sizeof(save_node->hdr)));
1097
1098         /*
1099          * Save the last hashval in the remaining block for upward propagation.
1100          */
1101         save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1102 }
1103
1104 /*========================================================================
1105  * Routines used for finding things in the Btree.
1106  *========================================================================*/
1107
1108 /*
1109  * Walk down the Btree looking for a particular filename, filling
1110  * in the state structure as we go.
1111  *
1112  * We will set the state structure to point to each of the elements
1113  * in each of the nodes where either the hashval is or should be.
1114  *
1115  * We support duplicate hashval's so for each entry in the current
1116  * node that could contain the desired hashval, descend.  This is a
1117  * pruned depth-first tree search.
1118  */
1119 int                                                     /* error */
1120 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1121 {
1122         xfs_da_state_blk_t *blk;
1123         xfs_da_blkinfo_t *curr;
1124         xfs_da_intnode_t *node;
1125         xfs_da_node_entry_t *btree;
1126         xfs_dablk_t blkno;
1127         int probe, span, max, error, retval;
1128         xfs_dahash_t hashval;
1129         xfs_da_args_t *args;
1130
1131         args = state->args;
1132
1133         /*
1134          * Descend thru the B-tree searching each level for the right
1135          * node to use, until the right hashval is found.
1136          */
1137         if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
1138                 blkno = state->mp->m_dirleafblk;
1139         else
1140                 blkno = 0;
1141         for (blk = &state->path.blk[0], state->path.active = 1;
1142                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
1143                          blk++, state->path.active++) {
1144                 /*
1145                  * Read the next node down in the tree.
1146                  */
1147                 blk->blkno = blkno;
1148                 error = xfs_da_read_buf(args->trans, args->dp, blkno,
1149                                         -1, &blk->bp, args->whichfork);
1150                 if (error) {
1151                         blk->blkno = 0;
1152                         state->path.active--;
1153                         return(error);
1154                 }
1155                 curr = blk->bp->data;
1156                 ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1157                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1158                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1159
1160                 /*
1161                  * Search an intermediate node for a match.
1162                  */
1163                 blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
1164                 if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1165                         node = blk->bp->data;
1166                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1167
1168                         /*
1169                          * Binary search.  (note: small blocks will skip loop)
1170                          */
1171                         max = INT_GET(node->hdr.count, ARCH_CONVERT);
1172                         probe = span = max / 2;
1173                         hashval = args->hashval;
1174                         for (btree = &node->btree[probe]; span > 4;
1175                                    btree = &node->btree[probe]) {
1176                                 span /= 2;
1177                                 if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
1178                                         probe += span;
1179                                 else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
1180                                         probe -= span;
1181                                 else
1182                                         break;
1183                         }
1184                         ASSERT((probe >= 0) && (probe < max));
1185                         ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
1186
1187                         /*
1188                          * Since we may have duplicate hashval's, find the first
1189                          * matching hashval in the node.
1190                          */
1191                         while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
1192                                 btree--;
1193                                 probe--;
1194                         }
1195                         while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
1196                                 btree++;
1197                                 probe++;
1198                         }
1199
1200                         /*
1201                          * Pick the right block to descend on.
1202                          */
1203                         if (probe == max) {
1204                                 blk->index = max-1;
1205                                 blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
1206                         } else {
1207                                 blk->index = probe;
1208                                 blkno = INT_GET(btree->before, ARCH_CONVERT);
1209                         }
1210                 }
1211                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
1212                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1213                         break;
1214                 }
1215                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1216                         blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
1217                         break;
1218                 }
1219                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1220                         blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1221                         break;
1222                 }
1223         }
1224
1225         /*
1226          * A leaf block that ends in the hashval that we are interested in
1227          * (final hashval == search hashval) means that the next block may
1228          * contain more entries with the same hashval, shift upward to the
1229          * next leaf and keep searching.
1230          */
1231         for (;;) {
1232                 if (blk->magic == XFS_DIR_LEAF_MAGIC) {
1233                         ASSERT(XFS_DIR_IS_V1(state->mp));
1234                         retval = xfs_dir_leaf_lookup_int(blk->bp, args,
1235                                                                   &blk->index);
1236                 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1237                         ASSERT(XFS_DIR_IS_V2(state->mp));
1238                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1239                                                         &blk->index, state);
1240                 }
1241                 else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1242                         retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1243                         blk->index = args->index;
1244                         args->blkno = blk->blkno;
1245                 }
1246                 if (((retval == ENOENT) || (retval == ENOATTR)) &&
1247                     (blk->hashval == args->hashval)) {
1248                         error = xfs_da_path_shift(state, &state->path, 1, 1,
1249                                                          &retval);
1250                         if (error)
1251                                 return(error);
1252                         if (retval == 0) {
1253                                 continue;
1254                         }
1255                         else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1256                                 /* path_shift() gives ENOENT */
1257                                 retval = XFS_ERROR(ENOATTR);
1258                         }
1259                 }
1260                 break;
1261         }
1262         *result = retval;
1263         return(0);
1264 }
1265
1266 /*========================================================================
1267  * Utility routines.
1268  *========================================================================*/
1269
1270 /*
1271  * Link a new block into a doubly linked list of blocks (of whatever type).
1272  */
1273 int                                                     /* error */
1274 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1275                                xfs_da_state_blk_t *new_blk)
1276 {
1277         xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1278         xfs_da_args_t *args;
1279         int before=0, error;
1280         xfs_dabuf_t *bp;
1281
1282         /*
1283          * Set up environment.
1284          */
1285         args = state->args;
1286         ASSERT(args != NULL);
1287         old_info = old_blk->bp->data;
1288         new_info = new_blk->bp->data;
1289         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1290                old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1291                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1292         ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
1293         ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
1294         ASSERT(old_blk->magic == new_blk->magic);
1295
1296         switch (old_blk->magic) {
1297         case XFS_ATTR_LEAF_MAGIC:
1298                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1299                 break;
1300         case XFS_DIR_LEAF_MAGIC:
1301                 ASSERT(XFS_DIR_IS_V1(state->mp));
1302                 before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
1303                 break;
1304         case XFS_DIR2_LEAFN_MAGIC:
1305                 ASSERT(XFS_DIR_IS_V2(state->mp));
1306                 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1307                 break;
1308         case XFS_DA_NODE_MAGIC:
1309                 before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1310                 break;
1311         }
1312
1313         /*
1314          * Link blocks in appropriate order.
1315          */
1316         if (before) {
1317                 /*
1318                  * Link new block in before existing block.
1319                  */
1320                 INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
1321                 new_info->back = old_info->back; /* INT_: direct copy */
1322                 if (INT_GET(old_info->back, ARCH_CONVERT)) {
1323                         error = xfs_da_read_buf(args->trans, args->dp,
1324                                                 INT_GET(old_info->back,
1325                                                         ARCH_CONVERT), -1, &bp,
1326                                                 args->whichfork);
1327                         if (error)
1328                                 return(error);
1329                         ASSERT(bp != NULL);
1330                         tmp_info = bp->data;
1331                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
1332                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
1333                         INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
1334                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1335                         xfs_da_buf_done(bp);
1336                 }
1337                 INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
1338         } else {
1339                 /*
1340                  * Link new block in after existing block.
1341                  */
1342                 new_info->forw = old_info->forw; /* INT_: direct copy */
1343                 INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
1344                 if (INT_GET(old_info->forw, ARCH_CONVERT)) {
1345                         error = xfs_da_read_buf(args->trans, args->dp,
1346                                                 INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
1347                                                 args->whichfork);
1348                         if (error)
1349                                 return(error);
1350                         ASSERT(bp != NULL);
1351                         tmp_info = bp->data;
1352                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1353                                     == INT_GET(old_info->magic, ARCH_CONVERT));
1354                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1355                                     == old_blk->blkno);
1356                         INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
1357                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1358                         xfs_da_buf_done(bp);
1359                 }
1360                 INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
1361         }
1362
1363         xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1364         xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1365         return(0);
1366 }
1367
1368 /*
1369  * Compare two intermediate nodes for "order".
1370  */
1371 STATIC int
1372 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1373 {
1374         xfs_da_intnode_t *node1, *node2;
1375
1376         node1 = node1_bp->data;
1377         node2 = node2_bp->data;
1378         ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
1379                (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
1380         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
1381             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
1382               INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1383              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1384               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1385                 return(1);
1386         }
1387         return(0);
1388 }
1389
1390 /*
1391  * Pick up the last hashvalue from an intermediate node.
1392  */
1393 STATIC uint
1394 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1395 {
1396         xfs_da_intnode_t *node;
1397
1398         node = bp->data;
1399         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1400         if (count)
1401                 *count = INT_GET(node->hdr.count, ARCH_CONVERT);
1402         if (!node->hdr.count)
1403                 return(0);
1404         return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
1405 }
1406
1407 /*
1408  * Unlink a block from a doubly linked list of blocks.
1409  */
1410 STATIC int                                              /* error */
1411 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1412                                  xfs_da_state_blk_t *save_blk)
1413 {
1414         xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1415         xfs_da_args_t *args;
1416         xfs_dabuf_t *bp;
1417         int error;
1418
1419         /*
1420          * Set up environment.
1421          */
1422         args = state->args;
1423         ASSERT(args != NULL);
1424         save_info = save_blk->bp->data;
1425         drop_info = drop_blk->bp->data;
1426         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1427                save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1428                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1429         ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
1430         ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
1431         ASSERT(save_blk->magic == drop_blk->magic);
1432         ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
1433                (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
1434         ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
1435                (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
1436
1437         /*
1438          * Unlink the leaf block from the doubly linked chain of leaves.
1439          */
1440         if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
1441                 save_info->back = drop_info->back; /* INT_: direct copy */
1442                 if (INT_GET(drop_info->back, ARCH_CONVERT)) {
1443                         error = xfs_da_read_buf(args->trans, args->dp,
1444                                                 INT_GET(drop_info->back,
1445                                                         ARCH_CONVERT), -1, &bp,
1446                                                 args->whichfork);
1447                         if (error)
1448                                 return(error);
1449                         ASSERT(bp != NULL);
1450                         tmp_info = bp->data;
1451                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
1452                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
1453                         INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
1454                         xfs_da_log_buf(args->trans, bp, 0,
1455                                                     sizeof(*tmp_info) - 1);
1456                         xfs_da_buf_done(bp);
1457                 }
1458         } else {
1459                 save_info->forw = drop_info->forw; /* INT_: direct copy */
1460                 if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
1461                         error = xfs_da_read_buf(args->trans, args->dp,
1462                                                 INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
1463                                                 args->whichfork);
1464                         if (error)
1465                                 return(error);
1466                         ASSERT(bp != NULL);
1467                         tmp_info = bp->data;
1468                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1469                                     == INT_GET(save_info->magic, ARCH_CONVERT));
1470                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1471                                     == drop_blk->blkno);
1472                         INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
1473                         xfs_da_log_buf(args->trans, bp, 0,
1474                                                     sizeof(*tmp_info) - 1);
1475                         xfs_da_buf_done(bp);
1476                 }
1477         }
1478
1479         xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1480         return(0);
1481 }
1482
1483 /*
1484  * Move a path "forward" or "!forward" one block at the current level.
1485  *
1486  * This routine will adjust a "path" to point to the next block
1487  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1488  * Btree, including updating pointers to the intermediate nodes between
1489  * the new bottom and the root.
1490  */
1491 int                                                     /* error */
1492 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1493                                  int forward, int release, int *result)
1494 {
1495         xfs_da_state_blk_t *blk;
1496         xfs_da_blkinfo_t *info;
1497         xfs_da_intnode_t *node;
1498         xfs_da_args_t *args;
1499         xfs_dablk_t blkno=0;
1500         int level, error;
1501
1502         /*
1503          * Roll up the Btree looking for the first block where our
1504          * current index is not at the edge of the block.  Note that
1505          * we skip the bottom layer because we want the sibling block.
1506          */
1507         args = state->args;
1508         ASSERT(args != NULL);
1509         ASSERT(path != NULL);
1510         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1511         level = (path->active-1) - 1;   /* skip bottom layer in path */
1512         for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1513                 ASSERT(blk->bp != NULL);
1514                 node = blk->bp->data;
1515                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1516                 if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1517                         blk->index++;
1518                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1519                         break;
1520                 } else if (!forward && (blk->index > 0)) {
1521                         blk->index--;
1522                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1523                         break;
1524                 }
1525         }
1526         if (level < 0) {
1527                 *result = XFS_ERROR(ENOENT);    /* we're out of our tree */
1528                 ASSERT(args->oknoent);
1529                 return(0);
1530         }
1531
1532         /*
1533          * Roll down the edge of the subtree until we reach the
1534          * same depth we were at originally.
1535          */
1536         for (blk++, level++; level < path->active; blk++, level++) {
1537                 /*
1538                  * Release the old block.
1539                  * (if it's dirty, trans won't actually let go)
1540                  */
1541                 if (release)
1542                         xfs_da_brelse(args->trans, blk->bp);
1543
1544                 /*
1545                  * Read the next child block.
1546                  */
1547                 blk->blkno = blkno;
1548                 error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1549                                                      &blk->bp, args->whichfork);
1550                 if (error)
1551                         return(error);
1552                 ASSERT(blk->bp != NULL);
1553                 info = blk->bp->data;
1554                 ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1555                        INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1556                        INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1557                 blk->magic = INT_GET(info->magic, ARCH_CONVERT);
1558                 if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1559                         node = (xfs_da_intnode_t *)info;
1560                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1561                         if (forward)
1562                                 blk->index = 0;
1563                         else
1564                                 blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
1565                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1566                 } else {
1567                         ASSERT(level == path->active-1);
1568                         blk->index = 0;
1569                         switch(blk->magic) {
1570                         case XFS_ATTR_LEAF_MAGIC:
1571                                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1572                                                                       NULL);
1573                                 break;
1574                         case XFS_DIR_LEAF_MAGIC:
1575                                 ASSERT(XFS_DIR_IS_V1(state->mp));
1576                                 blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
1577                                                                      NULL);
1578                                 break;
1579                         case XFS_DIR2_LEAFN_MAGIC:
1580                                 ASSERT(XFS_DIR_IS_V2(state->mp));
1581                                 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1582                                                                        NULL);
1583                                 break;
1584                         default:
1585                                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1586                                        blk->magic ==
1587                                        XFS_DIRX_LEAF_MAGIC(state->mp));
1588                                 break;
1589                         }
1590                 }
1591         }
1592         *result = 0;
1593         return(0);
1594 }
1595
1596
1597 /*========================================================================
1598  * Utility routines.
1599  *========================================================================*/
1600
1601 /*
1602  * Implement a simple hash on a character string.
1603  * Rotate the hash value by 7 bits, then XOR each character in.
1604  * This is implemented with some source-level loop unrolling.
1605  */
1606 xfs_dahash_t
1607 xfs_da_hashname(const uchar_t *name, int namelen)
1608 {
1609         xfs_dahash_t hash;
1610
1611 #ifdef SLOWVERSION
1612         /*
1613          * This is the old one-byte-at-a-time version.
1614          */
1615         for (hash = 0; namelen > 0; namelen--)
1616                 hash = *name++ ^ rol32(hash, 7);
1617
1618         return(hash);
1619 #else
1620         /*
1621          * Do four characters at a time as long as we can.
1622          */
1623         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1624                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1625                        (name[3] << 0) ^ rol32(hash, 7 * 4);
1626
1627         /*
1628          * Now do the rest of the characters.
1629          */
1630         switch (namelen) {
1631         case 3:
1632                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1633                        rol32(hash, 7 * 3);
1634         case 2:
1635                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1636         case 1:
1637                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
1638         case 0:
1639                 return hash;
1640         }
1641         /* NOTREACHED */
1642 #endif
1643         return 0; /* keep gcc happy */
1644 }
1645
1646 /*
1647  * Add a block to the btree ahead of the file.
1648  * Return the new block number to the caller.
1649  */
1650 int
1651 xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1652 {
1653         xfs_fileoff_t bno, b;
1654         xfs_bmbt_irec_t map;
1655         xfs_bmbt_irec_t *mapp;
1656         xfs_inode_t *dp;
1657         int nmap, error, w, count, c, got, i, mapi;
1658         xfs_fsize_t size;
1659         xfs_trans_t *tp;
1660         xfs_mount_t *mp;
1661
1662         dp = args->dp;
1663         mp = dp->i_mount;
1664         w = args->whichfork;
1665         tp = args->trans;
1666         /*
1667          * For new directories adjust the file offset and block count.
1668          */
1669         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
1670                 bno = mp->m_dirleafblk;
1671                 count = mp->m_dirblkfsbs;
1672         } else {
1673                 bno = 0;
1674                 count = 1;
1675         }
1676         /*
1677          * Find a spot in the file space to put the new block.
1678          */
1679         if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) {
1680                 return error;
1681         }
1682         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1683                 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1684         /*
1685          * Try mapping it in one filesystem block.
1686          */
1687         nmap = 1;
1688         ASSERT(args->firstblock != NULL);
1689         if ((error = xfs_bmapi(tp, dp, bno, count,
1690                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1691                         XFS_BMAPI_CONTIG,
1692                         args->firstblock, args->total, &map, &nmap,
1693                         args->flist))) {
1694                 return error;
1695         }
1696         ASSERT(nmap <= 1);
1697         if (nmap == 1) {
1698                 mapp = &map;
1699                 mapi = 1;
1700         }
1701         /*
1702          * If we didn't get it and the block might work if fragmented,
1703          * try without the CONTIG flag.  Loop until we get it all.
1704          */
1705         else if (nmap == 0 && count > 1) {
1706                 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1707                 for (b = bno, mapi = 0; b < bno + count; ) {
1708                         nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1709                         c = (int)(bno + count - b);
1710                         if ((error = xfs_bmapi(tp, dp, b, c,
1711                                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
1712                                         XFS_BMAPI_METADATA,
1713                                         args->firstblock, args->total,
1714                                         &mapp[mapi], &nmap, args->flist))) {
1715                                 kmem_free(mapp, sizeof(*mapp) * count);
1716                                 return error;
1717                         }
1718                         if (nmap < 1)
1719                                 break;
1720                         mapi += nmap;
1721                         b = mapp[mapi - 1].br_startoff +
1722                             mapp[mapi - 1].br_blockcount;
1723                 }
1724         } else {
1725                 mapi = 0;
1726                 mapp = NULL;
1727         }
1728         /*
1729          * Count the blocks we got, make sure it matches the total.
1730          */
1731         for (i = 0, got = 0; i < mapi; i++)
1732                 got += mapp[i].br_blockcount;
1733         if (got != count || mapp[0].br_startoff != bno ||
1734             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1735             bno + count) {
1736                 if (mapp != &map)
1737                         kmem_free(mapp, sizeof(*mapp) * count);
1738                 return XFS_ERROR(ENOSPC);
1739         }
1740         if (mapp != &map)
1741                 kmem_free(mapp, sizeof(*mapp) * count);
1742         *new_blkno = (xfs_dablk_t)bno;
1743         /*
1744          * For version 1 directories, adjust the file size if it changed.
1745          */
1746         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1747                 ASSERT(mapi == 1);
1748                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1749                         return error;
1750                 size = XFS_FSB_TO_B(mp, bno);
1751                 if (size != dp->i_d.di_size) {
1752                         dp->i_d.di_size = size;
1753                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1754                 }
1755         }
1756         return 0;
1757 }
1758
1759 /*
1760  * Ick.  We need to always be able to remove a btree block, even
1761  * if there's no space reservation because the filesystem is full.
1762  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1763  * It swaps the target block with the last block in the file.  The
1764  * last block in the file can always be removed since it can't cause
1765  * a bmap btree split to do that.
1766  */
1767 STATIC int
1768 xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1769                       xfs_dabuf_t **dead_bufp)
1770 {
1771         xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
1772         xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
1773         xfs_fileoff_t lastoff;
1774         xfs_inode_t *ip;
1775         xfs_trans_t *tp;
1776         xfs_mount_t *mp;
1777         int error, w, entno, level, dead_level;
1778         xfs_da_blkinfo_t *dead_info, *sib_info;
1779         xfs_da_intnode_t *par_node, *dead_node;
1780         xfs_dir_leafblock_t *dead_leaf;
1781         xfs_dir2_leaf_t *dead_leaf2;
1782         xfs_dahash_t dead_hash;
1783
1784         dead_buf = *dead_bufp;
1785         dead_blkno = *dead_blknop;
1786         tp = args->trans;
1787         ip = args->dp;
1788         w = args->whichfork;
1789         ASSERT(w == XFS_DATA_FORK);
1790         mp = ip->i_mount;
1791         if (XFS_DIR_IS_V2(mp)) {
1792                 lastoff = mp->m_dirfreeblk;
1793                 error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1794         } else
1795                 error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
1796         if (error)
1797                 return error;
1798         if (unlikely(lastoff == 0)) {
1799                 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
1800                                  mp);
1801                 return XFS_ERROR(EFSCORRUPTED);
1802         }
1803         /*
1804          * Read the last block in the btree space.
1805          */
1806         last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1807         if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
1808                 return error;
1809         /*
1810          * Copy the last block into the dead buffer and log it.
1811          */
1812         memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
1813         xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
1814         dead_info = dead_buf->data;
1815         /*
1816          * Get values from the moved block.
1817          */
1818         if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1819                 ASSERT(XFS_DIR_IS_V1(mp));
1820                 dead_leaf = (xfs_dir_leafblock_t *)dead_info;
1821                 dead_level = 0;
1822                 dead_hash =
1823                         INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1824         } else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1825                 ASSERT(XFS_DIR_IS_V2(mp));
1826                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1827                 dead_level = 0;
1828                 dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1829         } else {
1830                 ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1831                 dead_node = (xfs_da_intnode_t *)dead_info;
1832                 dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
1833                 dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1834         }
1835         sib_buf = par_buf = NULL;
1836         /*
1837          * If the moved block has a left sibling, fix up the pointers.
1838          */
1839         if ((sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT))) {
1840                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1841                         goto done;
1842                 sib_info = sib_buf->data;
1843                 if (unlikely(
1844                     INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
1845                     INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT))) {
1846                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
1847                                          XFS_ERRLEVEL_LOW, mp);
1848                         error = XFS_ERROR(EFSCORRUPTED);
1849                         goto done;
1850                 }
1851                 INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
1852                 xfs_da_log_buf(tp, sib_buf,
1853                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
1854                                         sizeof(sib_info->forw)));
1855                 xfs_da_buf_done(sib_buf);
1856                 sib_buf = NULL;
1857         }
1858         /*
1859          * If the moved block has a right sibling, fix up the pointers.
1860          */
1861         if ((sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT))) {
1862                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1863                         goto done;
1864                 sib_info = sib_buf->data;
1865                 if (unlikely(
1866                        INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
1867                     || INT_GET(sib_info->magic, ARCH_CONVERT)
1868                                 != INT_GET(dead_info->magic, ARCH_CONVERT))) {
1869                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
1870                                          XFS_ERRLEVEL_LOW, mp);
1871                         error = XFS_ERROR(EFSCORRUPTED);
1872                         goto done;
1873                 }
1874                 INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
1875                 xfs_da_log_buf(tp, sib_buf,
1876                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
1877                                         sizeof(sib_info->back)));
1878                 xfs_da_buf_done(sib_buf);
1879                 sib_buf = NULL;
1880         }
1881         par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
1882         level = -1;
1883         /*
1884          * Walk down the tree looking for the parent of the moved block.
1885          */
1886         for (;;) {
1887                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1888                         goto done;
1889                 par_node = par_buf->data;
1890                 if (unlikely(
1891                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
1892                     (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) {
1893                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1894                                          XFS_ERRLEVEL_LOW, mp);
1895                         error = XFS_ERROR(EFSCORRUPTED);
1896                         goto done;
1897                 }
1898                 level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
1899                 for (entno = 0;
1900                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
1901                      INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
1902                      entno++)
1903                         continue;
1904                 if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) {
1905                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1906                                          XFS_ERRLEVEL_LOW, mp);
1907                         error = XFS_ERROR(EFSCORRUPTED);
1908                         goto done;
1909                 }
1910                 par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
1911                 if (level == dead_level + 1)
1912                         break;
1913                 xfs_da_brelse(tp, par_buf);
1914                 par_buf = NULL;
1915         }
1916         /*
1917          * We're in the right parent block.
1918          * Look for the right entry.
1919          */
1920         for (;;) {
1921                 for (;
1922                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
1923                      INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
1924                      entno++)
1925                         continue;
1926                 if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
1927                         break;
1928                 par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);
1929                 xfs_da_brelse(tp, par_buf);
1930                 par_buf = NULL;
1931                 if (unlikely(par_blkno == 0)) {
1932                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
1933                                          XFS_ERRLEVEL_LOW, mp);
1934                         error = XFS_ERROR(EFSCORRUPTED);
1935                         goto done;
1936                 }
1937                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1938                         goto done;
1939                 par_node = par_buf->data;
1940                 if (unlikely(
1941                     INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
1942                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)) {
1943                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1944                                          XFS_ERRLEVEL_LOW, mp);
1945                         error = XFS_ERROR(EFSCORRUPTED);
1946                         goto done;
1947                 }
1948                 entno = 0;
1949         }
1950         /*
1951          * Update the parent entry pointing to the moved block.
1952          */
1953         INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
1954         xfs_da_log_buf(tp, par_buf,
1955                 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
1956                                 sizeof(par_node->btree[entno].before)));
1957         xfs_da_buf_done(par_buf);
1958         xfs_da_buf_done(dead_buf);
1959         *dead_blknop = last_blkno;
1960         *dead_bufp = last_buf;
1961         return 0;
1962 done:
1963         if (par_buf)
1964                 xfs_da_brelse(tp, par_buf);
1965         if (sib_buf)
1966                 xfs_da_brelse(tp, sib_buf);
1967         xfs_da_brelse(tp, last_buf);
1968         return error;
1969 }
1970
1971 /*
1972  * Remove a btree block from a directory or attribute.
1973  */
1974 int
1975 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1976                     xfs_dabuf_t *dead_buf)
1977 {
1978         xfs_inode_t *dp;
1979         int done, error, w, count;
1980         xfs_fileoff_t bno;
1981         xfs_fsize_t size;
1982         xfs_trans_t *tp;
1983         xfs_mount_t *mp;
1984
1985         dp = args->dp;
1986         w = args->whichfork;
1987         tp = args->trans;
1988         mp = dp->i_mount;
1989         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1990                 count = mp->m_dirblkfsbs;
1991         else
1992                 count = 1;
1993         for (;;) {
1994                 /*
1995                  * Remove extents.  If we get ENOSPC for a dir we have to move
1996                  * the last block to the place we want to kill.
1997                  */
1998                 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1999                                 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
2000                                 0, args->firstblock, args->flist,
2001                                 &done)) == ENOSPC) {
2002                         if (w != XFS_DATA_FORK)
2003                                 goto done;
2004                         if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
2005                                         &dead_buf)))
2006                                 goto done;
2007                 } else if (error)
2008                         goto done;
2009                 else
2010                         break;
2011         }
2012         ASSERT(done);
2013         xfs_da_binval(tp, dead_buf);
2014         /*
2015          * Adjust the directory size for version 1.
2016          */
2017         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
2018                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
2019                         return error;
2020                 size = XFS_FSB_TO_B(dp->i_mount, bno);
2021                 if (size != dp->i_d.di_size) {
2022                         dp->i_d.di_size = size;
2023                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2024                 }
2025         }
2026         return 0;
2027 done:
2028         xfs_da_binval(tp, dead_buf);
2029         return error;
2030 }
2031
2032 /*
2033  * See if the mapping(s) for this btree block are valid, i.e.
2034  * don't contain holes, are logically contiguous, and cover the whole range.
2035  */
2036 STATIC int
2037 xfs_da_map_covers_blocks(
2038         int             nmap,
2039         xfs_bmbt_irec_t *mapp,
2040         xfs_dablk_t     bno,
2041         int             count)
2042 {
2043         int             i;
2044         xfs_fileoff_t   off;
2045
2046         for (i = 0, off = bno; i < nmap; i++) {
2047                 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
2048                     mapp[i].br_startblock == DELAYSTARTBLOCK) {
2049                         return 0;
2050                 }
2051                 if (off != mapp[i].br_startoff) {
2052                         return 0;
2053                 }
2054                 off += mapp[i].br_blockcount;
2055         }
2056         return off == bno + count;
2057 }
2058
2059 /*
2060  * Make a dabuf.
2061  * Used for get_buf, read_buf, read_bufr, and reada_buf.
2062  */
2063 STATIC int
2064 xfs_da_do_buf(
2065         xfs_trans_t     *trans,
2066         xfs_inode_t     *dp,
2067         xfs_dablk_t     bno,
2068         xfs_daddr_t     *mappedbnop,
2069         xfs_dabuf_t     **bpp,
2070         int             whichfork,
2071         int             caller,
2072         inst_t          *ra)
2073 {
2074         xfs_buf_t       *bp = NULL;
2075         xfs_buf_t       **bplist;
2076         int             error=0;
2077         int             i;
2078         xfs_bmbt_irec_t map;
2079         xfs_bmbt_irec_t *mapp;
2080         xfs_daddr_t     mappedbno;
2081         xfs_mount_t     *mp;
2082         int             nbplist=0;
2083         int             nfsb;
2084         int             nmap;
2085         xfs_dabuf_t     *rbp;
2086
2087         mp = dp->i_mount;
2088         if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
2089                 nfsb = mp->m_dirblkfsbs;
2090         else
2091                 nfsb = 1;
2092         mappedbno = *mappedbnop;
2093         /*
2094          * Caller doesn't have a mapping.  -2 means don't complain
2095          * if we land in a hole.
2096          */
2097         if (mappedbno == -1 || mappedbno == -2) {
2098                 /*
2099                  * Optimize the one-block case.
2100                  */
2101                 if (nfsb == 1) {
2102                         xfs_fsblock_t   fsb;
2103
2104                         if ((error =
2105                             xfs_bmapi_single(trans, dp, whichfork, &fsb,
2106                                     (xfs_fileoff_t)bno))) {
2107                                 return error;
2108                         }
2109                         mapp = &map;
2110                         if (fsb == NULLFSBLOCK) {
2111                                 nmap = 0;
2112                         } else {
2113                                 map.br_startblock = fsb;
2114                                 map.br_startoff = (xfs_fileoff_t)bno;
2115                                 map.br_blockcount = 1;
2116                                 nmap = 1;
2117                         }
2118                 } else {
2119                         mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
2120                         nmap = nfsb;
2121                         if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
2122                                         nfsb,
2123                                         XFS_BMAPI_METADATA |
2124                                                 XFS_BMAPI_AFLAG(whichfork),
2125                                         NULL, 0, mapp, &nmap, NULL)))
2126                                 goto exit0;
2127                 }
2128         } else {
2129                 map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
2130                 map.br_startoff = (xfs_fileoff_t)bno;
2131                 map.br_blockcount = nfsb;
2132                 mapp = &map;
2133                 nmap = 1;
2134         }
2135         if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
2136                 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
2137                 if (unlikely(error == EFSCORRUPTED)) {
2138                         if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2139                                 int     i;
2140                                 cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
2141                                         (long long)bno);
2142                                 cmn_err(CE_ALERT, "dir: inode %lld\n",
2143                                         (long long)dp->i_ino);
2144                                 for (i = 0; i < nmap; i++) {
2145                                         cmn_err(CE_ALERT,
2146                                                 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
2147                                                 i,
2148                                                 (long long)mapp[i].br_startoff,
2149                                                 (long long)mapp[i].br_startblock,
2150                                                 (long long)mapp[i].br_blockcount,
2151                                                 mapp[i].br_state);
2152                                 }
2153                         }
2154                         XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2155                                          XFS_ERRLEVEL_LOW, mp);
2156                 }
2157                 goto exit0;
2158         }
2159         if (caller != 3 && nmap > 1) {
2160                 bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
2161                 nbplist = 0;
2162         } else
2163                 bplist = NULL;
2164         /*
2165          * Turn the mapping(s) into buffer(s).
2166          */
2167         for (i = 0; i < nmap; i++) {
2168                 int     nmapped;
2169
2170                 mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
2171                 if (i == 0)
2172                         *mappedbnop = mappedbno;
2173                 nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
2174                 switch (caller) {
2175                 case 0:
2176                         bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
2177                                 mappedbno, nmapped, 0);
2178                         error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
2179                         break;
2180                 case 1:
2181                 case 2:
2182                         bp = NULL;
2183                         error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
2184                                 mappedbno, nmapped, 0, &bp);
2185                         break;
2186                 case 3:
2187                         xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
2188                         error = 0;
2189                         bp = NULL;
2190                         break;
2191                 }
2192                 if (error) {
2193                         if (bp)
2194                                 xfs_trans_brelse(trans, bp);
2195                         goto exit1;
2196                 }
2197                 if (!bp)
2198                         continue;
2199                 if (caller == 1) {
2200                         if (whichfork == XFS_ATTR_FORK) {
2201                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
2202                                                 XFS_ATTR_BTREE_REF);
2203                         } else {
2204                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
2205                                                 XFS_DIR_BTREE_REF);
2206                         }
2207                 }
2208                 if (bplist) {
2209                         bplist[nbplist++] = bp;
2210                 }
2211         }
2212         /*
2213          * Build a dabuf structure.
2214          */
2215         if (bplist) {
2216                 rbp = xfs_da_buf_make(nbplist, bplist, ra);
2217         } else if (bp)
2218                 rbp = xfs_da_buf_make(1, &bp, ra);
2219         else
2220                 rbp = NULL;
2221         /*
2222          * For read_buf, check the magic number.
2223          */
2224         if (caller == 1) {
2225                 xfs_dir2_data_t         *data;
2226                 xfs_dir2_free_t         *free;
2227                 xfs_da_blkinfo_t        *info;
2228                 uint                    magic, magic1;
2229
2230                 info = rbp->data;
2231                 data = rbp->data;
2232                 free = rbp->data;
2233                 magic = INT_GET(info->magic, ARCH_CONVERT);
2234                 magic1 = INT_GET(data->hdr.magic, ARCH_CONVERT);
2235                 if (unlikely(
2236                     XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2237                                    (magic != XFS_DIR_LEAF_MAGIC) &&
2238                                    (magic != XFS_ATTR_LEAF_MAGIC) &&
2239                                    (magic != XFS_DIR2_LEAF1_MAGIC) &&
2240                                    (magic != XFS_DIR2_LEAFN_MAGIC) &&
2241                                    (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2242                                    (magic1 != XFS_DIR2_DATA_MAGIC) &&
2243                                    (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
2244                                 mp, XFS_ERRTAG_DA_READ_BUF,
2245                                 XFS_RANDOM_DA_READ_BUF))) {
2246                         xfs_buftrace("DA READ ERROR", rbp->bps[0]);
2247                         XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2248                                              XFS_ERRLEVEL_LOW, mp, info);
2249                         error = XFS_ERROR(EFSCORRUPTED);
2250                         xfs_da_brelse(trans, rbp);
2251                         nbplist = 0;
2252                         goto exit1;
2253                 }
2254         }
2255         if (bplist) {
2256                 kmem_free(bplist, sizeof(*bplist) * nmap);
2257         }
2258         if (mapp != &map) {
2259                 kmem_free(mapp, sizeof(*mapp) * nfsb);
2260         }
2261         if (bpp)
2262                 *bpp = rbp;
2263         return 0;
2264 exit1:
2265         if (bplist) {
2266                 for (i = 0; i < nbplist; i++)
2267                         xfs_trans_brelse(trans, bplist[i]);
2268                 kmem_free(bplist, sizeof(*bplist) * nmap);
2269         }
2270 exit0:
2271         if (mapp != &map)
2272                 kmem_free(mapp, sizeof(*mapp) * nfsb);
2273         if (bpp)
2274                 *bpp = NULL;
2275         return error;
2276 }
2277
2278 /*
2279  * Get a buffer for the dir/attr block.
2280  */
2281 int
2282 xfs_da_get_buf(
2283         xfs_trans_t     *trans,
2284         xfs_inode_t     *dp,
2285         xfs_dablk_t     bno,
2286         xfs_daddr_t             mappedbno,
2287         xfs_dabuf_t     **bpp,
2288         int             whichfork)
2289 {
2290         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
2291                                                  (inst_t *)__return_address);
2292 }
2293
2294 /*
2295  * Get a buffer for the dir/attr block, fill in the contents.
2296  */
2297 int
2298 xfs_da_read_buf(
2299         xfs_trans_t     *trans,
2300         xfs_inode_t     *dp,
2301         xfs_dablk_t     bno,
2302         xfs_daddr_t             mappedbno,
2303         xfs_dabuf_t     **bpp,
2304         int             whichfork)
2305 {
2306         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
2307                 (inst_t *)__return_address);
2308 }
2309
2310 /*
2311  * Readahead the dir/attr block.
2312  */
2313 xfs_daddr_t
2314 xfs_da_reada_buf(
2315         xfs_trans_t     *trans,
2316         xfs_inode_t     *dp,
2317         xfs_dablk_t     bno,
2318         int             whichfork)
2319 {
2320         xfs_daddr_t             rval;
2321
2322         rval = -1;
2323         if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
2324                         (inst_t *)__return_address))
2325                 return -1;
2326         else
2327                 return rval;
2328 }
2329
2330 /*
2331  * Calculate the number of bits needed to hold i different values.
2332  */
2333 uint
2334 xfs_da_log2_roundup(uint i)
2335 {
2336         uint rval;
2337
2338         for (rval = 0; rval < NBBY * sizeof(i); rval++) {
2339                 if ((1 << rval) >= i)
2340                         break;
2341         }
2342         return(rval);
2343 }
2344
2345 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2346 kmem_zone_t *xfs_dabuf_zone;            /* dabuf zone */
2347
2348 /*
2349  * Allocate a dir-state structure.
2350  * We don't put them on the stack since they're large.
2351  */
2352 xfs_da_state_t *
2353 xfs_da_state_alloc(void)
2354 {
2355         return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
2356 }
2357
2358 /*
2359  * Kill the altpath contents of a da-state structure.
2360  */
2361 STATIC void
2362 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2363 {
2364         int     i;
2365
2366         for (i = 0; i < state->altpath.active; i++) {
2367                 if (state->altpath.blk[i].bp) {
2368                         if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2369                                 xfs_da_buf_done(state->altpath.blk[i].bp);
2370                         state->altpath.blk[i].bp = NULL;
2371                 }
2372         }
2373         state->altpath.active = 0;
2374 }
2375
2376 /*
2377  * Free a da-state structure.
2378  */
2379 void
2380 xfs_da_state_free(xfs_da_state_t *state)
2381 {
2382         int     i;
2383
2384         xfs_da_state_kill_altpath(state);
2385         for (i = 0; i < state->path.active; i++) {
2386                 if (state->path.blk[i].bp)
2387                         xfs_da_buf_done(state->path.blk[i].bp);
2388         }
2389         if (state->extravalid && state->extrablk.bp)
2390                 xfs_da_buf_done(state->extrablk.bp);
2391 #ifdef DEBUG
2392         memset((char *)state, 0, sizeof(*state));
2393 #endif /* DEBUG */
2394         kmem_zone_free(xfs_da_state_zone, state);
2395 }
2396
2397 #ifdef XFS_DABUF_DEBUG
2398 xfs_dabuf_t     *xfs_dabuf_global_list;
2399 lock_t          xfs_dabuf_global_lock;
2400 #endif
2401
2402 /*
2403  * Create a dabuf.
2404  */
2405 /* ARGSUSED */
2406 STATIC xfs_dabuf_t *
2407 xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2408 {
2409         xfs_buf_t       *bp;
2410         xfs_dabuf_t     *dabuf;
2411         int             i;
2412         int             off;
2413
2414         if (nbuf == 1)
2415                 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
2416         else
2417                 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
2418         dabuf->dirty = 0;
2419 #ifdef XFS_DABUF_DEBUG
2420         dabuf->ra = ra;
2421         dabuf->target = XFS_BUF_TARGET(bps[0]);
2422         dabuf->blkno = XFS_BUF_ADDR(bps[0]);
2423 #endif
2424         if (nbuf == 1) {
2425                 dabuf->nbuf = 1;
2426                 bp = bps[0];
2427                 dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
2428                 dabuf->data = XFS_BUF_PTR(bp);
2429                 dabuf->bps[0] = bp;
2430         } else {
2431                 dabuf->nbuf = nbuf;
2432                 for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
2433                         dabuf->bps[i] = bp = bps[i];
2434                         dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
2435                 }
2436                 dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
2437                 for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2438                         bp = bps[i];
2439                         memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
2440                                 XFS_BUF_COUNT(bp));
2441                 }
2442         }
2443 #ifdef XFS_DABUF_DEBUG
2444         {
2445                 SPLDECL(s);
2446                 xfs_dabuf_t     *p;
2447
2448                 s = mutex_spinlock(&xfs_dabuf_global_lock);
2449                 for (p = xfs_dabuf_global_list; p; p = p->next) {
2450                         ASSERT(p->blkno != dabuf->blkno ||
2451                                p->target != dabuf->target);
2452                 }
2453                 dabuf->prev = NULL;
2454                 if (xfs_dabuf_global_list)
2455                         xfs_dabuf_global_list->prev = dabuf;
2456                 dabuf->next = xfs_dabuf_global_list;
2457                 xfs_dabuf_global_list = dabuf;
2458                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
2459         }
2460 #endif
2461         return dabuf;
2462 }
2463
2464 /*
2465  * Un-dirty a dabuf.
2466  */
2467 STATIC void
2468 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2469 {
2470         xfs_buf_t       *bp;
2471         int             i;
2472         int             off;
2473
2474         if (dabuf->dirty) {
2475                 ASSERT(dabuf->nbuf > 1);
2476                 dabuf->dirty = 0;
2477                 for (i = off = 0; i < dabuf->nbuf;
2478                                 i++, off += XFS_BUF_COUNT(bp)) {
2479                         bp = dabuf->bps[i];
2480                         memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
2481                                 XFS_BUF_COUNT(bp));
2482                 }
2483         }
2484 }
2485
2486 /*
2487  * Release a dabuf.
2488  */
2489 void
2490 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2491 {
2492         ASSERT(dabuf);
2493         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2494         if (dabuf->dirty)
2495                 xfs_da_buf_clean(dabuf);
2496         if (dabuf->nbuf > 1)
2497                 kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
2498 #ifdef XFS_DABUF_DEBUG
2499         {
2500                 SPLDECL(s);
2501
2502                 s = mutex_spinlock(&xfs_dabuf_global_lock);
2503                 if (dabuf->prev)
2504                         dabuf->prev->next = dabuf->next;
2505                 else
2506                         xfs_dabuf_global_list = dabuf->next;
2507                 if (dabuf->next)
2508                         dabuf->next->prev = dabuf->prev;
2509                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
2510         }
2511         memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2512 #endif
2513         if (dabuf->nbuf == 1)
2514                 kmem_zone_free(xfs_dabuf_zone, dabuf);
2515         else
2516                 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
2517 }
2518
2519 /*
2520  * Log transaction from a dabuf.
2521  */
2522 void
2523 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
2524 {
2525         xfs_buf_t       *bp;
2526         uint            f;
2527         int             i;
2528         uint            l;
2529         int             off;
2530
2531         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2532         if (dabuf->nbuf == 1) {
2533                 ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
2534                 xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
2535                 return;
2536         }
2537         dabuf->dirty = 1;
2538         ASSERT(first <= last);
2539         for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2540                 bp = dabuf->bps[i];
2541                 f = off;
2542                 l = f + XFS_BUF_COUNT(bp) - 1;
2543                 if (f < first)
2544                         f = first;
2545                 if (l > last)
2546                         l = last;
2547                 if (f <= l)
2548                         xfs_trans_log_buf(tp, bp, f - off, l - off);
2549                 /*
2550                  * B_DONE is set by xfs_trans_log buf.
2551                  * If we don't set it on a new buffer (get not read)
2552                  * then if we don't put anything in the buffer it won't
2553                  * be set, and at commit it it released into the cache,
2554                  * and then a read will fail.
2555                  */
2556                 else if (!(XFS_BUF_ISDONE(bp)))
2557                   XFS_BUF_DONE(bp);
2558         }
2559         ASSERT(last < off);
2560 }
2561
2562 /*
2563  * Release dabuf from a transaction.
2564  * Have to free up the dabuf before the buffers are released,
2565  * since the synchronization on the dabuf is really the lock on the buffer.
2566  */
2567 void
2568 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2569 {
2570         xfs_buf_t       *bp;
2571         xfs_buf_t       **bplist;
2572         int             i;
2573         int             nbuf;
2574
2575         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2576         if ((nbuf = dabuf->nbuf) == 1) {
2577                 bplist = &bp;
2578                 bp = dabuf->bps[0];
2579         } else {
2580                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2581                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2582         }
2583         xfs_da_buf_done(dabuf);
2584         for (i = 0; i < nbuf; i++)
2585                 xfs_trans_brelse(tp, bplist[i]);
2586         if (bplist != &bp)
2587                 kmem_free(bplist, nbuf * sizeof(*bplist));
2588 }
2589
2590 /*
2591  * Invalidate dabuf from a transaction.
2592  */
2593 void
2594 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2595 {
2596         xfs_buf_t       *bp;
2597         xfs_buf_t       **bplist;
2598         int             i;
2599         int             nbuf;
2600
2601         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2602         if ((nbuf = dabuf->nbuf) == 1) {
2603                 bplist = &bp;
2604                 bp = dabuf->bps[0];
2605         } else {
2606                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2607                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2608         }
2609         xfs_da_buf_done(dabuf);
2610         for (i = 0; i < nbuf; i++)
2611                 xfs_trans_binval(tp, bplist[i]);
2612         if (bplist != &bp)
2613                 kmem_free(bplist, nbuf * sizeof(*bplist));
2614 }
2615
2616 /*
2617  * Get the first daddr from a dabuf.
2618  */
2619 xfs_daddr_t
2620 xfs_da_blkno(xfs_dabuf_t *dabuf)
2621 {
2622         ASSERT(dabuf->nbuf);
2623         ASSERT(dabuf->data);
2624         return XFS_BUF_ADDR(dabuf->bps[0]);
2625 }