]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/nilfs2/btree.c
f5a0ec64e1aa6446bdcc781feae6b45efa49b9b8
[mv-sheeva.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43         struct buffer_head *bp_bh;
44         struct buffer_head *bp_sib_bh;
45         int bp_index;
46         union nilfs_bmap_ptr_req bp_oldreq;
47         union nilfs_bmap_ptr_req bp_newreq;
48         struct nilfs_btnode_chkey_ctxt bp_ctxt;
49         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50                       int, __u64 *, __u64 *);
51 };
52
53 /*
54  * B-tree path operations
55  */
56
57 static struct kmem_cache *nilfs_btree_path_cache;
58
59 int __init nilfs_btree_path_cache_init(void)
60 {
61         nilfs_btree_path_cache =
62                 kmem_cache_create("nilfs2_btree_path_cache",
63                                   sizeof(struct nilfs_btree_path) *
64                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67
68 void nilfs_btree_path_cache_destroy(void)
69 {
70         kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76         return (struct nilfs_btree_path *)
77                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81                                          struct nilfs_btree_path *path)
82 {
83         kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87                                   struct nilfs_btree_path *path)
88 {
89         int level;
90
91         for (level = NILFS_BTREE_LEVEL_DATA;
92              level < NILFS_BTREE_LEVEL_MAX;
93              level++) {
94                 path[level].bp_bh = NULL;
95                 path[level].bp_sib_bh = NULL;
96                 path[level].bp_index = 0;
97                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_op = NULL;
100         }
101 }
102
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104                                    struct nilfs_btree_path *path)
105 {
106         int level;
107
108         for (level = NILFS_BTREE_LEVEL_DATA;
109              level < NILFS_BTREE_LEVEL_MAX;
110              level++) {
111                 if (path[level].bp_bh != NULL) {
112                         brelse(path[level].bp_bh);
113                         path[level].bp_bh = NULL;
114                 }
115                 /* sib_bh is released or deleted by prepare or commit
116                  * operations. */
117                 path[level].bp_sib_bh = NULL;
118                 path[level].bp_index = 0;
119                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
120                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_op = NULL;
122         }
123 }
124
125 /*
126  * B-tree node operations
127  */
128 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
129                                  struct buffer_head **bhp)
130 {
131         struct address_space *btnc =
132                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
133         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
134 }
135
136 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
137                                      __u64 ptr, struct buffer_head **bhp)
138 {
139         struct address_space *btnc =
140                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
141         int ret;
142
143         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
144         if (!ret)
145                 set_buffer_nilfs_volatile(*bhp);
146         return ret;
147 }
148
149 static inline int
150 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
151                            const struct nilfs_btree_node *node)
152 {
153         return node->bn_flags;
154 }
155
156 static inline void
157 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
158                            struct nilfs_btree_node *node,
159                            int flags)
160 {
161         node->bn_flags = flags;
162 }
163
164 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
165                                         const struct nilfs_btree_node *node)
166 {
167         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
168 }
169
170 static inline int
171 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
172                            const struct nilfs_btree_node *node)
173 {
174         return node->bn_level;
175 }
176
177 static inline void
178 nilfs_btree_node_set_level(struct nilfs_btree *btree,
179                            struct nilfs_btree_node *node,
180                            int level)
181 {
182         node->bn_level = level;
183 }
184
185 static inline int
186 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
187                                const struct nilfs_btree_node *node)
188 {
189         return le16_to_cpu(node->bn_nchildren);
190 }
191
192 static inline void
193 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
194                                struct nilfs_btree_node *node,
195                                int nchildren)
196 {
197         node->bn_nchildren = cpu_to_le16(nchildren);
198 }
199
200 static inline int
201 nilfs_btree_node_size(const struct nilfs_btree *btree)
202 {
203         return 1 << btree->bt_bmap.b_inode->i_blkbits;
204 }
205
206 static inline int
207 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
208                                const struct nilfs_btree_node *node)
209 {
210         return nilfs_btree_node_root(btree, node) ?
211                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
212                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
213 }
214
215 static inline int
216 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
217                                const struct nilfs_btree_node *node)
218 {
219         return nilfs_btree_node_root(btree, node) ?
220                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
221                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
222 }
223
224 static inline __le64 *
225 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
226                        const struct nilfs_btree_node *node)
227 {
228         return (__le64 *)((char *)(node + 1) +
229                           (nilfs_btree_node_root(btree, node) ?
230                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
231 }
232
233 static inline __le64 *
234 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
235                        const struct nilfs_btree_node *node)
236 {
237         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
238                           nilfs_btree_node_nchildren_max(btree, node));
239 }
240
241 static inline __u64
242 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
243                          const struct nilfs_btree_node *node, int index)
244 {
245         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
246                                         index));
247 }
248
249 static inline void
250 nilfs_btree_node_set_key(struct nilfs_btree *btree,
251                          struct nilfs_btree_node *node, int index, __u64 key)
252 {
253         *(nilfs_btree_node_dkeys(btree, node) + index) =
254                 nilfs_bmap_key_to_dkey(key);
255 }
256
257 static inline __u64
258 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
259                          const struct nilfs_btree_node *node,
260                          int index)
261 {
262         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
263                                         index));
264 }
265
266 static inline void
267 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
268                          struct nilfs_btree_node *node,
269                          int index,
270                          __u64 ptr)
271 {
272         *(nilfs_btree_node_dptrs(btree, node) + index) =
273                 nilfs_bmap_ptr_to_dptr(ptr);
274 }
275
276 static void nilfs_btree_node_init(struct nilfs_btree *btree,
277                                   struct nilfs_btree_node *node,
278                                   int flags, int level, int nchildren,
279                                   const __u64 *keys, const __u64 *ptrs)
280 {
281         __le64 *dkeys;
282         __le64 *dptrs;
283         int i;
284
285         nilfs_btree_node_set_flags(btree, node, flags);
286         nilfs_btree_node_set_level(btree, node, level);
287         nilfs_btree_node_set_nchildren(btree, node, nchildren);
288
289         dkeys = nilfs_btree_node_dkeys(btree, node);
290         dptrs = nilfs_btree_node_dptrs(btree, node);
291         for (i = 0; i < nchildren; i++) {
292                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
293                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
294         }
295 }
296
297 /* Assume the buffer heads corresponding to left and right are locked. */
298 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
299                                        struct nilfs_btree_node *left,
300                                        struct nilfs_btree_node *right,
301                                        int n)
302 {
303         __le64 *ldkeys, *rdkeys;
304         __le64 *ldptrs, *rdptrs;
305         int lnchildren, rnchildren;
306
307         ldkeys = nilfs_btree_node_dkeys(btree, left);
308         ldptrs = nilfs_btree_node_dptrs(btree, left);
309         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
310
311         rdkeys = nilfs_btree_node_dkeys(btree, right);
312         rdptrs = nilfs_btree_node_dptrs(btree, right);
313         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
314
315         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
316         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
317         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
318         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
319
320         lnchildren += n;
321         rnchildren -= n;
322         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
323         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
324 }
325
326 /* Assume that the buffer heads corresponding to left and right are locked. */
327 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
328                                         struct nilfs_btree_node *left,
329                                         struct nilfs_btree_node *right,
330                                         int n)
331 {
332         __le64 *ldkeys, *rdkeys;
333         __le64 *ldptrs, *rdptrs;
334         int lnchildren, rnchildren;
335
336         ldkeys = nilfs_btree_node_dkeys(btree, left);
337         ldptrs = nilfs_btree_node_dptrs(btree, left);
338         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
339
340         rdkeys = nilfs_btree_node_dkeys(btree, right);
341         rdptrs = nilfs_btree_node_dptrs(btree, right);
342         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
343
344         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
345         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
346         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
347         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
348
349         lnchildren -= n;
350         rnchildren += n;
351         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
352         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
353 }
354
355 /* Assume that the buffer head corresponding to node is locked. */
356 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
357                                     struct nilfs_btree_node *node,
358                                     __u64 key, __u64 ptr, int index)
359 {
360         __le64 *dkeys;
361         __le64 *dptrs;
362         int nchildren;
363
364         dkeys = nilfs_btree_node_dkeys(btree, node);
365         dptrs = nilfs_btree_node_dptrs(btree, node);
366         nchildren = nilfs_btree_node_get_nchildren(btree, node);
367         if (index < nchildren) {
368                 memmove(dkeys + index + 1, dkeys + index,
369                         (nchildren - index) * sizeof(*dkeys));
370                 memmove(dptrs + index + 1, dptrs + index,
371                         (nchildren - index) * sizeof(*dptrs));
372         }
373         dkeys[index] = nilfs_bmap_key_to_dkey(key);
374         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
375         nchildren++;
376         nilfs_btree_node_set_nchildren(btree, node, nchildren);
377 }
378
379 /* Assume that the buffer head corresponding to node is locked. */
380 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
381                                     struct nilfs_btree_node *node,
382                                     __u64 *keyp, __u64 *ptrp, int index)
383 {
384         __u64 key;
385         __u64 ptr;
386         __le64 *dkeys;
387         __le64 *dptrs;
388         int nchildren;
389
390         dkeys = nilfs_btree_node_dkeys(btree, node);
391         dptrs = nilfs_btree_node_dptrs(btree, node);
392         key = nilfs_bmap_dkey_to_key(dkeys[index]);
393         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
394         nchildren = nilfs_btree_node_get_nchildren(btree, node);
395         if (keyp != NULL)
396                 *keyp = key;
397         if (ptrp != NULL)
398                 *ptrp = ptr;
399
400         if (index < nchildren - 1) {
401                 memmove(dkeys + index, dkeys + index + 1,
402                         (nchildren - index - 1) * sizeof(*dkeys));
403                 memmove(dptrs + index, dptrs + index + 1,
404                         (nchildren - index - 1) * sizeof(*dptrs));
405         }
406         nchildren--;
407         nilfs_btree_node_set_nchildren(btree, node, nchildren);
408 }
409
410 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
411                                    const struct nilfs_btree_node *node,
412                                    __u64 key, int *indexp)
413 {
414         __u64 nkey;
415         int index, low, high, s;
416
417         /* binary search */
418         low = 0;
419         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
420         index = 0;
421         s = 0;
422         while (low <= high) {
423                 index = (low + high) / 2;
424                 nkey = nilfs_btree_node_get_key(btree, node, index);
425                 if (nkey == key) {
426                         s = 0;
427                         goto out;
428                 } else if (nkey < key) {
429                         low = index + 1;
430                         s = -1;
431                 } else {
432                         high = index - 1;
433                         s = 1;
434                 }
435         }
436
437         /* adjust index */
438         if (nilfs_btree_node_get_level(btree, node) >
439             NILFS_BTREE_LEVEL_NODE_MIN) {
440                 if ((s > 0) && (index > 0))
441                         index--;
442         } else if (s < 0)
443                 index++;
444
445  out:
446         *indexp = index;
447
448         return s == 0;
449 }
450
451 static inline struct nilfs_btree_node *
452 nilfs_btree_get_root(const struct nilfs_btree *btree)
453 {
454         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
455 }
456
457 static inline struct nilfs_btree_node *
458 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
459                              const struct nilfs_btree_path *path,
460                              int level)
461 {
462         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
463 }
464
465 static inline struct nilfs_btree_node *
466 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
467                          const struct nilfs_btree_path *path,
468                          int level)
469 {
470         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
471 }
472
473 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
474 {
475         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
476                 + 1;
477 }
478
479 static inline struct nilfs_btree_node *
480 nilfs_btree_get_node(const struct nilfs_btree *btree,
481                      const struct nilfs_btree_path *path,
482                      int level)
483 {
484         return (level == nilfs_btree_height(btree) - 1) ?
485                 nilfs_btree_get_root(btree) :
486                 nilfs_btree_get_nonroot_node(btree, path, level);
487 }
488
489 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
490                                  struct nilfs_btree_path *path,
491                                  __u64 key, __u64 *ptrp, int minlevel)
492 {
493         struct nilfs_btree_node *node;
494         __u64 ptr;
495         int level, index, found, ret;
496
497         node = nilfs_btree_get_root(btree);
498         level = nilfs_btree_node_get_level(btree, node);
499         if ((level < minlevel) ||
500             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
501                 return -ENOENT;
502
503         found = nilfs_btree_node_lookup(btree, node, key, &index);
504         ptr = nilfs_btree_node_get_ptr(btree, node, index);
505         path[level].bp_bh = NULL;
506         path[level].bp_index = index;
507
508         for (level--; level >= minlevel; level--) {
509                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
510                 if (ret < 0)
511                         return ret;
512                 node = nilfs_btree_get_nonroot_node(btree, path, level);
513                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
514                 if (!found)
515                         found = nilfs_btree_node_lookup(btree, node, key,
516                                                         &index);
517                 else
518                         index = 0;
519                 if (index < nilfs_btree_node_nchildren_max(btree, node))
520                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
521                 else {
522                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
523                         /* insert */
524                         ptr = NILFS_BMAP_INVALID_PTR;
525                 }
526                 path[level].bp_index = index;
527         }
528         if (!found)
529                 return -ENOENT;
530
531         if (ptrp != NULL)
532                 *ptrp = ptr;
533
534         return 0;
535 }
536
537 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
538                                       struct nilfs_btree_path *path,
539                                       __u64 *keyp, __u64 *ptrp)
540 {
541         struct nilfs_btree_node *node;
542         __u64 ptr;
543         int index, level, ret;
544
545         node = nilfs_btree_get_root(btree);
546         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
547         if (index < 0)
548                 return -ENOENT;
549         level = nilfs_btree_node_get_level(btree, node);
550         ptr = nilfs_btree_node_get_ptr(btree, node, index);
551         path[level].bp_bh = NULL;
552         path[level].bp_index = index;
553
554         for (level--; level > 0; level--) {
555                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
556                 if (ret < 0)
557                         return ret;
558                 node = nilfs_btree_get_nonroot_node(btree, path, level);
559                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
560                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
561                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
562                 path[level].bp_index = index;
563         }
564
565         if (keyp != NULL)
566                 *keyp = nilfs_btree_node_get_key(btree, node, index);
567         if (ptrp != NULL)
568                 *ptrp = ptr;
569
570         return 0;
571 }
572
573 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
574                               __u64 key, int level, __u64 *ptrp)
575 {
576         struct nilfs_btree *btree;
577         struct nilfs_btree_path *path;
578         __u64 ptr;
579         int ret;
580
581         btree = (struct nilfs_btree *)bmap;
582         path = nilfs_btree_alloc_path(btree);
583         if (path == NULL)
584                 return -ENOMEM;
585         nilfs_btree_init_path(btree, path);
586
587         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
588
589         if (ptrp != NULL)
590                 *ptrp = ptr;
591
592         nilfs_btree_clear_path(btree, path);
593         nilfs_btree_free_path(btree, path);
594
595         return ret;
596 }
597
598 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
599                                     struct nilfs_btree_path *path,
600                                     int level, __u64 key)
601 {
602         if (level < nilfs_btree_height(btree) - 1) {
603                 do {
604                         lock_buffer(path[level].bp_bh);
605                         nilfs_btree_node_set_key(
606                                 btree,
607                                 nilfs_btree_get_nonroot_node(
608                                         btree, path, level),
609                                 path[level].bp_index, key);
610                         if (!buffer_dirty(path[level].bp_bh))
611                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
612                         unlock_buffer(path[level].bp_bh);
613                 } while ((path[level].bp_index == 0) &&
614                          (++level < nilfs_btree_height(btree) - 1));
615         }
616
617         /* root */
618         if (level == nilfs_btree_height(btree) - 1) {
619                 nilfs_btree_node_set_key(btree,
620                                          nilfs_btree_get_root(btree),
621                                          path[level].bp_index, key);
622         }
623 }
624
625 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
626                                   struct nilfs_btree_path *path,
627                                   int level, __u64 *keyp, __u64 *ptrp)
628 {
629         struct nilfs_btree_node *node;
630
631         if (level < nilfs_btree_height(btree) - 1) {
632                 lock_buffer(path[level].bp_bh);
633                 node = nilfs_btree_get_nonroot_node(btree, path, level);
634                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
635                                         path[level].bp_index);
636                 if (!buffer_dirty(path[level].bp_bh))
637                         nilfs_btnode_mark_dirty(path[level].bp_bh);
638                 unlock_buffer(path[level].bp_bh);
639
640                 if (path[level].bp_index == 0)
641                         nilfs_btree_promote_key(btree, path, level + 1,
642                                                 nilfs_btree_node_get_key(
643                                                         btree, node, 0));
644         } else {
645                 node = nilfs_btree_get_root(btree);
646                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
647                                         path[level].bp_index);
648         }
649 }
650
651 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
652                                    struct nilfs_btree_path *path,
653                                    int level, __u64 *keyp, __u64 *ptrp)
654 {
655         struct nilfs_btree_node *node, *left;
656         int nchildren, lnchildren, n, move;
657
658         lock_buffer(path[level].bp_bh);
659         lock_buffer(path[level].bp_sib_bh);
660
661         node = nilfs_btree_get_nonroot_node(btree, path, level);
662         left = nilfs_btree_get_sib_node(btree, path, level);
663         nchildren = nilfs_btree_node_get_nchildren(btree, node);
664         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
665         move = 0;
666
667         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
668         if (n > path[level].bp_index) {
669                 /* move insert point */
670                 n--;
671                 move = 1;
672         }
673
674         nilfs_btree_node_move_left(btree, left, node, n);
675
676         if (!buffer_dirty(path[level].bp_bh))
677                 nilfs_btnode_mark_dirty(path[level].bp_bh);
678         if (!buffer_dirty(path[level].bp_sib_bh))
679                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
680
681         unlock_buffer(path[level].bp_bh);
682         unlock_buffer(path[level].bp_sib_bh);
683
684         nilfs_btree_promote_key(btree, path, level + 1,
685                                 nilfs_btree_node_get_key(btree, node, 0));
686
687         if (move) {
688                 brelse(path[level].bp_bh);
689                 path[level].bp_bh = path[level].bp_sib_bh;
690                 path[level].bp_sib_bh = NULL;
691                 path[level].bp_index += lnchildren;
692                 path[level + 1].bp_index--;
693         } else {
694                 brelse(path[level].bp_sib_bh);
695                 path[level].bp_sib_bh = NULL;
696                 path[level].bp_index -= n;
697         }
698
699         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
700 }
701
702 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
703                                     struct nilfs_btree_path *path,
704                                     int level, __u64 *keyp, __u64 *ptrp)
705 {
706         struct nilfs_btree_node *node, *right;
707         int nchildren, rnchildren, n, move;
708
709         lock_buffer(path[level].bp_bh);
710         lock_buffer(path[level].bp_sib_bh);
711
712         node = nilfs_btree_get_nonroot_node(btree, path, level);
713         right = nilfs_btree_get_sib_node(btree, path, level);
714         nchildren = nilfs_btree_node_get_nchildren(btree, node);
715         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
716         move = 0;
717
718         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
719         if (n > nchildren - path[level].bp_index) {
720                 /* move insert point */
721                 n--;
722                 move = 1;
723         }
724
725         nilfs_btree_node_move_right(btree, node, right, n);
726
727         if (!buffer_dirty(path[level].bp_bh))
728                 nilfs_btnode_mark_dirty(path[level].bp_bh);
729         if (!buffer_dirty(path[level].bp_sib_bh))
730                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
731
732         unlock_buffer(path[level].bp_bh);
733         unlock_buffer(path[level].bp_sib_bh);
734
735         path[level + 1].bp_index++;
736         nilfs_btree_promote_key(btree, path, level + 1,
737                                 nilfs_btree_node_get_key(btree, right, 0));
738         path[level + 1].bp_index--;
739
740         if (move) {
741                 brelse(path[level].bp_bh);
742                 path[level].bp_bh = path[level].bp_sib_bh;
743                 path[level].bp_sib_bh = NULL;
744                 path[level].bp_index -=
745                         nilfs_btree_node_get_nchildren(btree, node);
746                 path[level + 1].bp_index++;
747         } else {
748                 brelse(path[level].bp_sib_bh);
749                 path[level].bp_sib_bh = NULL;
750         }
751
752         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
753 }
754
755 static void nilfs_btree_split(struct nilfs_btree *btree,
756                               struct nilfs_btree_path *path,
757                               int level, __u64 *keyp, __u64 *ptrp)
758 {
759         struct nilfs_btree_node *node, *right;
760         __u64 newkey;
761         __u64 newptr;
762         int nchildren, n, move;
763
764         lock_buffer(path[level].bp_bh);
765         lock_buffer(path[level].bp_sib_bh);
766
767         node = nilfs_btree_get_nonroot_node(btree, path, level);
768         right = nilfs_btree_get_sib_node(btree, path, level);
769         nchildren = nilfs_btree_node_get_nchildren(btree, node);
770         move = 0;
771
772         n = (nchildren + 1) / 2;
773         if (n > nchildren - path[level].bp_index) {
774                 n--;
775                 move = 1;
776         }
777
778         nilfs_btree_node_move_right(btree, node, right, n);
779
780         if (!buffer_dirty(path[level].bp_bh))
781                 nilfs_btnode_mark_dirty(path[level].bp_bh);
782         if (!buffer_dirty(path[level].bp_sib_bh))
783                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
784
785         unlock_buffer(path[level].bp_bh);
786         unlock_buffer(path[level].bp_sib_bh);
787
788         newkey = nilfs_btree_node_get_key(btree, right, 0);
789         newptr = path[level].bp_newreq.bpr_ptr;
790
791         if (move) {
792                 path[level].bp_index -=
793                         nilfs_btree_node_get_nchildren(btree, node);
794                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
795                                         path[level].bp_index);
796
797                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
798                 *ptrp = path[level].bp_newreq.bpr_ptr;
799
800                 brelse(path[level].bp_bh);
801                 path[level].bp_bh = path[level].bp_sib_bh;
802                 path[level].bp_sib_bh = NULL;
803         } else {
804                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
805
806                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
807                 *ptrp = path[level].bp_newreq.bpr_ptr;
808
809                 brelse(path[level].bp_sib_bh);
810                 path[level].bp_sib_bh = NULL;
811         }
812
813         path[level + 1].bp_index++;
814 }
815
816 static void nilfs_btree_grow(struct nilfs_btree *btree,
817                              struct nilfs_btree_path *path,
818                              int level, __u64 *keyp, __u64 *ptrp)
819 {
820         struct nilfs_btree_node *root, *child;
821         int n;
822
823         lock_buffer(path[level].bp_sib_bh);
824
825         root = nilfs_btree_get_root(btree);
826         child = nilfs_btree_get_sib_node(btree, path, level);
827
828         n = nilfs_btree_node_get_nchildren(btree, root);
829
830         nilfs_btree_node_move_right(btree, root, child, n);
831         nilfs_btree_node_set_level(btree, root, level + 1);
832
833         if (!buffer_dirty(path[level].bp_sib_bh))
834                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
835
836         unlock_buffer(path[level].bp_sib_bh);
837
838         path[level].bp_bh = path[level].bp_sib_bh;
839         path[level].bp_sib_bh = NULL;
840
841         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
842
843         *keyp = nilfs_btree_node_get_key(btree, child, 0);
844         *ptrp = path[level].bp_newreq.bpr_ptr;
845 }
846
847 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
848                                    const struct nilfs_btree_path *path)
849 {
850         struct nilfs_btree_node *node;
851         int level;
852
853         if (path == NULL)
854                 return NILFS_BMAP_INVALID_PTR;
855
856         /* left sibling */
857         level = NILFS_BTREE_LEVEL_NODE_MIN;
858         if (path[level].bp_index > 0) {
859                 node = nilfs_btree_get_node(btree, path, level);
860                 return nilfs_btree_node_get_ptr(btree, node,
861                                                 path[level].bp_index - 1);
862         }
863
864         /* parent */
865         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
866         if (level <= nilfs_btree_height(btree) - 1) {
867                 node = nilfs_btree_get_node(btree, path, level);
868                 return nilfs_btree_node_get_ptr(btree, node,
869                                                 path[level].bp_index);
870         }
871
872         return NILFS_BMAP_INVALID_PTR;
873 }
874
875 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
876                                        const struct nilfs_btree_path *path,
877                                        __u64 key)
878 {
879         __u64 ptr;
880
881         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
882         if (ptr != NILFS_BMAP_INVALID_PTR)
883                 /* sequential access */
884                 return ptr;
885         else {
886                 ptr = nilfs_btree_find_near(btree, path);
887                 if (ptr != NILFS_BMAP_INVALID_PTR)
888                         /* near */
889                         return ptr;
890         }
891         /* block group */
892         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
893 }
894
895 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
896                                      __u64 ptr)
897 {
898         btree->bt_bmap.b_last_allocated_key = key;
899         btree->bt_bmap.b_last_allocated_ptr = ptr;
900 }
901
902 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
903                                       struct nilfs_btree_path *path,
904                                       int *levelp, __u64 key, __u64 ptr,
905                                       struct nilfs_bmap_stats *stats)
906 {
907         struct buffer_head *bh;
908         struct nilfs_btree_node *node, *parent, *sib;
909         __u64 sibptr;
910         int pindex, level, ret;
911
912         stats->bs_nblocks = 0;
913         level = NILFS_BTREE_LEVEL_DATA;
914
915         /* allocate a new ptr for data block */
916         if (btree->bt_ops->btop_find_target != NULL)
917                 path[level].bp_newreq.bpr_ptr =
918                         btree->bt_ops->btop_find_target(btree, path, key);
919
920         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
921                 &btree->bt_bmap, &path[level].bp_newreq);
922         if (ret < 0)
923                 goto err_out_data;
924
925         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
926              level < nilfs_btree_height(btree) - 1;
927              level++) {
928                 node = nilfs_btree_get_nonroot_node(btree, path, level);
929                 if (nilfs_btree_node_get_nchildren(btree, node) <
930                     nilfs_btree_node_nchildren_max(btree, node)) {
931                         path[level].bp_op = nilfs_btree_do_insert;
932                         stats->bs_nblocks++;
933                         goto out;
934                 }
935
936                 parent = nilfs_btree_get_node(btree, path, level + 1);
937                 pindex = path[level + 1].bp_index;
938
939                 /* left sibling */
940                 if (pindex > 0) {
941                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
942                                                           pindex - 1);
943                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
944                         if (ret < 0)
945                                 goto err_out_child_node;
946                         sib = (struct nilfs_btree_node *)bh->b_data;
947                         if (nilfs_btree_node_get_nchildren(btree, sib) <
948                             nilfs_btree_node_nchildren_max(btree, sib)) {
949                                 path[level].bp_sib_bh = bh;
950                                 path[level].bp_op = nilfs_btree_carry_left;
951                                 stats->bs_nblocks++;
952                                 goto out;
953                         } else
954                                 brelse(bh);
955                 }
956
957                 /* right sibling */
958                 if (pindex <
959                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
960                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
961                                                           pindex + 1);
962                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
963                         if (ret < 0)
964                                 goto err_out_child_node;
965                         sib = (struct nilfs_btree_node *)bh->b_data;
966                         if (nilfs_btree_node_get_nchildren(btree, sib) <
967                             nilfs_btree_node_nchildren_max(btree, sib)) {
968                                 path[level].bp_sib_bh = bh;
969                                 path[level].bp_op = nilfs_btree_carry_right;
970                                 stats->bs_nblocks++;
971                                 goto out;
972                         } else
973                                 brelse(bh);
974                 }
975
976                 /* split */
977                 path[level].bp_newreq.bpr_ptr =
978                         path[level - 1].bp_newreq.bpr_ptr + 1;
979                 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
980                         &btree->bt_bmap, &path[level].bp_newreq);
981                 if (ret < 0)
982                         goto err_out_child_node;
983                 ret = nilfs_btree_get_new_block(btree,
984                                                 path[level].bp_newreq.bpr_ptr,
985                                                 &bh);
986                 if (ret < 0)
987                         goto err_out_curr_node;
988
989                 stats->bs_nblocks++;
990
991                 lock_buffer(bh);
992                 nilfs_btree_node_init(btree,
993                                       (struct nilfs_btree_node *)bh->b_data,
994                                       0, level, 0, NULL, NULL);
995                 unlock_buffer(bh);
996                 path[level].bp_sib_bh = bh;
997                 path[level].bp_op = nilfs_btree_split;
998         }
999
1000         /* root */
1001         node = nilfs_btree_get_root(btree);
1002         if (nilfs_btree_node_get_nchildren(btree, node) <
1003             nilfs_btree_node_nchildren_max(btree, node)) {
1004                 path[level].bp_op = nilfs_btree_do_insert;
1005                 stats->bs_nblocks++;
1006                 goto out;
1007         }
1008
1009         /* grow */
1010         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1011         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
1012                 &btree->bt_bmap, &path[level].bp_newreq);
1013         if (ret < 0)
1014                 goto err_out_child_node;
1015         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1016                                         &bh);
1017         if (ret < 0)
1018                 goto err_out_curr_node;
1019
1020         lock_buffer(bh);
1021         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1022                               0, level, 0, NULL, NULL);
1023         unlock_buffer(bh);
1024         path[level].bp_sib_bh = bh;
1025         path[level].bp_op = nilfs_btree_grow;
1026
1027         level++;
1028         path[level].bp_op = nilfs_btree_do_insert;
1029
1030         /* a newly-created node block and a data block are added */
1031         stats->bs_nblocks += 2;
1032
1033         /* success */
1034  out:
1035         *levelp = level;
1036         return ret;
1037
1038         /* error */
1039  err_out_curr_node:
1040         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1041                                                     &path[level].bp_newreq);
1042  err_out_child_node:
1043         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1044                 nilfs_btnode_delete(path[level].bp_sib_bh);
1045                 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1046                         &btree->bt_bmap, &path[level].bp_newreq);
1047
1048         }
1049
1050         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1051                                                        &path[level].bp_newreq);
1052  err_out_data:
1053         *levelp = level;
1054         stats->bs_nblocks = 0;
1055         return ret;
1056 }
1057
1058 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1059                                       struct nilfs_btree_path *path,
1060                                       int maxlevel, __u64 key, __u64 ptr)
1061 {
1062         int level;
1063
1064         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1065         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1066         if (btree->bt_ops->btop_set_target != NULL)
1067                 btree->bt_ops->btop_set_target(btree, key, ptr);
1068
1069         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1070                 btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1071                         &btree->bt_bmap, &path[level - 1].bp_newreq);
1072                 path[level].bp_op(btree, path, level, &key, &ptr);
1073         }
1074
1075         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1076                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1077 }
1078
1079 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1080 {
1081         struct nilfs_btree *btree;
1082         struct nilfs_btree_path *path;
1083         struct nilfs_bmap_stats stats;
1084         int level, ret;
1085
1086         btree = (struct nilfs_btree *)bmap;
1087         path = nilfs_btree_alloc_path(btree);
1088         if (path == NULL)
1089                 return -ENOMEM;
1090         nilfs_btree_init_path(btree, path);
1091
1092         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1093                                     NILFS_BTREE_LEVEL_NODE_MIN);
1094         if (ret != -ENOENT) {
1095                 if (ret == 0)
1096                         ret = -EEXIST;
1097                 goto out;
1098         }
1099
1100         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1101         if (ret < 0)
1102                 goto out;
1103         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1104         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1105
1106  out:
1107         nilfs_btree_clear_path(btree, path);
1108         nilfs_btree_free_path(btree, path);
1109         return ret;
1110 }
1111
1112 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1113                                   struct nilfs_btree_path *path,
1114                                   int level, __u64 *keyp, __u64 *ptrp)
1115 {
1116         struct nilfs_btree_node *node;
1117
1118         if (level < nilfs_btree_height(btree) - 1) {
1119                 lock_buffer(path[level].bp_bh);
1120                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1121                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1122                                         path[level].bp_index);
1123                 if (!buffer_dirty(path[level].bp_bh))
1124                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1125                 unlock_buffer(path[level].bp_bh);
1126                 if (path[level].bp_index == 0)
1127                         nilfs_btree_promote_key(btree, path, level + 1,
1128                                 nilfs_btree_node_get_key(btree, node, 0));
1129         } else {
1130                 node = nilfs_btree_get_root(btree);
1131                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1132                                         path[level].bp_index);
1133         }
1134 }
1135
1136 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1137                                     struct nilfs_btree_path *path,
1138                                     int level, __u64 *keyp, __u64 *ptrp)
1139 {
1140         struct nilfs_btree_node *node, *left;
1141         int nchildren, lnchildren, n;
1142
1143         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1144
1145         lock_buffer(path[level].bp_bh);
1146         lock_buffer(path[level].bp_sib_bh);
1147
1148         node = nilfs_btree_get_nonroot_node(btree, path, level);
1149         left = nilfs_btree_get_sib_node(btree, path, level);
1150         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1151         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1152
1153         n = (nchildren + lnchildren) / 2 - nchildren;
1154
1155         nilfs_btree_node_move_right(btree, left, node, n);
1156
1157         if (!buffer_dirty(path[level].bp_bh))
1158                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1159         if (!buffer_dirty(path[level].bp_sib_bh))
1160                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1161
1162         unlock_buffer(path[level].bp_bh);
1163         unlock_buffer(path[level].bp_sib_bh);
1164
1165         nilfs_btree_promote_key(btree, path, level + 1,
1166                                 nilfs_btree_node_get_key(btree, node, 0));
1167
1168         brelse(path[level].bp_sib_bh);
1169         path[level].bp_sib_bh = NULL;
1170         path[level].bp_index += n;
1171 }
1172
1173 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1174                                      struct nilfs_btree_path *path,
1175                                      int level, __u64 *keyp, __u64 *ptrp)
1176 {
1177         struct nilfs_btree_node *node, *right;
1178         int nchildren, rnchildren, n;
1179
1180         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1181
1182         lock_buffer(path[level].bp_bh);
1183         lock_buffer(path[level].bp_sib_bh);
1184
1185         node = nilfs_btree_get_nonroot_node(btree, path, level);
1186         right = nilfs_btree_get_sib_node(btree, path, level);
1187         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1188         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1189
1190         n = (nchildren + rnchildren) / 2 - nchildren;
1191
1192         nilfs_btree_node_move_left(btree, node, right, n);
1193
1194         if (!buffer_dirty(path[level].bp_bh))
1195                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1196         if (!buffer_dirty(path[level].bp_sib_bh))
1197                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1198
1199         unlock_buffer(path[level].bp_bh);
1200         unlock_buffer(path[level].bp_sib_bh);
1201
1202         path[level + 1].bp_index++;
1203         nilfs_btree_promote_key(btree, path, level + 1,
1204                                 nilfs_btree_node_get_key(btree, right, 0));
1205         path[level + 1].bp_index--;
1206
1207         brelse(path[level].bp_sib_bh);
1208         path[level].bp_sib_bh = NULL;
1209 }
1210
1211 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1212                                     struct nilfs_btree_path *path,
1213                                     int level, __u64 *keyp, __u64 *ptrp)
1214 {
1215         struct nilfs_btree_node *node, *left;
1216         int n;
1217
1218         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1219
1220         lock_buffer(path[level].bp_bh);
1221         lock_buffer(path[level].bp_sib_bh);
1222
1223         node = nilfs_btree_get_nonroot_node(btree, path, level);
1224         left = nilfs_btree_get_sib_node(btree, path, level);
1225
1226         n = nilfs_btree_node_get_nchildren(btree, node);
1227
1228         nilfs_btree_node_move_left(btree, left, node, n);
1229
1230         if (!buffer_dirty(path[level].bp_sib_bh))
1231                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1232
1233         unlock_buffer(path[level].bp_bh);
1234         unlock_buffer(path[level].bp_sib_bh);
1235
1236         nilfs_btnode_delete(path[level].bp_bh);
1237         path[level].bp_bh = path[level].bp_sib_bh;
1238         path[level].bp_sib_bh = NULL;
1239         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1240 }
1241
1242 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1243                                      struct nilfs_btree_path *path,
1244                                      int level, __u64 *keyp, __u64 *ptrp)
1245 {
1246         struct nilfs_btree_node *node, *right;
1247         int n;
1248
1249         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1250
1251         lock_buffer(path[level].bp_bh);
1252         lock_buffer(path[level].bp_sib_bh);
1253
1254         node = nilfs_btree_get_nonroot_node(btree, path, level);
1255         right = nilfs_btree_get_sib_node(btree, path, level);
1256
1257         n = nilfs_btree_node_get_nchildren(btree, right);
1258
1259         nilfs_btree_node_move_left(btree, node, right, n);
1260
1261         if (!buffer_dirty(path[level].bp_bh))
1262                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1263
1264         unlock_buffer(path[level].bp_bh);
1265         unlock_buffer(path[level].bp_sib_bh);
1266
1267         nilfs_btnode_delete(path[level].bp_sib_bh);
1268         path[level].bp_sib_bh = NULL;
1269         path[level + 1].bp_index++;
1270 }
1271
1272 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1273                                struct nilfs_btree_path *path,
1274                                int level, __u64 *keyp, __u64 *ptrp)
1275 {
1276         struct nilfs_btree_node *root, *child;
1277         int n;
1278
1279         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1280
1281         lock_buffer(path[level].bp_bh);
1282         root = nilfs_btree_get_root(btree);
1283         child = nilfs_btree_get_nonroot_node(btree, path, level);
1284
1285         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1286         nilfs_btree_node_set_level(btree, root, level);
1287         n = nilfs_btree_node_get_nchildren(btree, child);
1288         nilfs_btree_node_move_left(btree, root, child, n);
1289         unlock_buffer(path[level].bp_bh);
1290
1291         nilfs_btnode_delete(path[level].bp_bh);
1292         path[level].bp_bh = NULL;
1293 }
1294
1295
1296 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1297                                       struct nilfs_btree_path *path,
1298                                       int *levelp,
1299                                       struct nilfs_bmap_stats *stats)
1300 {
1301         struct buffer_head *bh;
1302         struct nilfs_btree_node *node, *parent, *sib;
1303         __u64 sibptr;
1304         int pindex, level, ret;
1305
1306         ret = 0;
1307         stats->bs_nblocks = 0;
1308         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1309              level < nilfs_btree_height(btree) - 1;
1310              level++) {
1311                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1312                 path[level].bp_oldreq.bpr_ptr =
1313                         nilfs_btree_node_get_ptr(btree, node,
1314                                                  path[level].bp_index);
1315                 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1316                         ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1317                                 &btree->bt_bmap, &path[level].bp_oldreq);
1318                         if (ret < 0)
1319                                 goto err_out_child_node;
1320                 }
1321
1322                 if (nilfs_btree_node_get_nchildren(btree, node) >
1323                     nilfs_btree_node_nchildren_min(btree, node)) {
1324                         path[level].bp_op = nilfs_btree_do_delete;
1325                         stats->bs_nblocks++;
1326                         goto out;
1327                 }
1328
1329                 parent = nilfs_btree_get_node(btree, path, level + 1);
1330                 pindex = path[level + 1].bp_index;
1331
1332                 if (pindex > 0) {
1333                         /* left sibling */
1334                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1335                                                           pindex - 1);
1336                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1337                         if (ret < 0)
1338                                 goto err_out_curr_node;
1339                         sib = (struct nilfs_btree_node *)bh->b_data;
1340                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1341                             nilfs_btree_node_nchildren_min(btree, sib)) {
1342                                 path[level].bp_sib_bh = bh;
1343                                 path[level].bp_op = nilfs_btree_borrow_left;
1344                                 stats->bs_nblocks++;
1345                                 goto out;
1346                         } else {
1347                                 path[level].bp_sib_bh = bh;
1348                                 path[level].bp_op = nilfs_btree_concat_left;
1349                                 stats->bs_nblocks++;
1350                                 /* continue; */
1351                         }
1352                 } else if (pindex <
1353                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1354                         /* right sibling */
1355                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1356                                                           pindex + 1);
1357                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1358                         if (ret < 0)
1359                                 goto err_out_curr_node;
1360                         sib = (struct nilfs_btree_node *)bh->b_data;
1361                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1362                             nilfs_btree_node_nchildren_min(btree, sib)) {
1363                                 path[level].bp_sib_bh = bh;
1364                                 path[level].bp_op = nilfs_btree_borrow_right;
1365                                 stats->bs_nblocks++;
1366                                 goto out;
1367                         } else {
1368                                 path[level].bp_sib_bh = bh;
1369                                 path[level].bp_op = nilfs_btree_concat_right;
1370                                 stats->bs_nblocks++;
1371                                 /* continue; */
1372                         }
1373                 } else {
1374                         /* no siblings */
1375                         /* the only child of the root node */
1376                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1377                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1378                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1379                                 path[level].bp_op = nilfs_btree_shrink;
1380                                 stats->bs_nblocks += 2;
1381                         } else {
1382                                 path[level].bp_op = nilfs_btree_do_delete;
1383                                 stats->bs_nblocks++;
1384                         }
1385
1386                         goto out;
1387
1388                 }
1389         }
1390
1391         node = nilfs_btree_get_root(btree);
1392         path[level].bp_oldreq.bpr_ptr =
1393                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1394         if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1395                 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1396                         &btree->bt_bmap, &path[level].bp_oldreq);
1397                 if (ret < 0)
1398                         goto err_out_child_node;
1399         }
1400         /* child of the root node is deleted */
1401         path[level].bp_op = nilfs_btree_do_delete;
1402         stats->bs_nblocks++;
1403
1404         /* success */
1405  out:
1406         *levelp = level;
1407         return ret;
1408
1409         /* error */
1410  err_out_curr_node:
1411         if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1412                 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1413                         &btree->bt_bmap, &path[level].bp_oldreq);
1414  err_out_child_node:
1415         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1416                 brelse(path[level].bp_sib_bh);
1417                 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1418                         btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1419                                 &btree->bt_bmap, &path[level].bp_oldreq);
1420         }
1421         *levelp = level;
1422         stats->bs_nblocks = 0;
1423         return ret;
1424 }
1425
1426 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1427                                       struct nilfs_btree_path *path,
1428                                       int maxlevel)
1429 {
1430         int level;
1431
1432         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1433                 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1434                         btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1435                                 &btree->bt_bmap, &path[level].bp_oldreq);
1436                 path[level].bp_op(btree, path, level, NULL, NULL);
1437         }
1438
1439         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1440                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1441 }
1442
1443 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1444
1445 {
1446         struct nilfs_btree *btree;
1447         struct nilfs_btree_path *path;
1448         struct nilfs_bmap_stats stats;
1449         int level, ret;
1450
1451         btree = (struct nilfs_btree *)bmap;
1452         path = nilfs_btree_alloc_path(btree);
1453         if (path == NULL)
1454                 return -ENOMEM;
1455         nilfs_btree_init_path(btree, path);
1456         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1457                                     NILFS_BTREE_LEVEL_NODE_MIN);
1458         if (ret < 0)
1459                 goto out;
1460
1461         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1462         if (ret < 0)
1463                 goto out;
1464         nilfs_btree_commit_delete(btree, path, level);
1465         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1466
1467 out:
1468         nilfs_btree_clear_path(btree, path);
1469         nilfs_btree_free_path(btree, path);
1470         return ret;
1471 }
1472
1473 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1474 {
1475         struct nilfs_btree *btree;
1476         struct nilfs_btree_path *path;
1477         int ret;
1478
1479         btree = (struct nilfs_btree *)bmap;
1480         path = nilfs_btree_alloc_path(btree);
1481         if (path == NULL)
1482                 return -ENOMEM;
1483         nilfs_btree_init_path(btree, path);
1484
1485         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1486
1487         nilfs_btree_clear_path(btree, path);
1488         nilfs_btree_free_path(btree, path);
1489
1490         return ret;
1491 }
1492
1493 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1494 {
1495         struct buffer_head *bh;
1496         struct nilfs_btree *btree;
1497         struct nilfs_btree_node *root, *node;
1498         __u64 maxkey, nextmaxkey;
1499         __u64 ptr;
1500         int nchildren, ret;
1501
1502         btree = (struct nilfs_btree *)bmap;
1503         root = nilfs_btree_get_root(btree);
1504         switch (nilfs_btree_height(btree)) {
1505         case 2:
1506                 bh = NULL;
1507                 node = root;
1508                 break;
1509         case 3:
1510                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1511                 if (nchildren > 1)
1512                         return 0;
1513                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1514                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1515                 if (ret < 0)
1516                         return ret;
1517                 node = (struct nilfs_btree_node *)bh->b_data;
1518                 break;
1519         default:
1520                 return 0;
1521         }
1522
1523         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1524         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1525         nextmaxkey = (nchildren > 1) ?
1526                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1527         if (bh != NULL)
1528                 brelse(bh);
1529
1530         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1531 }
1532
1533 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1534                                    __u64 *keys, __u64 *ptrs, int nitems)
1535 {
1536         struct buffer_head *bh;
1537         struct nilfs_btree *btree;
1538         struct nilfs_btree_node *node, *root;
1539         __le64 *dkeys;
1540         __le64 *dptrs;
1541         __u64 ptr;
1542         int nchildren, i, ret;
1543
1544         btree = (struct nilfs_btree *)bmap;
1545         root = nilfs_btree_get_root(btree);
1546         switch (nilfs_btree_height(btree)) {
1547         case 2:
1548                 bh = NULL;
1549                 node = root;
1550                 break;
1551         case 3:
1552                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1553                 WARN_ON(nchildren > 1);
1554                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1555                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1556                 if (ret < 0)
1557                         return ret;
1558                 node = (struct nilfs_btree_node *)bh->b_data;
1559                 break;
1560         default:
1561                 node = NULL;
1562                 return -EINVAL;
1563         }
1564
1565         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1566         if (nchildren < nitems)
1567                 nitems = nchildren;
1568         dkeys = nilfs_btree_node_dkeys(btree, node);
1569         dptrs = nilfs_btree_node_dptrs(btree, node);
1570         for (i = 0; i < nitems; i++) {
1571                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1572                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1573         }
1574
1575         if (bh != NULL)
1576                 brelse(bh);
1577
1578         return nitems;
1579 }
1580
1581 static int
1582 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1583                                        union nilfs_bmap_ptr_req *dreq,
1584                                        union nilfs_bmap_ptr_req *nreq,
1585                                        struct buffer_head **bhp,
1586                                        struct nilfs_bmap_stats *stats)
1587 {
1588         struct buffer_head *bh;
1589         struct nilfs_btree *btree;
1590         int ret;
1591
1592         btree = (struct nilfs_btree *)bmap;
1593         stats->bs_nblocks = 0;
1594
1595         /* for data */
1596         /* cannot find near ptr */
1597         if (btree->bt_ops->btop_find_target != NULL)
1598                 dreq->bpr_ptr
1599                         = btree->bt_ops->btop_find_target(btree, NULL, key);
1600         ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1601         if (ret < 0)
1602                 return ret;
1603
1604         *bhp = NULL;
1605         stats->bs_nblocks++;
1606         if (nreq != NULL) {
1607                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1608                 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1609                 if (ret < 0)
1610                         goto err_out_dreq;
1611
1612                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1613                 if (ret < 0)
1614                         goto err_out_nreq;
1615
1616                 *bhp = bh;
1617                 stats->bs_nblocks++;
1618         }
1619
1620         /* success */
1621         return 0;
1622
1623         /* error */
1624  err_out_nreq:
1625         bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1626  err_out_dreq:
1627         bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1628         stats->bs_nblocks = 0;
1629         return ret;
1630
1631 }
1632
1633 static void
1634 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1635                                       __u64 key, __u64 ptr,
1636                                       const __u64 *keys, const __u64 *ptrs,
1637                                       int n,
1638                                       union nilfs_bmap_ptr_req *dreq,
1639                                       union nilfs_bmap_ptr_req *nreq,
1640                                       struct buffer_head *bh)
1641 {
1642         struct nilfs_btree *btree;
1643         struct nilfs_btree_node *node;
1644         __u64 tmpptr;
1645
1646         /* free resources */
1647         if (bmap->b_ops->bop_clear != NULL)
1648                 bmap->b_ops->bop_clear(bmap);
1649
1650         /* ptr must be a pointer to a buffer head. */
1651         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1652
1653         /* convert and insert */
1654         btree = (struct nilfs_btree *)bmap;
1655         nilfs_btree_init(bmap);
1656         if (nreq != NULL) {
1657                 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1658                 bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1659
1660                 /* create child node at level 1 */
1661                 lock_buffer(bh);
1662                 node = (struct nilfs_btree_node *)bh->b_data;
1663                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1664                 nilfs_btree_node_insert(btree, node,
1665                                         key, dreq->bpr_ptr, n);
1666                 if (!buffer_dirty(bh))
1667                         nilfs_btnode_mark_dirty(bh);
1668                 if (!nilfs_bmap_dirty(bmap))
1669                         nilfs_bmap_set_dirty(bmap);
1670
1671                 unlock_buffer(bh);
1672                 brelse(bh);
1673
1674                 /* create root node at level 2 */
1675                 node = nilfs_btree_get_root(btree);
1676                 tmpptr = nreq->bpr_ptr;
1677                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1678                                       2, 1, &keys[0], &tmpptr);
1679         } else {
1680                 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1681
1682                 /* create root node at level 1 */
1683                 node = nilfs_btree_get_root(btree);
1684                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1685                                       1, n, keys, ptrs);
1686                 nilfs_btree_node_insert(btree, node,
1687                                         key, dreq->bpr_ptr, n);
1688                 if (!nilfs_bmap_dirty(bmap))
1689                         nilfs_bmap_set_dirty(bmap);
1690         }
1691
1692         if (btree->bt_ops->btop_set_target != NULL)
1693                 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1694 }
1695
1696 /**
1697  * nilfs_btree_convert_and_insert -
1698  * @bmap:
1699  * @key:
1700  * @ptr:
1701  * @keys:
1702  * @ptrs:
1703  * @n:
1704  */
1705 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1706                                    __u64 key, __u64 ptr,
1707                                    const __u64 *keys, const __u64 *ptrs, int n)
1708 {
1709         struct buffer_head *bh;
1710         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1711         struct nilfs_bmap_stats stats;
1712         int ret;
1713
1714         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1715                 di = &dreq;
1716                 ni = NULL;
1717         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1718                            1 << bmap->b_inode->i_blkbits)) {
1719                 di = &dreq;
1720                 ni = &nreq;
1721         } else {
1722                 di = NULL;
1723                 ni = NULL;
1724                 BUG();
1725         }
1726
1727         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1728                                                      &stats);
1729         if (ret < 0)
1730                 return ret;
1731         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1732                                               di, ni, bh);
1733         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1734         return 0;
1735 }
1736
1737 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1738                                    struct nilfs_btree_path *path,
1739                                    int level,
1740                                    struct buffer_head *bh)
1741 {
1742         while ((++level < nilfs_btree_height(btree) - 1) &&
1743                !buffer_dirty(path[level].bp_bh))
1744                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1745
1746         return 0;
1747 }
1748
1749 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1750                                         struct nilfs_btree_path *path,
1751                                         int level)
1752 {
1753         struct nilfs_btree_node *parent;
1754         int ret;
1755
1756         parent = nilfs_btree_get_node(btree, path, level + 1);
1757         path[level].bp_oldreq.bpr_ptr =
1758                 nilfs_btree_node_get_ptr(btree, parent,
1759                                          path[level + 1].bp_index);
1760         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1761         ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1762                                         &path[level].bp_oldreq,
1763                                         &path[level].bp_newreq);
1764         if (ret < 0)
1765                 return ret;
1766
1767         if (buffer_nilfs_node(path[level].bp_bh)) {
1768                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1769                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1770                 path[level].bp_ctxt.bh = path[level].bp_bh;
1771                 ret = nilfs_btnode_prepare_change_key(
1772                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1773                         &path[level].bp_ctxt);
1774                 if (ret < 0) {
1775                         nilfs_bmap_abort_update(&btree->bt_bmap,
1776                                                 &path[level].bp_oldreq,
1777                                                 &path[level].bp_newreq);
1778                         return ret;
1779                 }
1780         }
1781
1782         return 0;
1783 }
1784
1785 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1786                                         struct nilfs_btree_path *path,
1787                                         int level)
1788 {
1789         struct nilfs_btree_node *parent;
1790
1791         nilfs_bmap_commit_update(&btree->bt_bmap,
1792                                  &path[level].bp_oldreq,
1793                                  &path[level].bp_newreq);
1794
1795         if (buffer_nilfs_node(path[level].bp_bh)) {
1796                 nilfs_btnode_commit_change_key(
1797                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1798                         &path[level].bp_ctxt);
1799                 path[level].bp_bh = path[level].bp_ctxt.bh;
1800         }
1801         set_buffer_nilfs_volatile(path[level].bp_bh);
1802
1803         parent = nilfs_btree_get_node(btree, path, level + 1);
1804         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1805                                  path[level].bp_newreq.bpr_ptr);
1806 }
1807
1808 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1809                                        struct nilfs_btree_path *path,
1810                                        int level)
1811 {
1812         nilfs_bmap_abort_update(&btree->bt_bmap,
1813                                 &path[level].bp_oldreq,
1814                                 &path[level].bp_newreq);
1815         if (buffer_nilfs_node(path[level].bp_bh))
1816                 nilfs_btnode_abort_change_key(
1817                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1818                         &path[level].bp_ctxt);
1819 }
1820
1821 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1822                                            struct nilfs_btree_path *path,
1823                                            int minlevel,
1824                                            int *maxlevelp)
1825 {
1826         int level, ret;
1827
1828         level = minlevel;
1829         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1830                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1831                 if (ret < 0)
1832                         return ret;
1833         }
1834         while ((++level < nilfs_btree_height(btree) - 1) &&
1835                !buffer_dirty(path[level].bp_bh)) {
1836
1837                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1838                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1839                 if (ret < 0)
1840                         goto out;
1841         }
1842
1843         /* success */
1844         *maxlevelp = level - 1;
1845         return 0;
1846
1847         /* error */
1848  out:
1849         while (--level > minlevel)
1850                 nilfs_btree_abort_update_v(btree, path, level);
1851         if (!buffer_nilfs_volatile(path[level].bp_bh))
1852                 nilfs_btree_abort_update_v(btree, path, level);
1853         return ret;
1854 }
1855
1856 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1857                                            struct nilfs_btree_path *path,
1858                                            int minlevel,
1859                                            int maxlevel,
1860                                            struct buffer_head *bh)
1861 {
1862         int level;
1863
1864         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1865                 nilfs_btree_commit_update_v(btree, path, minlevel);
1866
1867         for (level = minlevel + 1; level <= maxlevel; level++)
1868                 nilfs_btree_commit_update_v(btree, path, level);
1869 }
1870
1871 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1872                                    struct nilfs_btree_path *path,
1873                                    int level,
1874                                    struct buffer_head *bh)
1875 {
1876         int maxlevel, ret;
1877         struct nilfs_btree_node *parent;
1878         __u64 ptr;
1879
1880         get_bh(bh);
1881         path[level].bp_bh = bh;
1882         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1883         if (ret < 0)
1884                 goto out;
1885
1886         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1887                 parent = nilfs_btree_get_node(btree, path, level + 1);
1888                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1889                                                path[level + 1].bp_index);
1890                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1891                 if (ret < 0)
1892                         goto out;
1893         }
1894
1895         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1896
1897  out:
1898         brelse(path[level].bp_bh);
1899         path[level].bp_bh = NULL;
1900         return ret;
1901 }
1902
1903 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1904                                  struct buffer_head *bh)
1905 {
1906         struct nilfs_btree *btree;
1907         struct nilfs_btree_path *path;
1908         struct nilfs_btree_node *node;
1909         __u64 key;
1910         int level, ret;
1911
1912         WARN_ON(!buffer_dirty(bh));
1913
1914         btree = (struct nilfs_btree *)bmap;
1915         path = nilfs_btree_alloc_path(btree);
1916         if (path == NULL)
1917                 return -ENOMEM;
1918         nilfs_btree_init_path(btree, path);
1919
1920         if (buffer_nilfs_node(bh)) {
1921                 node = (struct nilfs_btree_node *)bh->b_data;
1922                 key = nilfs_btree_node_get_key(btree, node, 0);
1923                 level = nilfs_btree_node_get_level(btree, node);
1924         } else {
1925                 key = nilfs_bmap_data_get_key(bmap, bh);
1926                 level = NILFS_BTREE_LEVEL_DATA;
1927         }
1928
1929         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1930         if (ret < 0) {
1931                 if (unlikely(ret == -ENOENT))
1932                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1933                                __func__, (unsigned long long)key, level);
1934                 goto out;
1935         }
1936
1937         ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1938
1939  out:
1940         nilfs_btree_clear_path(btree, path);
1941         nilfs_btree_free_path(btree, path);
1942
1943         return ret;
1944 }
1945
1946 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1947                                     struct buffer_head *bh)
1948 {
1949         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1950 }
1951
1952 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1953                                          struct list_head *lists,
1954                                          struct buffer_head *bh)
1955 {
1956         struct list_head *head;
1957         struct buffer_head *cbh;
1958         struct nilfs_btree_node *node, *cnode;
1959         __u64 key, ckey;
1960         int level;
1961
1962         get_bh(bh);
1963         node = (struct nilfs_btree_node *)bh->b_data;
1964         key = nilfs_btree_node_get_key(btree, node, 0);
1965         level = nilfs_btree_node_get_level(btree, node);
1966         list_for_each(head, &lists[level]) {
1967                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1968                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1969                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1970                 if (key < ckey)
1971                         break;
1972         }
1973         list_add_tail(&bh->b_assoc_buffers, head);
1974 }
1975
1976 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1977                                              struct list_head *listp)
1978 {
1979         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1980         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1981         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1982         struct pagevec pvec;
1983         struct buffer_head *bh, *head;
1984         pgoff_t index = 0;
1985         int level, i;
1986
1987         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1988              level < NILFS_BTREE_LEVEL_MAX;
1989              level++)
1990                 INIT_LIST_HEAD(&lists[level]);
1991
1992         pagevec_init(&pvec, 0);
1993
1994         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1995                                   PAGEVEC_SIZE)) {
1996                 for (i = 0; i < pagevec_count(&pvec); i++) {
1997                         bh = head = page_buffers(pvec.pages[i]);
1998                         do {
1999                                 if (buffer_dirty(bh))
2000                                         nilfs_btree_add_dirty_buffer(btree,
2001                                                                      lists, bh);
2002                         } while ((bh = bh->b_this_page) != head);
2003                 }
2004                 pagevec_release(&pvec);
2005                 cond_resched();
2006         }
2007
2008         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2009              level < NILFS_BTREE_LEVEL_MAX;
2010              level++)
2011                 list_splice(&lists[level], listp->prev);
2012 }
2013
2014 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2015                                 struct nilfs_btree_path *path,
2016                                 int level,
2017                                 struct buffer_head **bh,
2018                                 sector_t blocknr,
2019                                 union nilfs_binfo *binfo)
2020 {
2021         struct nilfs_btree_node *parent;
2022         __u64 key;
2023         __u64 ptr;
2024         int ret;
2025
2026         parent = nilfs_btree_get_node(btree, path, level + 1);
2027         ptr = nilfs_btree_node_get_ptr(btree, parent,
2028                                        path[level + 1].bp_index);
2029         if (buffer_nilfs_node(*bh)) {
2030                 path[level].bp_ctxt.oldkey = ptr;
2031                 path[level].bp_ctxt.newkey = blocknr;
2032                 path[level].bp_ctxt.bh = *bh;
2033                 ret = nilfs_btnode_prepare_change_key(
2034                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2035                         &path[level].bp_ctxt);
2036                 if (ret < 0)
2037                         return ret;
2038                 nilfs_btnode_commit_change_key(
2039                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2040                         &path[level].bp_ctxt);
2041                 *bh = path[level].bp_ctxt.bh;
2042         }
2043
2044         nilfs_btree_node_set_ptr(btree, parent,
2045                                  path[level + 1].bp_index, blocknr);
2046
2047         key = nilfs_btree_node_get_key(btree, parent,
2048                                        path[level + 1].bp_index);
2049         /* on-disk format */
2050         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2051         binfo->bi_dat.bi_level = level;
2052
2053         return 0;
2054 }
2055
2056 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2057                                 struct nilfs_btree_path *path,
2058                                 int level,
2059                                 struct buffer_head **bh,
2060                                 sector_t blocknr,
2061                                 union nilfs_binfo *binfo)
2062 {
2063         struct nilfs_btree_node *parent;
2064         __u64 key;
2065         __u64 ptr;
2066         union nilfs_bmap_ptr_req req;
2067         int ret;
2068
2069         parent = nilfs_btree_get_node(btree, path, level + 1);
2070         ptr = nilfs_btree_node_get_ptr(btree, parent,
2071                                        path[level + 1].bp_index);
2072         req.bpr_ptr = ptr;
2073         ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2074         if (unlikely(ret < 0))
2075                 return ret;
2076
2077         key = nilfs_btree_node_get_key(btree, parent,
2078                                        path[level + 1].bp_index);
2079         /* on-disk format */
2080         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2081         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2082
2083         return 0;
2084 }
2085
2086 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2087                               struct buffer_head **bh,
2088                               sector_t blocknr,
2089                               union nilfs_binfo *binfo)
2090 {
2091         struct nilfs_btree *btree;
2092         struct nilfs_btree_path *path;
2093         struct nilfs_btree_node *node;
2094         __u64 key;
2095         int level, ret;
2096
2097         btree = (struct nilfs_btree *)bmap;
2098         path = nilfs_btree_alloc_path(btree);
2099         if (path == NULL)
2100                 return -ENOMEM;
2101         nilfs_btree_init_path(btree, path);
2102
2103         if (buffer_nilfs_node(*bh)) {
2104                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2105                 key = nilfs_btree_node_get_key(btree, node, 0);
2106                 level = nilfs_btree_node_get_level(btree, node);
2107         } else {
2108                 key = nilfs_bmap_data_get_key(bmap, *bh);
2109                 level = NILFS_BTREE_LEVEL_DATA;
2110         }
2111
2112         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2113         if (ret < 0) {
2114                 WARN_ON(ret == -ENOENT);
2115                 goto out;
2116         }
2117
2118         ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2119                                             blocknr, binfo);
2120
2121  out:
2122         nilfs_btree_clear_path(btree, path);
2123         nilfs_btree_free_path(btree, path);
2124
2125         return ret;
2126 }
2127
2128 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2129                                  struct buffer_head **bh,
2130                                  sector_t blocknr,
2131                                  union nilfs_binfo *binfo)
2132 {
2133         struct nilfs_btree *btree;
2134         struct nilfs_btree_node *node;
2135         __u64 key;
2136         int ret;
2137
2138         btree = (struct nilfs_btree *)bmap;
2139         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2140         if (ret < 0)
2141                 return ret;
2142
2143         if (buffer_nilfs_node(*bh)) {
2144                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2145                 key = nilfs_btree_node_get_key(btree, node, 0);
2146         } else
2147                 key = nilfs_bmap_data_get_key(bmap, *bh);
2148
2149         /* on-disk format */
2150         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2151         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2152
2153         return 0;
2154 }
2155
2156 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2157 {
2158         struct buffer_head *bh;
2159         struct nilfs_btree *btree;
2160         struct nilfs_btree_path *path;
2161         __u64 ptr;
2162         int ret;
2163
2164         btree = (struct nilfs_btree *)bmap;
2165         path = nilfs_btree_alloc_path(btree);
2166         if (path == NULL)
2167                 return -ENOMEM;
2168         nilfs_btree_init_path(btree, path);
2169
2170         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2171         if (ret < 0) {
2172                 WARN_ON(ret == -ENOENT);
2173                 goto out;
2174         }
2175         ret = nilfs_btree_get_block(btree, ptr, &bh);
2176         if (ret < 0) {
2177                 WARN_ON(ret == -ENOENT);
2178                 goto out;
2179         }
2180
2181         if (!buffer_dirty(bh))
2182                 nilfs_btnode_mark_dirty(bh);
2183         brelse(bh);
2184         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2185                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2186
2187  out:
2188         nilfs_btree_clear_path(btree, path);
2189         nilfs_btree_free_path(btree, path);
2190         return ret;
2191 }
2192
2193 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2194         .bop_lookup             =       nilfs_btree_lookup,
2195         .bop_insert             =       nilfs_btree_insert,
2196         .bop_delete             =       nilfs_btree_delete,
2197         .bop_clear              =       NULL,
2198
2199         .bop_propagate          =       nilfs_btree_propagate,
2200
2201         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2202
2203         .bop_assign             =       nilfs_btree_assign,
2204         .bop_mark               =       nilfs_btree_mark,
2205
2206         .bop_last_key           =       nilfs_btree_last_key,
2207         .bop_check_insert       =       NULL,
2208         .bop_check_delete       =       nilfs_btree_check_delete,
2209         .bop_gather_data        =       nilfs_btree_gather_data,
2210 };
2211
2212 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2213         .bop_lookup             =       NULL,
2214         .bop_insert             =       NULL,
2215         .bop_delete             =       NULL,
2216         .bop_clear              =       NULL,
2217
2218         .bop_propagate          =       nilfs_btree_propagate_gc,
2219
2220         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2221
2222         .bop_assign             =       nilfs_btree_assign_gc,
2223         .bop_mark               =       NULL,
2224
2225         .bop_last_key           =       NULL,
2226         .bop_check_insert       =       NULL,
2227         .bop_check_delete       =       NULL,
2228         .bop_gather_data        =       NULL,
2229 };
2230
2231 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2232         .btop_find_target       =       nilfs_btree_find_target_v,
2233         .btop_set_target        =       nilfs_btree_set_target_v,
2234         .btop_propagate         =       nilfs_btree_propagate_v,
2235         .btop_assign            =       nilfs_btree_assign_v,
2236 };
2237
2238 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2239         .btop_find_target       =       NULL,
2240         .btop_set_target        =       NULL,
2241         .btop_propagate         =       nilfs_btree_propagate_p,
2242         .btop_assign            =       nilfs_btree_assign_p,
2243 };
2244
2245 int nilfs_btree_init(struct nilfs_bmap *bmap)
2246 {
2247         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2248
2249         bmap->b_ops = &nilfs_btree_ops;
2250         switch (bmap->b_inode->i_ino) {
2251         case NILFS_DAT_INO:
2252                 btree->bt_ops = &nilfs_btree_ops_p;
2253                 break;
2254         default:
2255                 btree->bt_ops = &nilfs_btree_ops_v;
2256                 break;
2257         }
2258
2259         return 0;
2260 }
2261
2262 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2263 {
2264         bmap->b_ops = &nilfs_btree_ops_gc;
2265 }