]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/nilfs2/btree.c
102e218b426fe8ce388d4a01850715f11f22d663
[mv-sheeva.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43         struct buffer_head *bp_bh;
44         struct buffer_head *bp_sib_bh;
45         int bp_index;
46         union nilfs_bmap_ptr_req bp_oldreq;
47         union nilfs_bmap_ptr_req bp_newreq;
48         struct nilfs_btnode_chkey_ctxt bp_ctxt;
49         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50                       int, __u64 *, __u64 *);
51 };
52
53 /*
54  * B-tree path operations
55  */
56
57 static struct kmem_cache *nilfs_btree_path_cache;
58
59 int __init nilfs_btree_path_cache_init(void)
60 {
61         nilfs_btree_path_cache =
62                 kmem_cache_create("nilfs2_btree_path_cache",
63                                   sizeof(struct nilfs_btree_path) *
64                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67
68 void nilfs_btree_path_cache_destroy(void)
69 {
70         kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76         return (struct nilfs_btree_path *)
77                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81                                          struct nilfs_btree_path *path)
82 {
83         kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87                                   struct nilfs_btree_path *path)
88 {
89         int level;
90
91         for (level = NILFS_BTREE_LEVEL_DATA;
92              level < NILFS_BTREE_LEVEL_MAX;
93              level++) {
94                 path[level].bp_bh = NULL;
95                 path[level].bp_sib_bh = NULL;
96                 path[level].bp_index = 0;
97                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_op = NULL;
100         }
101 }
102
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104                                    struct nilfs_btree_path *path)
105 {
106         int level;
107
108         for (level = NILFS_BTREE_LEVEL_DATA;
109              level < NILFS_BTREE_LEVEL_MAX;
110              level++) {
111                 if (path[level].bp_bh != NULL) {
112                         brelse(path[level].bp_bh);
113                         path[level].bp_bh = NULL;
114                 }
115                 /* sib_bh is released or deleted by prepare or commit
116                  * operations. */
117                 path[level].bp_sib_bh = NULL;
118                 path[level].bp_index = 0;
119                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
120                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_op = NULL;
122         }
123 }
124
125
126 /*
127  * B-tree node operations
128  */
129
130 static inline int
131 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
132                            const struct nilfs_btree_node *node)
133 {
134         return node->bn_flags;
135 }
136
137 static inline void
138 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
139                            struct nilfs_btree_node *node,
140                            int flags)
141 {
142         node->bn_flags = flags;
143 }
144
145 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
146                                         const struct nilfs_btree_node *node)
147 {
148         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
149 }
150
151 static inline int
152 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
153                            const struct nilfs_btree_node *node)
154 {
155         return node->bn_level;
156 }
157
158 static inline void
159 nilfs_btree_node_set_level(struct nilfs_btree *btree,
160                            struct nilfs_btree_node *node,
161                            int level)
162 {
163         node->bn_level = level;
164 }
165
166 static inline int
167 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
168                                const struct nilfs_btree_node *node)
169 {
170         return le16_to_cpu(node->bn_nchildren);
171 }
172
173 static inline void
174 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
175                                struct nilfs_btree_node *node,
176                                int nchildren)
177 {
178         node->bn_nchildren = cpu_to_le16(nchildren);
179 }
180
181 static inline int
182 nilfs_btree_node_size(const struct nilfs_btree *btree)
183 {
184         return 1 << btree->bt_bmap.b_inode->i_blkbits;
185 }
186
187 static inline int
188 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
189                                const struct nilfs_btree_node *node)
190 {
191         return nilfs_btree_node_root(btree, node) ?
192                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
193                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
194 }
195
196 static inline int
197 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
198                                const struct nilfs_btree_node *node)
199 {
200         return nilfs_btree_node_root(btree, node) ?
201                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
202                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
203 }
204
205 static inline __le64 *
206 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
207                        const struct nilfs_btree_node *node)
208 {
209         return (__le64 *)((char *)(node + 1) +
210                           (nilfs_btree_node_root(btree, node) ?
211                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
212 }
213
214 static inline __le64 *
215 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
216                        const struct nilfs_btree_node *node)
217 {
218         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
219                           nilfs_btree_node_nchildren_max(btree, node));
220 }
221
222 static inline __u64
223 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
224                          const struct nilfs_btree_node *node, int index)
225 {
226         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
227                                         index));
228 }
229
230 static inline void
231 nilfs_btree_node_set_key(struct nilfs_btree *btree,
232                          struct nilfs_btree_node *node, int index, __u64 key)
233 {
234         *(nilfs_btree_node_dkeys(btree, node) + index) =
235                 nilfs_bmap_key_to_dkey(key);
236 }
237
238 static inline __u64
239 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
240                          const struct nilfs_btree_node *node,
241                          int index)
242 {
243         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
244                                         index));
245 }
246
247 static inline void
248 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
249                          struct nilfs_btree_node *node,
250                          int index,
251                          __u64 ptr)
252 {
253         *(nilfs_btree_node_dptrs(btree, node) + index) =
254                 nilfs_bmap_ptr_to_dptr(ptr);
255 }
256
257 static void nilfs_btree_node_init(struct nilfs_btree *btree,
258                                   struct nilfs_btree_node *node,
259                                   int flags, int level, int nchildren,
260                                   const __u64 *keys, const __u64 *ptrs)
261 {
262         __le64 *dkeys;
263         __le64 *dptrs;
264         int i;
265
266         nilfs_btree_node_set_flags(btree, node, flags);
267         nilfs_btree_node_set_level(btree, node, level);
268         nilfs_btree_node_set_nchildren(btree, node, nchildren);
269
270         dkeys = nilfs_btree_node_dkeys(btree, node);
271         dptrs = nilfs_btree_node_dptrs(btree, node);
272         for (i = 0; i < nchildren; i++) {
273                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
274                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
275         }
276 }
277
278 /* Assume the buffer heads corresponding to left and right are locked. */
279 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
280                                        struct nilfs_btree_node *left,
281                                        struct nilfs_btree_node *right,
282                                        int n)
283 {
284         __le64 *ldkeys, *rdkeys;
285         __le64 *ldptrs, *rdptrs;
286         int lnchildren, rnchildren;
287
288         ldkeys = nilfs_btree_node_dkeys(btree, left);
289         ldptrs = nilfs_btree_node_dptrs(btree, left);
290         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
291
292         rdkeys = nilfs_btree_node_dkeys(btree, right);
293         rdptrs = nilfs_btree_node_dptrs(btree, right);
294         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
295
296         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
297         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
298         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
299         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
300
301         lnchildren += n;
302         rnchildren -= n;
303         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
304         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
305 }
306
307 /* Assume that the buffer heads corresponding to left and right are locked. */
308 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
309                                         struct nilfs_btree_node *left,
310                                         struct nilfs_btree_node *right,
311                                         int n)
312 {
313         __le64 *ldkeys, *rdkeys;
314         __le64 *ldptrs, *rdptrs;
315         int lnchildren, rnchildren;
316
317         ldkeys = nilfs_btree_node_dkeys(btree, left);
318         ldptrs = nilfs_btree_node_dptrs(btree, left);
319         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
320
321         rdkeys = nilfs_btree_node_dkeys(btree, right);
322         rdptrs = nilfs_btree_node_dptrs(btree, right);
323         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
324
325         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
326         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
327         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
328         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
329
330         lnchildren -= n;
331         rnchildren += n;
332         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
333         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
334 }
335
336 /* Assume that the buffer head corresponding to node is locked. */
337 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
338                                     struct nilfs_btree_node *node,
339                                     __u64 key, __u64 ptr, int index)
340 {
341         __le64 *dkeys;
342         __le64 *dptrs;
343         int nchildren;
344
345         dkeys = nilfs_btree_node_dkeys(btree, node);
346         dptrs = nilfs_btree_node_dptrs(btree, node);
347         nchildren = nilfs_btree_node_get_nchildren(btree, node);
348         if (index < nchildren) {
349                 memmove(dkeys + index + 1, dkeys + index,
350                         (nchildren - index) * sizeof(*dkeys));
351                 memmove(dptrs + index + 1, dptrs + index,
352                         (nchildren - index) * sizeof(*dptrs));
353         }
354         dkeys[index] = nilfs_bmap_key_to_dkey(key);
355         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
356         nchildren++;
357         nilfs_btree_node_set_nchildren(btree, node, nchildren);
358 }
359
360 /* Assume that the buffer head corresponding to node is locked. */
361 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
362                                     struct nilfs_btree_node *node,
363                                     __u64 *keyp, __u64 *ptrp, int index)
364 {
365         __u64 key;
366         __u64 ptr;
367         __le64 *dkeys;
368         __le64 *dptrs;
369         int nchildren;
370
371         dkeys = nilfs_btree_node_dkeys(btree, node);
372         dptrs = nilfs_btree_node_dptrs(btree, node);
373         key = nilfs_bmap_dkey_to_key(dkeys[index]);
374         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
375         nchildren = nilfs_btree_node_get_nchildren(btree, node);
376         if (keyp != NULL)
377                 *keyp = key;
378         if (ptrp != NULL)
379                 *ptrp = ptr;
380
381         if (index < nchildren - 1) {
382                 memmove(dkeys + index, dkeys + index + 1,
383                         (nchildren - index - 1) * sizeof(*dkeys));
384                 memmove(dptrs + index, dptrs + index + 1,
385                         (nchildren - index - 1) * sizeof(*dptrs));
386         }
387         nchildren--;
388         nilfs_btree_node_set_nchildren(btree, node, nchildren);
389 }
390
391 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
392                                    const struct nilfs_btree_node *node,
393                                    __u64 key, int *indexp)
394 {
395         __u64 nkey;
396         int index, low, high, s;
397
398         /* binary search */
399         low = 0;
400         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
401         index = 0;
402         s = 0;
403         while (low <= high) {
404                 index = (low + high) / 2;
405                 nkey = nilfs_btree_node_get_key(btree, node, index);
406                 if (nkey == key) {
407                         s = 0;
408                         goto out;
409                 } else if (nkey < key) {
410                         low = index + 1;
411                         s = -1;
412                 } else {
413                         high = index - 1;
414                         s = 1;
415                 }
416         }
417
418         /* adjust index */
419         if (nilfs_btree_node_get_level(btree, node) >
420             NILFS_BTREE_LEVEL_NODE_MIN) {
421                 if ((s > 0) && (index > 0))
422                         index--;
423         } else if (s < 0)
424                 index++;
425
426  out:
427         *indexp = index;
428
429         return s == 0;
430 }
431
432 static inline struct nilfs_btree_node *
433 nilfs_btree_get_root(const struct nilfs_btree *btree)
434 {
435         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
436 }
437
438 static inline struct nilfs_btree_node *
439 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
440                              const struct nilfs_btree_path *path,
441                              int level)
442 {
443         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
444 }
445
446 static inline struct nilfs_btree_node *
447 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
448                          const struct nilfs_btree_path *path,
449                          int level)
450 {
451         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
452 }
453
454 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
455 {
456         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
457                 + 1;
458 }
459
460 static inline struct nilfs_btree_node *
461 nilfs_btree_get_node(const struct nilfs_btree *btree,
462                      const struct nilfs_btree_path *path,
463                      int level)
464 {
465         return (level == nilfs_btree_height(btree) - 1) ?
466                 nilfs_btree_get_root(btree) :
467                 nilfs_btree_get_nonroot_node(btree, path, level);
468 }
469
470 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
471                                  struct nilfs_btree_path *path,
472                                  __u64 key, __u64 *ptrp, int minlevel)
473 {
474         struct nilfs_btree_node *node;
475         __u64 ptr;
476         int level, index, found, ret;
477
478         node = nilfs_btree_get_root(btree);
479         level = nilfs_btree_node_get_level(btree, node);
480         if ((level < minlevel) ||
481             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
482                 return -ENOENT;
483
484         found = nilfs_btree_node_lookup(btree, node, key, &index);
485         ptr = nilfs_btree_node_get_ptr(btree, node, index);
486         path[level].bp_bh = NULL;
487         path[level].bp_index = index;
488
489         for (level--; level >= minlevel; level--) {
490                 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
491                                            &path[level].bp_bh);
492                 if (ret < 0)
493                         return ret;
494                 node = nilfs_btree_get_nonroot_node(btree, path, level);
495                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
496                 if (!found)
497                         found = nilfs_btree_node_lookup(btree, node, key,
498                                                         &index);
499                 else
500                         index = 0;
501                 if (index < nilfs_btree_node_nchildren_max(btree, node))
502                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
503                 else {
504                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
505                         /* insert */
506                         ptr = NILFS_BMAP_INVALID_PTR;
507                 }
508                 path[level].bp_index = index;
509         }
510         if (!found)
511                 return -ENOENT;
512
513         if (ptrp != NULL)
514                 *ptrp = ptr;
515
516         return 0;
517 }
518
519 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
520                                       struct nilfs_btree_path *path,
521                                       __u64 *keyp, __u64 *ptrp)
522 {
523         struct nilfs_btree_node *node;
524         __u64 ptr;
525         int index, level, ret;
526
527         node = nilfs_btree_get_root(btree);
528         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
529         if (index < 0)
530                 return -ENOENT;
531         level = nilfs_btree_node_get_level(btree, node);
532         ptr = nilfs_btree_node_get_ptr(btree, node, index);
533         path[level].bp_bh = NULL;
534         path[level].bp_index = index;
535
536         for (level--; level > 0; level--) {
537                 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
538                                            &path[level].bp_bh);
539                 if (ret < 0)
540                         return ret;
541                 node = nilfs_btree_get_nonroot_node(btree, path, level);
542                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
543                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
544                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
545                 path[level].bp_index = index;
546         }
547
548         if (keyp != NULL)
549                 *keyp = nilfs_btree_node_get_key(btree, node, index);
550         if (ptrp != NULL)
551                 *ptrp = ptr;
552
553         return 0;
554 }
555
556 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
557                               __u64 key, int level, __u64 *ptrp)
558 {
559         struct nilfs_btree *btree;
560         struct nilfs_btree_path *path;
561         __u64 ptr;
562         int ret;
563
564         btree = (struct nilfs_btree *)bmap;
565         path = nilfs_btree_alloc_path(btree);
566         if (path == NULL)
567                 return -ENOMEM;
568         nilfs_btree_init_path(btree, path);
569
570         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
571
572         if (ptrp != NULL)
573                 *ptrp = ptr;
574
575         nilfs_btree_clear_path(btree, path);
576         nilfs_btree_free_path(btree, path);
577
578         return ret;
579 }
580
581 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
582                                     struct nilfs_btree_path *path,
583                                     int level, __u64 key)
584 {
585         if (level < nilfs_btree_height(btree) - 1) {
586                 do {
587                         lock_buffer(path[level].bp_bh);
588                         nilfs_btree_node_set_key(
589                                 btree,
590                                 nilfs_btree_get_nonroot_node(
591                                         btree, path, level),
592                                 path[level].bp_index, key);
593                         if (!buffer_dirty(path[level].bp_bh))
594                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
595                         unlock_buffer(path[level].bp_bh);
596                 } while ((path[level].bp_index == 0) &&
597                          (++level < nilfs_btree_height(btree) - 1));
598         }
599
600         /* root */
601         if (level == nilfs_btree_height(btree) - 1) {
602                 nilfs_btree_node_set_key(btree,
603                                          nilfs_btree_get_root(btree),
604                                          path[level].bp_index, key);
605         }
606 }
607
608 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
609                                   struct nilfs_btree_path *path,
610                                   int level, __u64 *keyp, __u64 *ptrp)
611 {
612         struct nilfs_btree_node *node;
613
614         if (level < nilfs_btree_height(btree) - 1) {
615                 lock_buffer(path[level].bp_bh);
616                 node = nilfs_btree_get_nonroot_node(btree, path, level);
617                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
618                                         path[level].bp_index);
619                 if (!buffer_dirty(path[level].bp_bh))
620                         nilfs_btnode_mark_dirty(path[level].bp_bh);
621                 unlock_buffer(path[level].bp_bh);
622
623                 if (path[level].bp_index == 0)
624                         nilfs_btree_promote_key(btree, path, level + 1,
625                                                 nilfs_btree_node_get_key(
626                                                         btree, node, 0));
627         } else {
628                 node = nilfs_btree_get_root(btree);
629                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
630                                         path[level].bp_index);
631         }
632 }
633
634 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
635                                    struct nilfs_btree_path *path,
636                                    int level, __u64 *keyp, __u64 *ptrp)
637 {
638         struct nilfs_btree_node *node, *left;
639         int nchildren, lnchildren, n, move;
640
641         lock_buffer(path[level].bp_bh);
642         lock_buffer(path[level].bp_sib_bh);
643
644         node = nilfs_btree_get_nonroot_node(btree, path, level);
645         left = nilfs_btree_get_sib_node(btree, path, level);
646         nchildren = nilfs_btree_node_get_nchildren(btree, node);
647         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
648         move = 0;
649
650         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
651         if (n > path[level].bp_index) {
652                 /* move insert point */
653                 n--;
654                 move = 1;
655         }
656
657         nilfs_btree_node_move_left(btree, left, node, n);
658
659         if (!buffer_dirty(path[level].bp_bh))
660                 nilfs_btnode_mark_dirty(path[level].bp_bh);
661         if (!buffer_dirty(path[level].bp_sib_bh))
662                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
663
664         unlock_buffer(path[level].bp_bh);
665         unlock_buffer(path[level].bp_sib_bh);
666
667         nilfs_btree_promote_key(btree, path, level + 1,
668                                 nilfs_btree_node_get_key(btree, node, 0));
669
670         if (move) {
671                 brelse(path[level].bp_bh);
672                 path[level].bp_bh = path[level].bp_sib_bh;
673                 path[level].bp_sib_bh = NULL;
674                 path[level].bp_index += lnchildren;
675                 path[level + 1].bp_index--;
676         } else {
677                 brelse(path[level].bp_sib_bh);
678                 path[level].bp_sib_bh = NULL;
679                 path[level].bp_index -= n;
680         }
681
682         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
683 }
684
685 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
686                                     struct nilfs_btree_path *path,
687                                     int level, __u64 *keyp, __u64 *ptrp)
688 {
689         struct nilfs_btree_node *node, *right;
690         int nchildren, rnchildren, n, move;
691
692         lock_buffer(path[level].bp_bh);
693         lock_buffer(path[level].bp_sib_bh);
694
695         node = nilfs_btree_get_nonroot_node(btree, path, level);
696         right = nilfs_btree_get_sib_node(btree, path, level);
697         nchildren = nilfs_btree_node_get_nchildren(btree, node);
698         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
699         move = 0;
700
701         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
702         if (n > nchildren - path[level].bp_index) {
703                 /* move insert point */
704                 n--;
705                 move = 1;
706         }
707
708         nilfs_btree_node_move_right(btree, node, right, n);
709
710         if (!buffer_dirty(path[level].bp_bh))
711                 nilfs_btnode_mark_dirty(path[level].bp_bh);
712         if (!buffer_dirty(path[level].bp_sib_bh))
713                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
714
715         unlock_buffer(path[level].bp_bh);
716         unlock_buffer(path[level].bp_sib_bh);
717
718         path[level + 1].bp_index++;
719         nilfs_btree_promote_key(btree, path, level + 1,
720                                 nilfs_btree_node_get_key(btree, right, 0));
721         path[level + 1].bp_index--;
722
723         if (move) {
724                 brelse(path[level].bp_bh);
725                 path[level].bp_bh = path[level].bp_sib_bh;
726                 path[level].bp_sib_bh = NULL;
727                 path[level].bp_index -=
728                         nilfs_btree_node_get_nchildren(btree, node);
729                 path[level + 1].bp_index++;
730         } else {
731                 brelse(path[level].bp_sib_bh);
732                 path[level].bp_sib_bh = NULL;
733         }
734
735         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
736 }
737
738 static void nilfs_btree_split(struct nilfs_btree *btree,
739                               struct nilfs_btree_path *path,
740                               int level, __u64 *keyp, __u64 *ptrp)
741 {
742         struct nilfs_btree_node *node, *right;
743         __u64 newkey;
744         __u64 newptr;
745         int nchildren, n, move;
746
747         lock_buffer(path[level].bp_bh);
748         lock_buffer(path[level].bp_sib_bh);
749
750         node = nilfs_btree_get_nonroot_node(btree, path, level);
751         right = nilfs_btree_get_sib_node(btree, path, level);
752         nchildren = nilfs_btree_node_get_nchildren(btree, node);
753         move = 0;
754
755         n = (nchildren + 1) / 2;
756         if (n > nchildren - path[level].bp_index) {
757                 n--;
758                 move = 1;
759         }
760
761         nilfs_btree_node_move_right(btree, node, right, n);
762
763         if (!buffer_dirty(path[level].bp_bh))
764                 nilfs_btnode_mark_dirty(path[level].bp_bh);
765         if (!buffer_dirty(path[level].bp_sib_bh))
766                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
767
768         unlock_buffer(path[level].bp_bh);
769         unlock_buffer(path[level].bp_sib_bh);
770
771         newkey = nilfs_btree_node_get_key(btree, right, 0);
772         newptr = path[level].bp_newreq.bpr_ptr;
773
774         if (move) {
775                 path[level].bp_index -=
776                         nilfs_btree_node_get_nchildren(btree, node);
777                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
778                                         path[level].bp_index);
779
780                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
781                 *ptrp = path[level].bp_newreq.bpr_ptr;
782
783                 brelse(path[level].bp_bh);
784                 path[level].bp_bh = path[level].bp_sib_bh;
785                 path[level].bp_sib_bh = NULL;
786         } else {
787                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
788
789                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
790                 *ptrp = path[level].bp_newreq.bpr_ptr;
791
792                 brelse(path[level].bp_sib_bh);
793                 path[level].bp_sib_bh = NULL;
794         }
795
796         path[level + 1].bp_index++;
797 }
798
799 static void nilfs_btree_grow(struct nilfs_btree *btree,
800                              struct nilfs_btree_path *path,
801                              int level, __u64 *keyp, __u64 *ptrp)
802 {
803         struct nilfs_btree_node *root, *child;
804         int n;
805
806         lock_buffer(path[level].bp_sib_bh);
807
808         root = nilfs_btree_get_root(btree);
809         child = nilfs_btree_get_sib_node(btree, path, level);
810
811         n = nilfs_btree_node_get_nchildren(btree, root);
812
813         nilfs_btree_node_move_right(btree, root, child, n);
814         nilfs_btree_node_set_level(btree, root, level + 1);
815
816         if (!buffer_dirty(path[level].bp_sib_bh))
817                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
818
819         unlock_buffer(path[level].bp_sib_bh);
820
821         path[level].bp_bh = path[level].bp_sib_bh;
822         path[level].bp_sib_bh = NULL;
823
824         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
825
826         *keyp = nilfs_btree_node_get_key(btree, child, 0);
827         *ptrp = path[level].bp_newreq.bpr_ptr;
828 }
829
830 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
831                                    const struct nilfs_btree_path *path)
832 {
833         struct nilfs_btree_node *node;
834         int level;
835
836         if (path == NULL)
837                 return NILFS_BMAP_INVALID_PTR;
838
839         /* left sibling */
840         level = NILFS_BTREE_LEVEL_NODE_MIN;
841         if (path[level].bp_index > 0) {
842                 node = nilfs_btree_get_node(btree, path, level);
843                 return nilfs_btree_node_get_ptr(btree, node,
844                                                 path[level].bp_index - 1);
845         }
846
847         /* parent */
848         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
849         if (level <= nilfs_btree_height(btree) - 1) {
850                 node = nilfs_btree_get_node(btree, path, level);
851                 return nilfs_btree_node_get_ptr(btree, node,
852                                                 path[level].bp_index);
853         }
854
855         return NILFS_BMAP_INVALID_PTR;
856 }
857
858 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
859                                        const struct nilfs_btree_path *path,
860                                        __u64 key)
861 {
862         __u64 ptr;
863
864         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
865         if (ptr != NILFS_BMAP_INVALID_PTR)
866                 /* sequential access */
867                 return ptr;
868         else {
869                 ptr = nilfs_btree_find_near(btree, path);
870                 if (ptr != NILFS_BMAP_INVALID_PTR)
871                         /* near */
872                         return ptr;
873         }
874         /* block group */
875         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
876 }
877
878 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
879                                      __u64 ptr)
880 {
881         btree->bt_bmap.b_last_allocated_key = key;
882         btree->bt_bmap.b_last_allocated_ptr = ptr;
883 }
884
885 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
886                                       struct nilfs_btree_path *path,
887                                       int *levelp, __u64 key, __u64 ptr,
888                                       struct nilfs_bmap_stats *stats)
889 {
890         struct buffer_head *bh;
891         struct nilfs_btree_node *node, *parent, *sib;
892         __u64 sibptr;
893         int pindex, level, ret;
894
895         stats->bs_nblocks = 0;
896         level = NILFS_BTREE_LEVEL_DATA;
897
898         /* allocate a new ptr for data block */
899         if (btree->bt_ops->btop_find_target != NULL)
900                 path[level].bp_newreq.bpr_ptr =
901                         btree->bt_ops->btop_find_target(btree, path, key);
902
903         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
904                 &btree->bt_bmap, &path[level].bp_newreq);
905         if (ret < 0)
906                 goto err_out_data;
907
908         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
909              level < nilfs_btree_height(btree) - 1;
910              level++) {
911                 node = nilfs_btree_get_nonroot_node(btree, path, level);
912                 if (nilfs_btree_node_get_nchildren(btree, node) <
913                     nilfs_btree_node_nchildren_max(btree, node)) {
914                         path[level].bp_op = nilfs_btree_do_insert;
915                         stats->bs_nblocks++;
916                         goto out;
917                 }
918
919                 parent = nilfs_btree_get_node(btree, path, level + 1);
920                 pindex = path[level + 1].bp_index;
921
922                 /* left sibling */
923                 if (pindex > 0) {
924                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
925                                                           pindex - 1);
926                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
927                                                    &bh);
928                         if (ret < 0)
929                                 goto err_out_child_node;
930                         sib = (struct nilfs_btree_node *)bh->b_data;
931                         if (nilfs_btree_node_get_nchildren(btree, sib) <
932                             nilfs_btree_node_nchildren_max(btree, sib)) {
933                                 path[level].bp_sib_bh = bh;
934                                 path[level].bp_op = nilfs_btree_carry_left;
935                                 stats->bs_nblocks++;
936                                 goto out;
937                         } else
938                                 brelse(bh);
939                 }
940
941                 /* right sibling */
942                 if (pindex <
943                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
944                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
945                                                           pindex + 1);
946                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
947                                                    &bh);
948                         if (ret < 0)
949                                 goto err_out_child_node;
950                         sib = (struct nilfs_btree_node *)bh->b_data;
951                         if (nilfs_btree_node_get_nchildren(btree, sib) <
952                             nilfs_btree_node_nchildren_max(btree, sib)) {
953                                 path[level].bp_sib_bh = bh;
954                                 path[level].bp_op = nilfs_btree_carry_right;
955                                 stats->bs_nblocks++;
956                                 goto out;
957                         } else
958                                 brelse(bh);
959                 }
960
961                 /* split */
962                 path[level].bp_newreq.bpr_ptr =
963                         path[level - 1].bp_newreq.bpr_ptr + 1;
964                 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
965                         &btree->bt_bmap, &path[level].bp_newreq);
966                 if (ret < 0)
967                         goto err_out_child_node;
968                 ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
969                                                path[level].bp_newreq.bpr_ptr,
970                                                &bh);
971                 if (ret < 0)
972                         goto err_out_curr_node;
973
974                 stats->bs_nblocks++;
975
976                 lock_buffer(bh);
977                 nilfs_btree_node_init(btree,
978                                       (struct nilfs_btree_node *)bh->b_data,
979                                       0, level, 0, NULL, NULL);
980                 unlock_buffer(bh);
981                 path[level].bp_sib_bh = bh;
982                 path[level].bp_op = nilfs_btree_split;
983         }
984
985         /* root */
986         node = nilfs_btree_get_root(btree);
987         if (nilfs_btree_node_get_nchildren(btree, node) <
988             nilfs_btree_node_nchildren_max(btree, node)) {
989                 path[level].bp_op = nilfs_btree_do_insert;
990                 stats->bs_nblocks++;
991                 goto out;
992         }
993
994         /* grow */
995         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
996         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
997                 &btree->bt_bmap, &path[level].bp_newreq);
998         if (ret < 0)
999                 goto err_out_child_node;
1000         ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
1001                                        path[level].bp_newreq.bpr_ptr, &bh);
1002         if (ret < 0)
1003                 goto err_out_curr_node;
1004
1005         lock_buffer(bh);
1006         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1007                               0, level, 0, NULL, NULL);
1008         unlock_buffer(bh);
1009         path[level].bp_sib_bh = bh;
1010         path[level].bp_op = nilfs_btree_grow;
1011
1012         level++;
1013         path[level].bp_op = nilfs_btree_do_insert;
1014
1015         /* a newly-created node block and a data block are added */
1016         stats->bs_nblocks += 2;
1017
1018         /* success */
1019  out:
1020         *levelp = level;
1021         return ret;
1022
1023         /* error */
1024  err_out_curr_node:
1025         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1026                                                     &path[level].bp_newreq);
1027  err_out_child_node:
1028         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1029                 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1030                 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1031                         &btree->bt_bmap, &path[level].bp_newreq);
1032
1033         }
1034
1035         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1036                                                        &path[level].bp_newreq);
1037  err_out_data:
1038         *levelp = level;
1039         stats->bs_nblocks = 0;
1040         return ret;
1041 }
1042
1043 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1044                                       struct nilfs_btree_path *path,
1045                                       int maxlevel, __u64 key, __u64 ptr)
1046 {
1047         int level;
1048
1049         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1050         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1051         if (btree->bt_ops->btop_set_target != NULL)
1052                 btree->bt_ops->btop_set_target(btree, key, ptr);
1053
1054         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1055                 if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
1056                         btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1057                                 &btree->bt_bmap, &path[level - 1].bp_newreq);
1058                 }
1059                 path[level].bp_op(btree, path, level, &key, &ptr);
1060         }
1061
1062         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1063                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1064 }
1065
1066 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1067 {
1068         struct nilfs_btree *btree;
1069         struct nilfs_btree_path *path;
1070         struct nilfs_bmap_stats stats;
1071         int level, ret;
1072
1073         btree = (struct nilfs_btree *)bmap;
1074         path = nilfs_btree_alloc_path(btree);
1075         if (path == NULL)
1076                 return -ENOMEM;
1077         nilfs_btree_init_path(btree, path);
1078
1079         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1080                                     NILFS_BTREE_LEVEL_NODE_MIN);
1081         if (ret != -ENOENT) {
1082                 if (ret == 0)
1083                         ret = -EEXIST;
1084                 goto out;
1085         }
1086
1087         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1088         if (ret < 0)
1089                 goto out;
1090         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1091         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1092
1093  out:
1094         nilfs_btree_clear_path(btree, path);
1095         nilfs_btree_free_path(btree, path);
1096         return ret;
1097 }
1098
1099 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1100                                   struct nilfs_btree_path *path,
1101                                   int level, __u64 *keyp, __u64 *ptrp)
1102 {
1103         struct nilfs_btree_node *node;
1104
1105         if (level < nilfs_btree_height(btree) - 1) {
1106                 lock_buffer(path[level].bp_bh);
1107                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1108                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1109                                         path[level].bp_index);
1110                 if (!buffer_dirty(path[level].bp_bh))
1111                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1112                 unlock_buffer(path[level].bp_bh);
1113                 if (path[level].bp_index == 0)
1114                         nilfs_btree_promote_key(btree, path, level + 1,
1115                                 nilfs_btree_node_get_key(btree, node, 0));
1116         } else {
1117                 node = nilfs_btree_get_root(btree);
1118                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1119                                         path[level].bp_index);
1120         }
1121 }
1122
1123 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1124                                     struct nilfs_btree_path *path,
1125                                     int level, __u64 *keyp, __u64 *ptrp)
1126 {
1127         struct nilfs_btree_node *node, *left;
1128         int nchildren, lnchildren, n;
1129
1130         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1131
1132         lock_buffer(path[level].bp_bh);
1133         lock_buffer(path[level].bp_sib_bh);
1134
1135         node = nilfs_btree_get_nonroot_node(btree, path, level);
1136         left = nilfs_btree_get_sib_node(btree, path, level);
1137         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1138         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1139
1140         n = (nchildren + lnchildren) / 2 - nchildren;
1141
1142         nilfs_btree_node_move_right(btree, left, node, n);
1143
1144         if (!buffer_dirty(path[level].bp_bh))
1145                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1146         if (!buffer_dirty(path[level].bp_sib_bh))
1147                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1148
1149         unlock_buffer(path[level].bp_bh);
1150         unlock_buffer(path[level].bp_sib_bh);
1151
1152         nilfs_btree_promote_key(btree, path, level + 1,
1153                                 nilfs_btree_node_get_key(btree, node, 0));
1154
1155         brelse(path[level].bp_sib_bh);
1156         path[level].bp_sib_bh = NULL;
1157         path[level].bp_index += n;
1158 }
1159
1160 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1161                                      struct nilfs_btree_path *path,
1162                                      int level, __u64 *keyp, __u64 *ptrp)
1163 {
1164         struct nilfs_btree_node *node, *right;
1165         int nchildren, rnchildren, n;
1166
1167         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1168
1169         lock_buffer(path[level].bp_bh);
1170         lock_buffer(path[level].bp_sib_bh);
1171
1172         node = nilfs_btree_get_nonroot_node(btree, path, level);
1173         right = nilfs_btree_get_sib_node(btree, path, level);
1174         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1175         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1176
1177         n = (nchildren + rnchildren) / 2 - nchildren;
1178
1179         nilfs_btree_node_move_left(btree, node, right, n);
1180
1181         if (!buffer_dirty(path[level].bp_bh))
1182                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1183         if (!buffer_dirty(path[level].bp_sib_bh))
1184                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1185
1186         unlock_buffer(path[level].bp_bh);
1187         unlock_buffer(path[level].bp_sib_bh);
1188
1189         path[level + 1].bp_index++;
1190         nilfs_btree_promote_key(btree, path, level + 1,
1191                                 nilfs_btree_node_get_key(btree, right, 0));
1192         path[level + 1].bp_index--;
1193
1194         brelse(path[level].bp_sib_bh);
1195         path[level].bp_sib_bh = NULL;
1196 }
1197
1198 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1199                                     struct nilfs_btree_path *path,
1200                                     int level, __u64 *keyp, __u64 *ptrp)
1201 {
1202         struct nilfs_btree_node *node, *left;
1203         int n;
1204
1205         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1206
1207         lock_buffer(path[level].bp_bh);
1208         lock_buffer(path[level].bp_sib_bh);
1209
1210         node = nilfs_btree_get_nonroot_node(btree, path, level);
1211         left = nilfs_btree_get_sib_node(btree, path, level);
1212
1213         n = nilfs_btree_node_get_nchildren(btree, node);
1214
1215         nilfs_btree_node_move_left(btree, left, node, n);
1216
1217         if (!buffer_dirty(path[level].bp_sib_bh))
1218                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1219
1220         unlock_buffer(path[level].bp_bh);
1221         unlock_buffer(path[level].bp_sib_bh);
1222
1223         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1224         path[level].bp_bh = path[level].bp_sib_bh;
1225         path[level].bp_sib_bh = NULL;
1226         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1227 }
1228
1229 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1230                                      struct nilfs_btree_path *path,
1231                                      int level, __u64 *keyp, __u64 *ptrp)
1232 {
1233         struct nilfs_btree_node *node, *right;
1234         int n;
1235
1236         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1237
1238         lock_buffer(path[level].bp_bh);
1239         lock_buffer(path[level].bp_sib_bh);
1240
1241         node = nilfs_btree_get_nonroot_node(btree, path, level);
1242         right = nilfs_btree_get_sib_node(btree, path, level);
1243
1244         n = nilfs_btree_node_get_nchildren(btree, right);
1245
1246         nilfs_btree_node_move_left(btree, node, right, n);
1247
1248         if (!buffer_dirty(path[level].bp_bh))
1249                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1250
1251         unlock_buffer(path[level].bp_bh);
1252         unlock_buffer(path[level].bp_sib_bh);
1253
1254         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1255         path[level].bp_sib_bh = NULL;
1256         path[level + 1].bp_index++;
1257 }
1258
1259 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1260                                struct nilfs_btree_path *path,
1261                                int level, __u64 *keyp, __u64 *ptrp)
1262 {
1263         struct nilfs_btree_node *root, *child;
1264         int n;
1265
1266         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1267
1268         lock_buffer(path[level].bp_bh);
1269         root = nilfs_btree_get_root(btree);
1270         child = nilfs_btree_get_nonroot_node(btree, path, level);
1271
1272         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1273         nilfs_btree_node_set_level(btree, root, level);
1274         n = nilfs_btree_node_get_nchildren(btree, child);
1275         nilfs_btree_node_move_left(btree, root, child, n);
1276         unlock_buffer(path[level].bp_bh);
1277
1278         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1279         path[level].bp_bh = NULL;
1280 }
1281
1282
1283 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1284                                       struct nilfs_btree_path *path,
1285                                       int *levelp,
1286                                       struct nilfs_bmap_stats *stats)
1287 {
1288         struct buffer_head *bh;
1289         struct nilfs_btree_node *node, *parent, *sib;
1290         __u64 sibptr;
1291         int pindex, level, ret;
1292
1293         ret = 0;
1294         stats->bs_nblocks = 0;
1295         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1296              level < nilfs_btree_height(btree) - 1;
1297              level++) {
1298                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1299                 path[level].bp_oldreq.bpr_ptr =
1300                         nilfs_btree_node_get_ptr(btree, node,
1301                                                  path[level].bp_index);
1302                 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1303                         ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1304                                 &btree->bt_bmap, &path[level].bp_oldreq);
1305                         if (ret < 0)
1306                                 goto err_out_child_node;
1307                 }
1308
1309                 if (nilfs_btree_node_get_nchildren(btree, node) >
1310                     nilfs_btree_node_nchildren_min(btree, node)) {
1311                         path[level].bp_op = nilfs_btree_do_delete;
1312                         stats->bs_nblocks++;
1313                         goto out;
1314                 }
1315
1316                 parent = nilfs_btree_get_node(btree, path, level + 1);
1317                 pindex = path[level + 1].bp_index;
1318
1319                 if (pindex > 0) {
1320                         /* left sibling */
1321                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1322                                                           pindex - 1);
1323                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1324                                                    &bh);
1325                         if (ret < 0)
1326                                 goto err_out_curr_node;
1327                         sib = (struct nilfs_btree_node *)bh->b_data;
1328                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1329                             nilfs_btree_node_nchildren_min(btree, sib)) {
1330                                 path[level].bp_sib_bh = bh;
1331                                 path[level].bp_op = nilfs_btree_borrow_left;
1332                                 stats->bs_nblocks++;
1333                                 goto out;
1334                         } else {
1335                                 path[level].bp_sib_bh = bh;
1336                                 path[level].bp_op = nilfs_btree_concat_left;
1337                                 stats->bs_nblocks++;
1338                                 /* continue; */
1339                         }
1340                 } else if (pindex <
1341                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1342                         /* right sibling */
1343                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1344                                                           pindex + 1);
1345                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1346                                                    &bh);
1347                         if (ret < 0)
1348                                 goto err_out_curr_node;
1349                         sib = (struct nilfs_btree_node *)bh->b_data;
1350                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1351                             nilfs_btree_node_nchildren_min(btree, sib)) {
1352                                 path[level].bp_sib_bh = bh;
1353                                 path[level].bp_op = nilfs_btree_borrow_right;
1354                                 stats->bs_nblocks++;
1355                                 goto out;
1356                         } else {
1357                                 path[level].bp_sib_bh = bh;
1358                                 path[level].bp_op = nilfs_btree_concat_right;
1359                                 stats->bs_nblocks++;
1360                                 /* continue; */
1361                         }
1362                 } else {
1363                         /* no siblings */
1364                         /* the only child of the root node */
1365                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1366                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1367                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1368                                 path[level].bp_op = nilfs_btree_shrink;
1369                                 stats->bs_nblocks += 2;
1370                         } else {
1371                                 path[level].bp_op = nilfs_btree_do_delete;
1372                                 stats->bs_nblocks++;
1373                         }
1374
1375                         goto out;
1376
1377                 }
1378         }
1379
1380         node = nilfs_btree_get_root(btree);
1381         path[level].bp_oldreq.bpr_ptr =
1382                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1383         if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1384                 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1385                         &btree->bt_bmap, &path[level].bp_oldreq);
1386                 if (ret < 0)
1387                         goto err_out_child_node;
1388         }
1389         /* child of the root node is deleted */
1390         path[level].bp_op = nilfs_btree_do_delete;
1391         stats->bs_nblocks++;
1392
1393         /* success */
1394  out:
1395         *levelp = level;
1396         return ret;
1397
1398         /* error */
1399  err_out_curr_node:
1400         if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1401                 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1402                         &btree->bt_bmap, &path[level].bp_oldreq);
1403  err_out_child_node:
1404         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1405                 brelse(path[level].bp_sib_bh);
1406                 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1407                         btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1408                                 &btree->bt_bmap, &path[level].bp_oldreq);
1409         }
1410         *levelp = level;
1411         stats->bs_nblocks = 0;
1412         return ret;
1413 }
1414
1415 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1416                                       struct nilfs_btree_path *path,
1417                                       int maxlevel)
1418 {
1419         int level;
1420
1421         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1422                 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1423                         btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1424                                 &btree->bt_bmap, &path[level].bp_oldreq);
1425                 path[level].bp_op(btree, path, level, NULL, NULL);
1426         }
1427
1428         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1429                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1430 }
1431
1432 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1433
1434 {
1435         struct nilfs_btree *btree;
1436         struct nilfs_btree_path *path;
1437         struct nilfs_bmap_stats stats;
1438         int level, ret;
1439
1440         btree = (struct nilfs_btree *)bmap;
1441         path = nilfs_btree_alloc_path(btree);
1442         if (path == NULL)
1443                 return -ENOMEM;
1444         nilfs_btree_init_path(btree, path);
1445         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1446                                     NILFS_BTREE_LEVEL_NODE_MIN);
1447         if (ret < 0)
1448                 goto out;
1449
1450         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1451         if (ret < 0)
1452                 goto out;
1453         nilfs_btree_commit_delete(btree, path, level);
1454         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1455
1456 out:
1457         nilfs_btree_clear_path(btree, path);
1458         nilfs_btree_free_path(btree, path);
1459         return ret;
1460 }
1461
1462 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1463 {
1464         struct nilfs_btree *btree;
1465         struct nilfs_btree_path *path;
1466         int ret;
1467
1468         btree = (struct nilfs_btree *)bmap;
1469         path = nilfs_btree_alloc_path(btree);
1470         if (path == NULL)
1471                 return -ENOMEM;
1472         nilfs_btree_init_path(btree, path);
1473
1474         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1475
1476         nilfs_btree_clear_path(btree, path);
1477         nilfs_btree_free_path(btree, path);
1478
1479         return ret;
1480 }
1481
1482 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1483 {
1484         struct buffer_head *bh;
1485         struct nilfs_btree *btree;
1486         struct nilfs_btree_node *root, *node;
1487         __u64 maxkey, nextmaxkey;
1488         __u64 ptr;
1489         int nchildren, ret;
1490
1491         btree = (struct nilfs_btree *)bmap;
1492         root = nilfs_btree_get_root(btree);
1493         switch (nilfs_btree_height(btree)) {
1494         case 2:
1495                 bh = NULL;
1496                 node = root;
1497                 break;
1498         case 3:
1499                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1500                 if (nchildren > 1)
1501                         return 0;
1502                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1503                 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1504                 if (ret < 0)
1505                         return ret;
1506                 node = (struct nilfs_btree_node *)bh->b_data;
1507                 break;
1508         default:
1509                 return 0;
1510         }
1511
1512         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1513         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1514         nextmaxkey = (nchildren > 1) ?
1515                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1516         if (bh != NULL)
1517                 brelse(bh);
1518
1519         return (maxkey == key) && (nextmaxkey < bmap->b_low);
1520 }
1521
1522 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1523                                    __u64 *keys, __u64 *ptrs, int nitems)
1524 {
1525         struct buffer_head *bh;
1526         struct nilfs_btree *btree;
1527         struct nilfs_btree_node *node, *root;
1528         __le64 *dkeys;
1529         __le64 *dptrs;
1530         __u64 ptr;
1531         int nchildren, i, ret;
1532
1533         btree = (struct nilfs_btree *)bmap;
1534         root = nilfs_btree_get_root(btree);
1535         switch (nilfs_btree_height(btree)) {
1536         case 2:
1537                 bh = NULL;
1538                 node = root;
1539                 break;
1540         case 3:
1541                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1542                 WARN_ON(nchildren > 1);
1543                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1544                 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1545                 if (ret < 0)
1546                         return ret;
1547                 node = (struct nilfs_btree_node *)bh->b_data;
1548                 break;
1549         default:
1550                 node = NULL;
1551                 return -EINVAL;
1552         }
1553
1554         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1555         if (nchildren < nitems)
1556                 nitems = nchildren;
1557         dkeys = nilfs_btree_node_dkeys(btree, node);
1558         dptrs = nilfs_btree_node_dptrs(btree, node);
1559         for (i = 0; i < nitems; i++) {
1560                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1561                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1562         }
1563
1564         if (bh != NULL)
1565                 brelse(bh);
1566
1567         return nitems;
1568 }
1569
1570 static int
1571 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1572                                        union nilfs_bmap_ptr_req *dreq,
1573                                        union nilfs_bmap_ptr_req *nreq,
1574                                        struct buffer_head **bhp,
1575                                        struct nilfs_bmap_stats *stats)
1576 {
1577         struct buffer_head *bh;
1578         struct nilfs_btree *btree;
1579         int ret;
1580
1581         btree = (struct nilfs_btree *)bmap;
1582         stats->bs_nblocks = 0;
1583
1584         /* for data */
1585         /* cannot find near ptr */
1586         if (btree->bt_ops->btop_find_target != NULL)
1587                 dreq->bpr_ptr
1588                         = btree->bt_ops->btop_find_target(btree, NULL, key);
1589         ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1590         if (ret < 0)
1591                 return ret;
1592
1593         *bhp = NULL;
1594         stats->bs_nblocks++;
1595         if (nreq != NULL) {
1596                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1597                 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1598                 if (ret < 0)
1599                         goto err_out_dreq;
1600
1601                 ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
1602                 if (ret < 0)
1603                         goto err_out_nreq;
1604
1605                 *bhp = bh;
1606                 stats->bs_nblocks++;
1607         }
1608
1609         /* success */
1610         return 0;
1611
1612         /* error */
1613  err_out_nreq:
1614         bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1615  err_out_dreq:
1616         bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1617         stats->bs_nblocks = 0;
1618         return ret;
1619
1620 }
1621
1622 static void
1623 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1624                                       __u64 key, __u64 ptr,
1625                                       const __u64 *keys, const __u64 *ptrs,
1626                                       int n, __u64 low, __u64 high,
1627                                       union nilfs_bmap_ptr_req *dreq,
1628                                       union nilfs_bmap_ptr_req *nreq,
1629                                       struct buffer_head *bh)
1630 {
1631         struct nilfs_btree *btree;
1632         struct nilfs_btree_node *node;
1633         __u64 tmpptr;
1634
1635         /* free resources */
1636         if (bmap->b_ops->bop_clear != NULL)
1637                 bmap->b_ops->bop_clear(bmap);
1638
1639         /* ptr must be a pointer to a buffer head. */
1640         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1641
1642         /* convert and insert */
1643         btree = (struct nilfs_btree *)bmap;
1644         nilfs_btree_init(bmap, low, high);
1645         if (nreq != NULL) {
1646                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
1647                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1648                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1649                 }
1650
1651                 /* create child node at level 1 */
1652                 lock_buffer(bh);
1653                 node = (struct nilfs_btree_node *)bh->b_data;
1654                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1655                 nilfs_btree_node_insert(btree, node,
1656                                         key, dreq->bpr_ptr, n);
1657                 if (!buffer_dirty(bh))
1658                         nilfs_btnode_mark_dirty(bh);
1659                 if (!nilfs_bmap_dirty(bmap))
1660                         nilfs_bmap_set_dirty(bmap);
1661
1662                 unlock_buffer(bh);
1663                 brelse(bh);
1664
1665                 /* create root node at level 2 */
1666                 node = nilfs_btree_get_root(btree);
1667                 tmpptr = nreq->bpr_ptr;
1668                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1669                                       2, 1, &keys[0], &tmpptr);
1670         } else {
1671                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
1672                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1673
1674                 /* create root node at level 1 */
1675                 node = nilfs_btree_get_root(btree);
1676                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1677                                       1, n, keys, ptrs);
1678                 nilfs_btree_node_insert(btree, node,
1679                                         key, dreq->bpr_ptr, n);
1680                 if (!nilfs_bmap_dirty(bmap))
1681                         nilfs_bmap_set_dirty(bmap);
1682         }
1683
1684         if (btree->bt_ops->btop_set_target != NULL)
1685                 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1686 }
1687
1688 /**
1689  * nilfs_btree_convert_and_insert -
1690  * @bmap:
1691  * @key:
1692  * @ptr:
1693  * @keys:
1694  * @ptrs:
1695  * @n:
1696  * @low:
1697  * @high:
1698  */
1699 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1700                                    __u64 key, __u64 ptr,
1701                                    const __u64 *keys, const __u64 *ptrs,
1702                                    int n, __u64 low, __u64 high)
1703 {
1704         struct buffer_head *bh;
1705         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1706         struct nilfs_bmap_stats stats;
1707         int ret;
1708
1709         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1710                 di = &dreq;
1711                 ni = NULL;
1712         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1713                            1 << bmap->b_inode->i_blkbits)) {
1714                 di = &dreq;
1715                 ni = &nreq;
1716         } else {
1717                 di = NULL;
1718                 ni = NULL;
1719                 BUG();
1720         }
1721
1722         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1723                                                      &stats);
1724         if (ret < 0)
1725                 return ret;
1726         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1727                                               low, high, di, ni, bh);
1728         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1729         return 0;
1730 }
1731
1732 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1733                                    struct nilfs_btree_path *path,
1734                                    int level,
1735                                    struct buffer_head *bh)
1736 {
1737         while ((++level < nilfs_btree_height(btree) - 1) &&
1738                !buffer_dirty(path[level].bp_bh))
1739                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1740
1741         return 0;
1742 }
1743
1744 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1745                                         struct nilfs_btree_path *path,
1746                                         int level)
1747 {
1748         struct nilfs_btree_node *parent;
1749         int ret;
1750
1751         parent = nilfs_btree_get_node(btree, path, level + 1);
1752         path[level].bp_oldreq.bpr_ptr =
1753                 nilfs_btree_node_get_ptr(btree, parent,
1754                                          path[level + 1].bp_index);
1755         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1756         ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1757                                         &path[level].bp_oldreq,
1758                                         &path[level].bp_newreq);
1759         if (ret < 0)
1760                 return ret;
1761
1762         if (buffer_nilfs_node(path[level].bp_bh)) {
1763                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1764                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1765                 path[level].bp_ctxt.bh = path[level].bp_bh;
1766                 ret = nilfs_btnode_prepare_change_key(
1767                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1768                         &path[level].bp_ctxt);
1769                 if (ret < 0) {
1770                         nilfs_bmap_abort_update(&btree->bt_bmap,
1771                                                 &path[level].bp_oldreq,
1772                                                 &path[level].bp_newreq);
1773                         return ret;
1774                 }
1775         }
1776
1777         return 0;
1778 }
1779
1780 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1781                                         struct nilfs_btree_path *path,
1782                                         int level)
1783 {
1784         struct nilfs_btree_node *parent;
1785
1786         nilfs_bmap_commit_update(&btree->bt_bmap,
1787                                  &path[level].bp_oldreq,
1788                                  &path[level].bp_newreq);
1789
1790         if (buffer_nilfs_node(path[level].bp_bh)) {
1791                 nilfs_btnode_commit_change_key(
1792                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1793                         &path[level].bp_ctxt);
1794                 path[level].bp_bh = path[level].bp_ctxt.bh;
1795         }
1796         set_buffer_nilfs_volatile(path[level].bp_bh);
1797
1798         parent = nilfs_btree_get_node(btree, path, level + 1);
1799         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1800                                  path[level].bp_newreq.bpr_ptr);
1801 }
1802
1803 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1804                                        struct nilfs_btree_path *path,
1805                                        int level)
1806 {
1807         nilfs_bmap_abort_update(&btree->bt_bmap,
1808                                 &path[level].bp_oldreq,
1809                                 &path[level].bp_newreq);
1810         if (buffer_nilfs_node(path[level].bp_bh))
1811                 nilfs_btnode_abort_change_key(
1812                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1813                         &path[level].bp_ctxt);
1814 }
1815
1816 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1817                                            struct nilfs_btree_path *path,
1818                                            int minlevel,
1819                                            int *maxlevelp)
1820 {
1821         int level, ret;
1822
1823         level = minlevel;
1824         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1825                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1826                 if (ret < 0)
1827                         return ret;
1828         }
1829         while ((++level < nilfs_btree_height(btree) - 1) &&
1830                !buffer_dirty(path[level].bp_bh)) {
1831
1832                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1833                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1834                 if (ret < 0)
1835                         goto out;
1836         }
1837
1838         /* success */
1839         *maxlevelp = level - 1;
1840         return 0;
1841
1842         /* error */
1843  out:
1844         while (--level > minlevel)
1845                 nilfs_btree_abort_update_v(btree, path, level);
1846         if (!buffer_nilfs_volatile(path[level].bp_bh))
1847                 nilfs_btree_abort_update_v(btree, path, level);
1848         return ret;
1849 }
1850
1851 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1852                                            struct nilfs_btree_path *path,
1853                                            int minlevel,
1854                                            int maxlevel,
1855                                            struct buffer_head *bh)
1856 {
1857         int level;
1858
1859         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1860                 nilfs_btree_commit_update_v(btree, path, minlevel);
1861
1862         for (level = minlevel + 1; level <= maxlevel; level++)
1863                 nilfs_btree_commit_update_v(btree, path, level);
1864 }
1865
1866 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1867                                    struct nilfs_btree_path *path,
1868                                    int level,
1869                                    struct buffer_head *bh)
1870 {
1871         int maxlevel, ret;
1872         struct nilfs_btree_node *parent;
1873         __u64 ptr;
1874
1875         get_bh(bh);
1876         path[level].bp_bh = bh;
1877         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1878         if (ret < 0)
1879                 goto out;
1880
1881         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1882                 parent = nilfs_btree_get_node(btree, path, level + 1);
1883                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1884                                                path[level + 1].bp_index);
1885                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1886                 if (ret < 0)
1887                         goto out;
1888         }
1889
1890         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1891
1892  out:
1893         brelse(path[level].bp_bh);
1894         path[level].bp_bh = NULL;
1895         return ret;
1896 }
1897
1898 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1899                                  struct buffer_head *bh)
1900 {
1901         struct nilfs_btree *btree;
1902         struct nilfs_btree_path *path;
1903         struct nilfs_btree_node *node;
1904         __u64 key;
1905         int level, ret;
1906
1907         WARN_ON(!buffer_dirty(bh));
1908
1909         btree = (struct nilfs_btree *)bmap;
1910         path = nilfs_btree_alloc_path(btree);
1911         if (path == NULL)
1912                 return -ENOMEM;
1913         nilfs_btree_init_path(btree, path);
1914
1915         if (buffer_nilfs_node(bh)) {
1916                 node = (struct nilfs_btree_node *)bh->b_data;
1917                 key = nilfs_btree_node_get_key(btree, node, 0);
1918                 level = nilfs_btree_node_get_level(btree, node);
1919         } else {
1920                 key = nilfs_bmap_data_get_key(bmap, bh);
1921                 level = NILFS_BTREE_LEVEL_DATA;
1922         }
1923
1924         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1925         if (ret < 0) {
1926                 if (unlikely(ret == -ENOENT))
1927                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1928                                __func__, (unsigned long long)key, level);
1929                 goto out;
1930         }
1931
1932         ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1933
1934  out:
1935         nilfs_btree_clear_path(btree, path);
1936         nilfs_btree_free_path(btree, path);
1937
1938         return ret;
1939 }
1940
1941 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1942                                     struct buffer_head *bh)
1943 {
1944         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1945 }
1946
1947 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1948                                          struct list_head *lists,
1949                                          struct buffer_head *bh)
1950 {
1951         struct list_head *head;
1952         struct buffer_head *cbh;
1953         struct nilfs_btree_node *node, *cnode;
1954         __u64 key, ckey;
1955         int level;
1956
1957         get_bh(bh);
1958         node = (struct nilfs_btree_node *)bh->b_data;
1959         key = nilfs_btree_node_get_key(btree, node, 0);
1960         level = nilfs_btree_node_get_level(btree, node);
1961         list_for_each(head, &lists[level]) {
1962                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1963                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1964                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1965                 if (key < ckey)
1966                         break;
1967         }
1968         list_add_tail(&bh->b_assoc_buffers, head);
1969 }
1970
1971 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1972                                              struct list_head *listp)
1973 {
1974         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1975         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1976         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1977         struct pagevec pvec;
1978         struct buffer_head *bh, *head;
1979         pgoff_t index = 0;
1980         int level, i;
1981
1982         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1983              level < NILFS_BTREE_LEVEL_MAX;
1984              level++)
1985                 INIT_LIST_HEAD(&lists[level]);
1986
1987         pagevec_init(&pvec, 0);
1988
1989         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1990                                   PAGEVEC_SIZE)) {
1991                 for (i = 0; i < pagevec_count(&pvec); i++) {
1992                         bh = head = page_buffers(pvec.pages[i]);
1993                         do {
1994                                 if (buffer_dirty(bh))
1995                                         nilfs_btree_add_dirty_buffer(btree,
1996                                                                      lists, bh);
1997                         } while ((bh = bh->b_this_page) != head);
1998                 }
1999                 pagevec_release(&pvec);
2000                 cond_resched();
2001         }
2002
2003         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2004              level < NILFS_BTREE_LEVEL_MAX;
2005              level++)
2006                 list_splice(&lists[level], listp->prev);
2007 }
2008
2009 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2010                                 struct nilfs_btree_path *path,
2011                                 int level,
2012                                 struct buffer_head **bh,
2013                                 sector_t blocknr,
2014                                 union nilfs_binfo *binfo)
2015 {
2016         struct nilfs_btree_node *parent;
2017         __u64 key;
2018         __u64 ptr;
2019         int ret;
2020
2021         parent = nilfs_btree_get_node(btree, path, level + 1);
2022         ptr = nilfs_btree_node_get_ptr(btree, parent,
2023                                        path[level + 1].bp_index);
2024         if (buffer_nilfs_node(*bh)) {
2025                 path[level].bp_ctxt.oldkey = ptr;
2026                 path[level].bp_ctxt.newkey = blocknr;
2027                 path[level].bp_ctxt.bh = *bh;
2028                 ret = nilfs_btnode_prepare_change_key(
2029                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2030                         &path[level].bp_ctxt);
2031                 if (ret < 0)
2032                         return ret;
2033                 nilfs_btnode_commit_change_key(
2034                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2035                         &path[level].bp_ctxt);
2036                 *bh = path[level].bp_ctxt.bh;
2037         }
2038
2039         nilfs_btree_node_set_ptr(btree, parent,
2040                                  path[level + 1].bp_index, blocknr);
2041
2042         key = nilfs_btree_node_get_key(btree, parent,
2043                                        path[level + 1].bp_index);
2044         /* on-disk format */
2045         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2046         binfo->bi_dat.bi_level = level;
2047
2048         return 0;
2049 }
2050
2051 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2052                                 struct nilfs_btree_path *path,
2053                                 int level,
2054                                 struct buffer_head **bh,
2055                                 sector_t blocknr,
2056                                 union nilfs_binfo *binfo)
2057 {
2058         struct nilfs_btree_node *parent;
2059         __u64 key;
2060         __u64 ptr;
2061         union nilfs_bmap_ptr_req req;
2062         int ret;
2063
2064         parent = nilfs_btree_get_node(btree, path, level + 1);
2065         ptr = nilfs_btree_node_get_ptr(btree, parent,
2066                                        path[level + 1].bp_index);
2067         req.bpr_ptr = ptr;
2068         ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2069         if (unlikely(ret < 0))
2070                 return ret;
2071
2072         key = nilfs_btree_node_get_key(btree, parent,
2073                                        path[level + 1].bp_index);
2074         /* on-disk format */
2075         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2076         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2077
2078         return 0;
2079 }
2080
2081 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2082                               struct buffer_head **bh,
2083                               sector_t blocknr,
2084                               union nilfs_binfo *binfo)
2085 {
2086         struct nilfs_btree *btree;
2087         struct nilfs_btree_path *path;
2088         struct nilfs_btree_node *node;
2089         __u64 key;
2090         int level, ret;
2091
2092         btree = (struct nilfs_btree *)bmap;
2093         path = nilfs_btree_alloc_path(btree);
2094         if (path == NULL)
2095                 return -ENOMEM;
2096         nilfs_btree_init_path(btree, path);
2097
2098         if (buffer_nilfs_node(*bh)) {
2099                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2100                 key = nilfs_btree_node_get_key(btree, node, 0);
2101                 level = nilfs_btree_node_get_level(btree, node);
2102         } else {
2103                 key = nilfs_bmap_data_get_key(bmap, *bh);
2104                 level = NILFS_BTREE_LEVEL_DATA;
2105         }
2106
2107         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2108         if (ret < 0) {
2109                 WARN_ON(ret == -ENOENT);
2110                 goto out;
2111         }
2112
2113         ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2114                                             blocknr, binfo);
2115
2116  out:
2117         nilfs_btree_clear_path(btree, path);
2118         nilfs_btree_free_path(btree, path);
2119
2120         return ret;
2121 }
2122
2123 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2124                                  struct buffer_head **bh,
2125                                  sector_t blocknr,
2126                                  union nilfs_binfo *binfo)
2127 {
2128         struct nilfs_btree *btree;
2129         struct nilfs_btree_node *node;
2130         __u64 key;
2131         int ret;
2132
2133         btree = (struct nilfs_btree *)bmap;
2134         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2135         if (ret < 0)
2136                 return ret;
2137
2138         if (buffer_nilfs_node(*bh)) {
2139                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2140                 key = nilfs_btree_node_get_key(btree, node, 0);
2141         } else
2142                 key = nilfs_bmap_data_get_key(bmap, *bh);
2143
2144         /* on-disk format */
2145         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2146         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2147
2148         return 0;
2149 }
2150
2151 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2152 {
2153         struct buffer_head *bh;
2154         struct nilfs_btree *btree;
2155         struct nilfs_btree_path *path;
2156         __u64 ptr;
2157         int ret;
2158
2159         btree = (struct nilfs_btree *)bmap;
2160         path = nilfs_btree_alloc_path(btree);
2161         if (path == NULL)
2162                 return -ENOMEM;
2163         nilfs_btree_init_path(btree, path);
2164
2165         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2166         if (ret < 0) {
2167                 WARN_ON(ret == -ENOENT);
2168                 goto out;
2169         }
2170         ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
2171         if (ret < 0) {
2172                 WARN_ON(ret == -ENOENT);
2173                 goto out;
2174         }
2175
2176         if (!buffer_dirty(bh))
2177                 nilfs_btnode_mark_dirty(bh);
2178         brelse(bh);
2179         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2180                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2181
2182  out:
2183         nilfs_btree_clear_path(btree, path);
2184         nilfs_btree_free_path(btree, path);
2185         return ret;
2186 }
2187
2188 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2189         .bop_lookup             =       nilfs_btree_lookup,
2190         .bop_insert             =       nilfs_btree_insert,
2191         .bop_delete             =       nilfs_btree_delete,
2192         .bop_clear              =       NULL,
2193
2194         .bop_propagate          =       nilfs_btree_propagate,
2195
2196         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2197
2198         .bop_assign             =       nilfs_btree_assign,
2199         .bop_mark               =       nilfs_btree_mark,
2200
2201         .bop_last_key           =       nilfs_btree_last_key,
2202         .bop_check_insert       =       NULL,
2203         .bop_check_delete       =       nilfs_btree_check_delete,
2204         .bop_gather_data        =       nilfs_btree_gather_data,
2205 };
2206
2207 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2208         .bop_lookup             =       NULL,
2209         .bop_insert             =       NULL,
2210         .bop_delete             =       NULL,
2211         .bop_clear              =       NULL,
2212
2213         .bop_propagate          =       nilfs_btree_propagate_gc,
2214
2215         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2216
2217         .bop_assign             =       nilfs_btree_assign_gc,
2218         .bop_mark               =       NULL,
2219
2220         .bop_last_key           =       NULL,
2221         .bop_check_insert       =       NULL,
2222         .bop_check_delete       =       NULL,
2223         .bop_gather_data        =       NULL,
2224 };
2225
2226 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2227         .btop_find_target       =       nilfs_btree_find_target_v,
2228         .btop_set_target        =       nilfs_btree_set_target_v,
2229         .btop_propagate         =       nilfs_btree_propagate_v,
2230         .btop_assign            =       nilfs_btree_assign_v,
2231 };
2232
2233 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2234         .btop_find_target       =       NULL,
2235         .btop_set_target        =       NULL,
2236         .btop_propagate         =       nilfs_btree_propagate_p,
2237         .btop_assign            =       nilfs_btree_assign_p,
2238 };
2239
2240 int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2241 {
2242         struct nilfs_btree *btree;
2243
2244         btree = (struct nilfs_btree *)bmap;
2245         bmap->b_ops = &nilfs_btree_ops;
2246         bmap->b_low = low;
2247         bmap->b_high = high;
2248         switch (bmap->b_inode->i_ino) {
2249         case NILFS_DAT_INO:
2250                 btree->bt_ops = &nilfs_btree_ops_p;
2251                 break;
2252         default:
2253                 btree->bt_ops = &nilfs_btree_ops_v;
2254                 break;
2255         }
2256
2257         return 0;
2258 }
2259
2260 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2261 {
2262         bmap->b_low = NILFS_BMAP_LARGE_LOW;
2263         bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2264         bmap->b_ops = &nilfs_btree_ops_gc;
2265 }