]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/nilfs2/btree.c
nilfs2: remove bmap pointer operations
[mv-sheeva.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43         struct buffer_head *bp_bh;
44         struct buffer_head *bp_sib_bh;
45         int bp_index;
46         union nilfs_bmap_ptr_req bp_oldreq;
47         union nilfs_bmap_ptr_req bp_newreq;
48         struct nilfs_btnode_chkey_ctxt bp_ctxt;
49         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50                       int, __u64 *, __u64 *);
51 };
52
53 /*
54  * B-tree path operations
55  */
56
57 static struct kmem_cache *nilfs_btree_path_cache;
58
59 int __init nilfs_btree_path_cache_init(void)
60 {
61         nilfs_btree_path_cache =
62                 kmem_cache_create("nilfs2_btree_path_cache",
63                                   sizeof(struct nilfs_btree_path) *
64                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67
68 void nilfs_btree_path_cache_destroy(void)
69 {
70         kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76         return (struct nilfs_btree_path *)
77                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81                                          struct nilfs_btree_path *path)
82 {
83         kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87                                   struct nilfs_btree_path *path)
88 {
89         int level;
90
91         for (level = NILFS_BTREE_LEVEL_DATA;
92              level < NILFS_BTREE_LEVEL_MAX;
93              level++) {
94                 path[level].bp_bh = NULL;
95                 path[level].bp_sib_bh = NULL;
96                 path[level].bp_index = 0;
97                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_op = NULL;
100         }
101 }
102
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104                                    struct nilfs_btree_path *path)
105 {
106         int level;
107
108         for (level = NILFS_BTREE_LEVEL_DATA;
109              level < NILFS_BTREE_LEVEL_MAX;
110              level++) {
111                 if (path[level].bp_bh != NULL) {
112                         brelse(path[level].bp_bh);
113                         path[level].bp_bh = NULL;
114                 }
115                 /* sib_bh is released or deleted by prepare or commit
116                  * operations. */
117                 path[level].bp_sib_bh = NULL;
118                 path[level].bp_index = 0;
119                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
120                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_op = NULL;
122         }
123 }
124
125 /*
126  * B-tree node operations
127  */
128 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
129                                  struct buffer_head **bhp)
130 {
131         struct address_space *btnc =
132                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
133         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
134 }
135
136 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
137                                      __u64 ptr, struct buffer_head **bhp)
138 {
139         struct address_space *btnc =
140                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
141         int ret;
142
143         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
144         if (!ret)
145                 set_buffer_nilfs_volatile(*bhp);
146         return ret;
147 }
148
149 static inline int
150 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
151                            const struct nilfs_btree_node *node)
152 {
153         return node->bn_flags;
154 }
155
156 static inline void
157 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
158                            struct nilfs_btree_node *node,
159                            int flags)
160 {
161         node->bn_flags = flags;
162 }
163
164 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
165                                         const struct nilfs_btree_node *node)
166 {
167         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
168 }
169
170 static inline int
171 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
172                            const struct nilfs_btree_node *node)
173 {
174         return node->bn_level;
175 }
176
177 static inline void
178 nilfs_btree_node_set_level(struct nilfs_btree *btree,
179                            struct nilfs_btree_node *node,
180                            int level)
181 {
182         node->bn_level = level;
183 }
184
185 static inline int
186 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
187                                const struct nilfs_btree_node *node)
188 {
189         return le16_to_cpu(node->bn_nchildren);
190 }
191
192 static inline void
193 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
194                                struct nilfs_btree_node *node,
195                                int nchildren)
196 {
197         node->bn_nchildren = cpu_to_le16(nchildren);
198 }
199
200 static inline int
201 nilfs_btree_node_size(const struct nilfs_btree *btree)
202 {
203         return 1 << btree->bt_bmap.b_inode->i_blkbits;
204 }
205
206 static inline int
207 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
208                                const struct nilfs_btree_node *node)
209 {
210         return nilfs_btree_node_root(btree, node) ?
211                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
212                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
213 }
214
215 static inline int
216 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
217                                const struct nilfs_btree_node *node)
218 {
219         return nilfs_btree_node_root(btree, node) ?
220                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
221                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
222 }
223
224 static inline __le64 *
225 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
226                        const struct nilfs_btree_node *node)
227 {
228         return (__le64 *)((char *)(node + 1) +
229                           (nilfs_btree_node_root(btree, node) ?
230                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
231 }
232
233 static inline __le64 *
234 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
235                        const struct nilfs_btree_node *node)
236 {
237         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
238                           nilfs_btree_node_nchildren_max(btree, node));
239 }
240
241 static inline __u64
242 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
243                          const struct nilfs_btree_node *node, int index)
244 {
245         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
246                                         index));
247 }
248
249 static inline void
250 nilfs_btree_node_set_key(struct nilfs_btree *btree,
251                          struct nilfs_btree_node *node, int index, __u64 key)
252 {
253         *(nilfs_btree_node_dkeys(btree, node) + index) =
254                 nilfs_bmap_key_to_dkey(key);
255 }
256
257 static inline __u64
258 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
259                          const struct nilfs_btree_node *node,
260                          int index)
261 {
262         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
263                                         index));
264 }
265
266 static inline void
267 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
268                          struct nilfs_btree_node *node,
269                          int index,
270                          __u64 ptr)
271 {
272         *(nilfs_btree_node_dptrs(btree, node) + index) =
273                 nilfs_bmap_ptr_to_dptr(ptr);
274 }
275
276 static void nilfs_btree_node_init(struct nilfs_btree *btree,
277                                   struct nilfs_btree_node *node,
278                                   int flags, int level, int nchildren,
279                                   const __u64 *keys, const __u64 *ptrs)
280 {
281         __le64 *dkeys;
282         __le64 *dptrs;
283         int i;
284
285         nilfs_btree_node_set_flags(btree, node, flags);
286         nilfs_btree_node_set_level(btree, node, level);
287         nilfs_btree_node_set_nchildren(btree, node, nchildren);
288
289         dkeys = nilfs_btree_node_dkeys(btree, node);
290         dptrs = nilfs_btree_node_dptrs(btree, node);
291         for (i = 0; i < nchildren; i++) {
292                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
293                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
294         }
295 }
296
297 /* Assume the buffer heads corresponding to left and right are locked. */
298 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
299                                        struct nilfs_btree_node *left,
300                                        struct nilfs_btree_node *right,
301                                        int n)
302 {
303         __le64 *ldkeys, *rdkeys;
304         __le64 *ldptrs, *rdptrs;
305         int lnchildren, rnchildren;
306
307         ldkeys = nilfs_btree_node_dkeys(btree, left);
308         ldptrs = nilfs_btree_node_dptrs(btree, left);
309         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
310
311         rdkeys = nilfs_btree_node_dkeys(btree, right);
312         rdptrs = nilfs_btree_node_dptrs(btree, right);
313         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
314
315         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
316         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
317         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
318         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
319
320         lnchildren += n;
321         rnchildren -= n;
322         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
323         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
324 }
325
326 /* Assume that the buffer heads corresponding to left and right are locked. */
327 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
328                                         struct nilfs_btree_node *left,
329                                         struct nilfs_btree_node *right,
330                                         int n)
331 {
332         __le64 *ldkeys, *rdkeys;
333         __le64 *ldptrs, *rdptrs;
334         int lnchildren, rnchildren;
335
336         ldkeys = nilfs_btree_node_dkeys(btree, left);
337         ldptrs = nilfs_btree_node_dptrs(btree, left);
338         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
339
340         rdkeys = nilfs_btree_node_dkeys(btree, right);
341         rdptrs = nilfs_btree_node_dptrs(btree, right);
342         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
343
344         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
345         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
346         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
347         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
348
349         lnchildren -= n;
350         rnchildren += n;
351         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
352         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
353 }
354
355 /* Assume that the buffer head corresponding to node is locked. */
356 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
357                                     struct nilfs_btree_node *node,
358                                     __u64 key, __u64 ptr, int index)
359 {
360         __le64 *dkeys;
361         __le64 *dptrs;
362         int nchildren;
363
364         dkeys = nilfs_btree_node_dkeys(btree, node);
365         dptrs = nilfs_btree_node_dptrs(btree, node);
366         nchildren = nilfs_btree_node_get_nchildren(btree, node);
367         if (index < nchildren) {
368                 memmove(dkeys + index + 1, dkeys + index,
369                         (nchildren - index) * sizeof(*dkeys));
370                 memmove(dptrs + index + 1, dptrs + index,
371                         (nchildren - index) * sizeof(*dptrs));
372         }
373         dkeys[index] = nilfs_bmap_key_to_dkey(key);
374         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
375         nchildren++;
376         nilfs_btree_node_set_nchildren(btree, node, nchildren);
377 }
378
379 /* Assume that the buffer head corresponding to node is locked. */
380 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
381                                     struct nilfs_btree_node *node,
382                                     __u64 *keyp, __u64 *ptrp, int index)
383 {
384         __u64 key;
385         __u64 ptr;
386         __le64 *dkeys;
387         __le64 *dptrs;
388         int nchildren;
389
390         dkeys = nilfs_btree_node_dkeys(btree, node);
391         dptrs = nilfs_btree_node_dptrs(btree, node);
392         key = nilfs_bmap_dkey_to_key(dkeys[index]);
393         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
394         nchildren = nilfs_btree_node_get_nchildren(btree, node);
395         if (keyp != NULL)
396                 *keyp = key;
397         if (ptrp != NULL)
398                 *ptrp = ptr;
399
400         if (index < nchildren - 1) {
401                 memmove(dkeys + index, dkeys + index + 1,
402                         (nchildren - index - 1) * sizeof(*dkeys));
403                 memmove(dptrs + index, dptrs + index + 1,
404                         (nchildren - index - 1) * sizeof(*dptrs));
405         }
406         nchildren--;
407         nilfs_btree_node_set_nchildren(btree, node, nchildren);
408 }
409
410 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
411                                    const struct nilfs_btree_node *node,
412                                    __u64 key, int *indexp)
413 {
414         __u64 nkey;
415         int index, low, high, s;
416
417         /* binary search */
418         low = 0;
419         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
420         index = 0;
421         s = 0;
422         while (low <= high) {
423                 index = (low + high) / 2;
424                 nkey = nilfs_btree_node_get_key(btree, node, index);
425                 if (nkey == key) {
426                         s = 0;
427                         goto out;
428                 } else if (nkey < key) {
429                         low = index + 1;
430                         s = -1;
431                 } else {
432                         high = index - 1;
433                         s = 1;
434                 }
435         }
436
437         /* adjust index */
438         if (nilfs_btree_node_get_level(btree, node) >
439             NILFS_BTREE_LEVEL_NODE_MIN) {
440                 if ((s > 0) && (index > 0))
441                         index--;
442         } else if (s < 0)
443                 index++;
444
445  out:
446         *indexp = index;
447
448         return s == 0;
449 }
450
451 static inline struct nilfs_btree_node *
452 nilfs_btree_get_root(const struct nilfs_btree *btree)
453 {
454         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
455 }
456
457 static inline struct nilfs_btree_node *
458 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
459                              const struct nilfs_btree_path *path,
460                              int level)
461 {
462         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
463 }
464
465 static inline struct nilfs_btree_node *
466 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
467                          const struct nilfs_btree_path *path,
468                          int level)
469 {
470         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
471 }
472
473 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
474 {
475         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
476                 + 1;
477 }
478
479 static inline struct nilfs_btree_node *
480 nilfs_btree_get_node(const struct nilfs_btree *btree,
481                      const struct nilfs_btree_path *path,
482                      int level)
483 {
484         return (level == nilfs_btree_height(btree) - 1) ?
485                 nilfs_btree_get_root(btree) :
486                 nilfs_btree_get_nonroot_node(btree, path, level);
487 }
488
489 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
490                                  struct nilfs_btree_path *path,
491                                  __u64 key, __u64 *ptrp, int minlevel)
492 {
493         struct nilfs_btree_node *node;
494         __u64 ptr;
495         int level, index, found, ret;
496
497         node = nilfs_btree_get_root(btree);
498         level = nilfs_btree_node_get_level(btree, node);
499         if ((level < minlevel) ||
500             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
501                 return -ENOENT;
502
503         found = nilfs_btree_node_lookup(btree, node, key, &index);
504         ptr = nilfs_btree_node_get_ptr(btree, node, index);
505         path[level].bp_bh = NULL;
506         path[level].bp_index = index;
507
508         for (level--; level >= minlevel; level--) {
509                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
510                 if (ret < 0)
511                         return ret;
512                 node = nilfs_btree_get_nonroot_node(btree, path, level);
513                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
514                 if (!found)
515                         found = nilfs_btree_node_lookup(btree, node, key,
516                                                         &index);
517                 else
518                         index = 0;
519                 if (index < nilfs_btree_node_nchildren_max(btree, node))
520                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
521                 else {
522                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
523                         /* insert */
524                         ptr = NILFS_BMAP_INVALID_PTR;
525                 }
526                 path[level].bp_index = index;
527         }
528         if (!found)
529                 return -ENOENT;
530
531         if (ptrp != NULL)
532                 *ptrp = ptr;
533
534         return 0;
535 }
536
537 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
538                                       struct nilfs_btree_path *path,
539                                       __u64 *keyp, __u64 *ptrp)
540 {
541         struct nilfs_btree_node *node;
542         __u64 ptr;
543         int index, level, ret;
544
545         node = nilfs_btree_get_root(btree);
546         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
547         if (index < 0)
548                 return -ENOENT;
549         level = nilfs_btree_node_get_level(btree, node);
550         ptr = nilfs_btree_node_get_ptr(btree, node, index);
551         path[level].bp_bh = NULL;
552         path[level].bp_index = index;
553
554         for (level--; level > 0; level--) {
555                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
556                 if (ret < 0)
557                         return ret;
558                 node = nilfs_btree_get_nonroot_node(btree, path, level);
559                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
560                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
561                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
562                 path[level].bp_index = index;
563         }
564
565         if (keyp != NULL)
566                 *keyp = nilfs_btree_node_get_key(btree, node, index);
567         if (ptrp != NULL)
568                 *ptrp = ptr;
569
570         return 0;
571 }
572
573 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
574                               __u64 key, int level, __u64 *ptrp)
575 {
576         struct nilfs_btree *btree;
577         struct nilfs_btree_path *path;
578         __u64 ptr;
579         int ret;
580
581         btree = (struct nilfs_btree *)bmap;
582         path = nilfs_btree_alloc_path(btree);
583         if (path == NULL)
584                 return -ENOMEM;
585         nilfs_btree_init_path(btree, path);
586
587         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
588
589         if (ptrp != NULL)
590                 *ptrp = ptr;
591
592         nilfs_btree_clear_path(btree, path);
593         nilfs_btree_free_path(btree, path);
594
595         return ret;
596 }
597
598 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
599                                     struct nilfs_btree_path *path,
600                                     int level, __u64 key)
601 {
602         if (level < nilfs_btree_height(btree) - 1) {
603                 do {
604                         lock_buffer(path[level].bp_bh);
605                         nilfs_btree_node_set_key(
606                                 btree,
607                                 nilfs_btree_get_nonroot_node(
608                                         btree, path, level),
609                                 path[level].bp_index, key);
610                         if (!buffer_dirty(path[level].bp_bh))
611                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
612                         unlock_buffer(path[level].bp_bh);
613                 } while ((path[level].bp_index == 0) &&
614                          (++level < nilfs_btree_height(btree) - 1));
615         }
616
617         /* root */
618         if (level == nilfs_btree_height(btree) - 1) {
619                 nilfs_btree_node_set_key(btree,
620                                          nilfs_btree_get_root(btree),
621                                          path[level].bp_index, key);
622         }
623 }
624
625 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
626                                   struct nilfs_btree_path *path,
627                                   int level, __u64 *keyp, __u64 *ptrp)
628 {
629         struct nilfs_btree_node *node;
630
631         if (level < nilfs_btree_height(btree) - 1) {
632                 lock_buffer(path[level].bp_bh);
633                 node = nilfs_btree_get_nonroot_node(btree, path, level);
634                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
635                                         path[level].bp_index);
636                 if (!buffer_dirty(path[level].bp_bh))
637                         nilfs_btnode_mark_dirty(path[level].bp_bh);
638                 unlock_buffer(path[level].bp_bh);
639
640                 if (path[level].bp_index == 0)
641                         nilfs_btree_promote_key(btree, path, level + 1,
642                                                 nilfs_btree_node_get_key(
643                                                         btree, node, 0));
644         } else {
645                 node = nilfs_btree_get_root(btree);
646                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
647                                         path[level].bp_index);
648         }
649 }
650
651 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
652                                    struct nilfs_btree_path *path,
653                                    int level, __u64 *keyp, __u64 *ptrp)
654 {
655         struct nilfs_btree_node *node, *left;
656         int nchildren, lnchildren, n, move;
657
658         lock_buffer(path[level].bp_bh);
659         lock_buffer(path[level].bp_sib_bh);
660
661         node = nilfs_btree_get_nonroot_node(btree, path, level);
662         left = nilfs_btree_get_sib_node(btree, path, level);
663         nchildren = nilfs_btree_node_get_nchildren(btree, node);
664         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
665         move = 0;
666
667         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
668         if (n > path[level].bp_index) {
669                 /* move insert point */
670                 n--;
671                 move = 1;
672         }
673
674         nilfs_btree_node_move_left(btree, left, node, n);
675
676         if (!buffer_dirty(path[level].bp_bh))
677                 nilfs_btnode_mark_dirty(path[level].bp_bh);
678         if (!buffer_dirty(path[level].bp_sib_bh))
679                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
680
681         unlock_buffer(path[level].bp_bh);
682         unlock_buffer(path[level].bp_sib_bh);
683
684         nilfs_btree_promote_key(btree, path, level + 1,
685                                 nilfs_btree_node_get_key(btree, node, 0));
686
687         if (move) {
688                 brelse(path[level].bp_bh);
689                 path[level].bp_bh = path[level].bp_sib_bh;
690                 path[level].bp_sib_bh = NULL;
691                 path[level].bp_index += lnchildren;
692                 path[level + 1].bp_index--;
693         } else {
694                 brelse(path[level].bp_sib_bh);
695                 path[level].bp_sib_bh = NULL;
696                 path[level].bp_index -= n;
697         }
698
699         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
700 }
701
702 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
703                                     struct nilfs_btree_path *path,
704                                     int level, __u64 *keyp, __u64 *ptrp)
705 {
706         struct nilfs_btree_node *node, *right;
707         int nchildren, rnchildren, n, move;
708
709         lock_buffer(path[level].bp_bh);
710         lock_buffer(path[level].bp_sib_bh);
711
712         node = nilfs_btree_get_nonroot_node(btree, path, level);
713         right = nilfs_btree_get_sib_node(btree, path, level);
714         nchildren = nilfs_btree_node_get_nchildren(btree, node);
715         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
716         move = 0;
717
718         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
719         if (n > nchildren - path[level].bp_index) {
720                 /* move insert point */
721                 n--;
722                 move = 1;
723         }
724
725         nilfs_btree_node_move_right(btree, node, right, n);
726
727         if (!buffer_dirty(path[level].bp_bh))
728                 nilfs_btnode_mark_dirty(path[level].bp_bh);
729         if (!buffer_dirty(path[level].bp_sib_bh))
730                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
731
732         unlock_buffer(path[level].bp_bh);
733         unlock_buffer(path[level].bp_sib_bh);
734
735         path[level + 1].bp_index++;
736         nilfs_btree_promote_key(btree, path, level + 1,
737                                 nilfs_btree_node_get_key(btree, right, 0));
738         path[level + 1].bp_index--;
739
740         if (move) {
741                 brelse(path[level].bp_bh);
742                 path[level].bp_bh = path[level].bp_sib_bh;
743                 path[level].bp_sib_bh = NULL;
744                 path[level].bp_index -=
745                         nilfs_btree_node_get_nchildren(btree, node);
746                 path[level + 1].bp_index++;
747         } else {
748                 brelse(path[level].bp_sib_bh);
749                 path[level].bp_sib_bh = NULL;
750         }
751
752         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
753 }
754
755 static void nilfs_btree_split(struct nilfs_btree *btree,
756                               struct nilfs_btree_path *path,
757                               int level, __u64 *keyp, __u64 *ptrp)
758 {
759         struct nilfs_btree_node *node, *right;
760         __u64 newkey;
761         __u64 newptr;
762         int nchildren, n, move;
763
764         lock_buffer(path[level].bp_bh);
765         lock_buffer(path[level].bp_sib_bh);
766
767         node = nilfs_btree_get_nonroot_node(btree, path, level);
768         right = nilfs_btree_get_sib_node(btree, path, level);
769         nchildren = nilfs_btree_node_get_nchildren(btree, node);
770         move = 0;
771
772         n = (nchildren + 1) / 2;
773         if (n > nchildren - path[level].bp_index) {
774                 n--;
775                 move = 1;
776         }
777
778         nilfs_btree_node_move_right(btree, node, right, n);
779
780         if (!buffer_dirty(path[level].bp_bh))
781                 nilfs_btnode_mark_dirty(path[level].bp_bh);
782         if (!buffer_dirty(path[level].bp_sib_bh))
783                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
784
785         unlock_buffer(path[level].bp_bh);
786         unlock_buffer(path[level].bp_sib_bh);
787
788         newkey = nilfs_btree_node_get_key(btree, right, 0);
789         newptr = path[level].bp_newreq.bpr_ptr;
790
791         if (move) {
792                 path[level].bp_index -=
793                         nilfs_btree_node_get_nchildren(btree, node);
794                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
795                                         path[level].bp_index);
796
797                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
798                 *ptrp = path[level].bp_newreq.bpr_ptr;
799
800                 brelse(path[level].bp_bh);
801                 path[level].bp_bh = path[level].bp_sib_bh;
802                 path[level].bp_sib_bh = NULL;
803         } else {
804                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
805
806                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
807                 *ptrp = path[level].bp_newreq.bpr_ptr;
808
809                 brelse(path[level].bp_sib_bh);
810                 path[level].bp_sib_bh = NULL;
811         }
812
813         path[level + 1].bp_index++;
814 }
815
816 static void nilfs_btree_grow(struct nilfs_btree *btree,
817                              struct nilfs_btree_path *path,
818                              int level, __u64 *keyp, __u64 *ptrp)
819 {
820         struct nilfs_btree_node *root, *child;
821         int n;
822
823         lock_buffer(path[level].bp_sib_bh);
824
825         root = nilfs_btree_get_root(btree);
826         child = nilfs_btree_get_sib_node(btree, path, level);
827
828         n = nilfs_btree_node_get_nchildren(btree, root);
829
830         nilfs_btree_node_move_right(btree, root, child, n);
831         nilfs_btree_node_set_level(btree, root, level + 1);
832
833         if (!buffer_dirty(path[level].bp_sib_bh))
834                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
835
836         unlock_buffer(path[level].bp_sib_bh);
837
838         path[level].bp_bh = path[level].bp_sib_bh;
839         path[level].bp_sib_bh = NULL;
840
841         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
842
843         *keyp = nilfs_btree_node_get_key(btree, child, 0);
844         *ptrp = path[level].bp_newreq.bpr_ptr;
845 }
846
847 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
848                                    const struct nilfs_btree_path *path)
849 {
850         struct nilfs_btree_node *node;
851         int level;
852
853         if (path == NULL)
854                 return NILFS_BMAP_INVALID_PTR;
855
856         /* left sibling */
857         level = NILFS_BTREE_LEVEL_NODE_MIN;
858         if (path[level].bp_index > 0) {
859                 node = nilfs_btree_get_node(btree, path, level);
860                 return nilfs_btree_node_get_ptr(btree, node,
861                                                 path[level].bp_index - 1);
862         }
863
864         /* parent */
865         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
866         if (level <= nilfs_btree_height(btree) - 1) {
867                 node = nilfs_btree_get_node(btree, path, level);
868                 return nilfs_btree_node_get_ptr(btree, node,
869                                                 path[level].bp_index);
870         }
871
872         return NILFS_BMAP_INVALID_PTR;
873 }
874
875 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
876                                        const struct nilfs_btree_path *path,
877                                        __u64 key)
878 {
879         __u64 ptr;
880
881         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
882         if (ptr != NILFS_BMAP_INVALID_PTR)
883                 /* sequential access */
884                 return ptr;
885         else {
886                 ptr = nilfs_btree_find_near(btree, path);
887                 if (ptr != NILFS_BMAP_INVALID_PTR)
888                         /* near */
889                         return ptr;
890         }
891         /* block group */
892         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
893 }
894
895 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
896                                      __u64 ptr)
897 {
898         btree->bt_bmap.b_last_allocated_key = key;
899         btree->bt_bmap.b_last_allocated_ptr = ptr;
900 }
901
902 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
903                                       struct nilfs_btree_path *path,
904                                       int *levelp, __u64 key, __u64 ptr,
905                                       struct nilfs_bmap_stats *stats)
906 {
907         struct buffer_head *bh;
908         struct nilfs_btree_node *node, *parent, *sib;
909         __u64 sibptr;
910         int pindex, level, ret;
911
912         stats->bs_nblocks = 0;
913         level = NILFS_BTREE_LEVEL_DATA;
914
915         /* allocate a new ptr for data block */
916         if (btree->bt_ops->btop_find_target != NULL)
917                 path[level].bp_newreq.bpr_ptr =
918                         btree->bt_ops->btop_find_target(btree, path, key);
919
920         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
921                                            &path[level].bp_newreq);
922         if (ret < 0)
923                 goto err_out_data;
924
925         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
926              level < nilfs_btree_height(btree) - 1;
927              level++) {
928                 node = nilfs_btree_get_nonroot_node(btree, path, level);
929                 if (nilfs_btree_node_get_nchildren(btree, node) <
930                     nilfs_btree_node_nchildren_max(btree, node)) {
931                         path[level].bp_op = nilfs_btree_do_insert;
932                         stats->bs_nblocks++;
933                         goto out;
934                 }
935
936                 parent = nilfs_btree_get_node(btree, path, level + 1);
937                 pindex = path[level + 1].bp_index;
938
939                 /* left sibling */
940                 if (pindex > 0) {
941                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
942                                                           pindex - 1);
943                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
944                         if (ret < 0)
945                                 goto err_out_child_node;
946                         sib = (struct nilfs_btree_node *)bh->b_data;
947                         if (nilfs_btree_node_get_nchildren(btree, sib) <
948                             nilfs_btree_node_nchildren_max(btree, sib)) {
949                                 path[level].bp_sib_bh = bh;
950                                 path[level].bp_op = nilfs_btree_carry_left;
951                                 stats->bs_nblocks++;
952                                 goto out;
953                         } else
954                                 brelse(bh);
955                 }
956
957                 /* right sibling */
958                 if (pindex <
959                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
960                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
961                                                           pindex + 1);
962                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
963                         if (ret < 0)
964                                 goto err_out_child_node;
965                         sib = (struct nilfs_btree_node *)bh->b_data;
966                         if (nilfs_btree_node_get_nchildren(btree, sib) <
967                             nilfs_btree_node_nchildren_max(btree, sib)) {
968                                 path[level].bp_sib_bh = bh;
969                                 path[level].bp_op = nilfs_btree_carry_right;
970                                 stats->bs_nblocks++;
971                                 goto out;
972                         } else
973                                 brelse(bh);
974                 }
975
976                 /* split */
977                 path[level].bp_newreq.bpr_ptr =
978                         path[level - 1].bp_newreq.bpr_ptr + 1;
979                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
980                                                    &path[level].bp_newreq);
981                 if (ret < 0)
982                         goto err_out_child_node;
983                 ret = nilfs_btree_get_new_block(btree,
984                                                 path[level].bp_newreq.bpr_ptr,
985                                                 &bh);
986                 if (ret < 0)
987                         goto err_out_curr_node;
988
989                 stats->bs_nblocks++;
990
991                 lock_buffer(bh);
992                 nilfs_btree_node_init(btree,
993                                       (struct nilfs_btree_node *)bh->b_data,
994                                       0, level, 0, NULL, NULL);
995                 unlock_buffer(bh);
996                 path[level].bp_sib_bh = bh;
997                 path[level].bp_op = nilfs_btree_split;
998         }
999
1000         /* root */
1001         node = nilfs_btree_get_root(btree);
1002         if (nilfs_btree_node_get_nchildren(btree, node) <
1003             nilfs_btree_node_nchildren_max(btree, node)) {
1004                 path[level].bp_op = nilfs_btree_do_insert;
1005                 stats->bs_nblocks++;
1006                 goto out;
1007         }
1008
1009         /* grow */
1010         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1011         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1012                                            &path[level].bp_newreq);
1013         if (ret < 0)
1014                 goto err_out_child_node;
1015         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1016                                         &bh);
1017         if (ret < 0)
1018                 goto err_out_curr_node;
1019
1020         lock_buffer(bh);
1021         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1022                               0, level, 0, NULL, NULL);
1023         unlock_buffer(bh);
1024         path[level].bp_sib_bh = bh;
1025         path[level].bp_op = nilfs_btree_grow;
1026
1027         level++;
1028         path[level].bp_op = nilfs_btree_do_insert;
1029
1030         /* a newly-created node block and a data block are added */
1031         stats->bs_nblocks += 2;
1032
1033         /* success */
1034  out:
1035         *levelp = level;
1036         return ret;
1037
1038         /* error */
1039  err_out_curr_node:
1040         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1041  err_out_child_node:
1042         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1043                 nilfs_btnode_delete(path[level].bp_sib_bh);
1044                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1045                                            &path[level].bp_newreq);
1046
1047         }
1048
1049         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1050  err_out_data:
1051         *levelp = level;
1052         stats->bs_nblocks = 0;
1053         return ret;
1054 }
1055
1056 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1057                                       struct nilfs_btree_path *path,
1058                                       int maxlevel, __u64 key, __u64 ptr)
1059 {
1060         int level;
1061
1062         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1063         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1064         if (btree->bt_ops->btop_set_target != NULL)
1065                 btree->bt_ops->btop_set_target(btree, key, ptr);
1066
1067         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1068                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1069                                             &path[level - 1].bp_newreq);
1070                 path[level].bp_op(btree, path, level, &key, &ptr);
1071         }
1072
1073         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1074                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1075 }
1076
1077 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1078 {
1079         struct nilfs_btree *btree;
1080         struct nilfs_btree_path *path;
1081         struct nilfs_bmap_stats stats;
1082         int level, ret;
1083
1084         btree = (struct nilfs_btree *)bmap;
1085         path = nilfs_btree_alloc_path(btree);
1086         if (path == NULL)
1087                 return -ENOMEM;
1088         nilfs_btree_init_path(btree, path);
1089
1090         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1091                                     NILFS_BTREE_LEVEL_NODE_MIN);
1092         if (ret != -ENOENT) {
1093                 if (ret == 0)
1094                         ret = -EEXIST;
1095                 goto out;
1096         }
1097
1098         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1099         if (ret < 0)
1100                 goto out;
1101         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1102         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1103
1104  out:
1105         nilfs_btree_clear_path(btree, path);
1106         nilfs_btree_free_path(btree, path);
1107         return ret;
1108 }
1109
1110 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1111                                   struct nilfs_btree_path *path,
1112                                   int level, __u64 *keyp, __u64 *ptrp)
1113 {
1114         struct nilfs_btree_node *node;
1115
1116         if (level < nilfs_btree_height(btree) - 1) {
1117                 lock_buffer(path[level].bp_bh);
1118                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1119                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1120                                         path[level].bp_index);
1121                 if (!buffer_dirty(path[level].bp_bh))
1122                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1123                 unlock_buffer(path[level].bp_bh);
1124                 if (path[level].bp_index == 0)
1125                         nilfs_btree_promote_key(btree, path, level + 1,
1126                                 nilfs_btree_node_get_key(btree, node, 0));
1127         } else {
1128                 node = nilfs_btree_get_root(btree);
1129                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1130                                         path[level].bp_index);
1131         }
1132 }
1133
1134 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1135                                     struct nilfs_btree_path *path,
1136                                     int level, __u64 *keyp, __u64 *ptrp)
1137 {
1138         struct nilfs_btree_node *node, *left;
1139         int nchildren, lnchildren, n;
1140
1141         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1142
1143         lock_buffer(path[level].bp_bh);
1144         lock_buffer(path[level].bp_sib_bh);
1145
1146         node = nilfs_btree_get_nonroot_node(btree, path, level);
1147         left = nilfs_btree_get_sib_node(btree, path, level);
1148         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1149         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1150
1151         n = (nchildren + lnchildren) / 2 - nchildren;
1152
1153         nilfs_btree_node_move_right(btree, left, node, n);
1154
1155         if (!buffer_dirty(path[level].bp_bh))
1156                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1157         if (!buffer_dirty(path[level].bp_sib_bh))
1158                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1159
1160         unlock_buffer(path[level].bp_bh);
1161         unlock_buffer(path[level].bp_sib_bh);
1162
1163         nilfs_btree_promote_key(btree, path, level + 1,
1164                                 nilfs_btree_node_get_key(btree, node, 0));
1165
1166         brelse(path[level].bp_sib_bh);
1167         path[level].bp_sib_bh = NULL;
1168         path[level].bp_index += n;
1169 }
1170
1171 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1172                                      struct nilfs_btree_path *path,
1173                                      int level, __u64 *keyp, __u64 *ptrp)
1174 {
1175         struct nilfs_btree_node *node, *right;
1176         int nchildren, rnchildren, n;
1177
1178         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1179
1180         lock_buffer(path[level].bp_bh);
1181         lock_buffer(path[level].bp_sib_bh);
1182
1183         node = nilfs_btree_get_nonroot_node(btree, path, level);
1184         right = nilfs_btree_get_sib_node(btree, path, level);
1185         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1186         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1187
1188         n = (nchildren + rnchildren) / 2 - nchildren;
1189
1190         nilfs_btree_node_move_left(btree, node, right, n);
1191
1192         if (!buffer_dirty(path[level].bp_bh))
1193                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1194         if (!buffer_dirty(path[level].bp_sib_bh))
1195                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1196
1197         unlock_buffer(path[level].bp_bh);
1198         unlock_buffer(path[level].bp_sib_bh);
1199
1200         path[level + 1].bp_index++;
1201         nilfs_btree_promote_key(btree, path, level + 1,
1202                                 nilfs_btree_node_get_key(btree, right, 0));
1203         path[level + 1].bp_index--;
1204
1205         brelse(path[level].bp_sib_bh);
1206         path[level].bp_sib_bh = NULL;
1207 }
1208
1209 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1210                                     struct nilfs_btree_path *path,
1211                                     int level, __u64 *keyp, __u64 *ptrp)
1212 {
1213         struct nilfs_btree_node *node, *left;
1214         int n;
1215
1216         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1217
1218         lock_buffer(path[level].bp_bh);
1219         lock_buffer(path[level].bp_sib_bh);
1220
1221         node = nilfs_btree_get_nonroot_node(btree, path, level);
1222         left = nilfs_btree_get_sib_node(btree, path, level);
1223
1224         n = nilfs_btree_node_get_nchildren(btree, node);
1225
1226         nilfs_btree_node_move_left(btree, left, node, n);
1227
1228         if (!buffer_dirty(path[level].bp_sib_bh))
1229                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1230
1231         unlock_buffer(path[level].bp_bh);
1232         unlock_buffer(path[level].bp_sib_bh);
1233
1234         nilfs_btnode_delete(path[level].bp_bh);
1235         path[level].bp_bh = path[level].bp_sib_bh;
1236         path[level].bp_sib_bh = NULL;
1237         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1238 }
1239
1240 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1241                                      struct nilfs_btree_path *path,
1242                                      int level, __u64 *keyp, __u64 *ptrp)
1243 {
1244         struct nilfs_btree_node *node, *right;
1245         int n;
1246
1247         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1248
1249         lock_buffer(path[level].bp_bh);
1250         lock_buffer(path[level].bp_sib_bh);
1251
1252         node = nilfs_btree_get_nonroot_node(btree, path, level);
1253         right = nilfs_btree_get_sib_node(btree, path, level);
1254
1255         n = nilfs_btree_node_get_nchildren(btree, right);
1256
1257         nilfs_btree_node_move_left(btree, node, right, n);
1258
1259         if (!buffer_dirty(path[level].bp_bh))
1260                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1261
1262         unlock_buffer(path[level].bp_bh);
1263         unlock_buffer(path[level].bp_sib_bh);
1264
1265         nilfs_btnode_delete(path[level].bp_sib_bh);
1266         path[level].bp_sib_bh = NULL;
1267         path[level + 1].bp_index++;
1268 }
1269
1270 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1271                                struct nilfs_btree_path *path,
1272                                int level, __u64 *keyp, __u64 *ptrp)
1273 {
1274         struct nilfs_btree_node *root, *child;
1275         int n;
1276
1277         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1278
1279         lock_buffer(path[level].bp_bh);
1280         root = nilfs_btree_get_root(btree);
1281         child = nilfs_btree_get_nonroot_node(btree, path, level);
1282
1283         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1284         nilfs_btree_node_set_level(btree, root, level);
1285         n = nilfs_btree_node_get_nchildren(btree, child);
1286         nilfs_btree_node_move_left(btree, root, child, n);
1287         unlock_buffer(path[level].bp_bh);
1288
1289         nilfs_btnode_delete(path[level].bp_bh);
1290         path[level].bp_bh = NULL;
1291 }
1292
1293
1294 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1295                                       struct nilfs_btree_path *path,
1296                                       int *levelp,
1297                                       struct nilfs_bmap_stats *stats)
1298 {
1299         struct buffer_head *bh;
1300         struct nilfs_btree_node *node, *parent, *sib;
1301         __u64 sibptr;
1302         int pindex, level, ret;
1303
1304         ret = 0;
1305         stats->bs_nblocks = 0;
1306         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1307              level < nilfs_btree_height(btree) - 1;
1308              level++) {
1309                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1310                 path[level].bp_oldreq.bpr_ptr =
1311                         nilfs_btree_node_get_ptr(btree, node,
1312                                                  path[level].bp_index);
1313                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1314                                                  &path[level].bp_oldreq);
1315                 if (ret < 0)
1316                         goto err_out_child_node;
1317
1318                 if (nilfs_btree_node_get_nchildren(btree, node) >
1319                     nilfs_btree_node_nchildren_min(btree, node)) {
1320                         path[level].bp_op = nilfs_btree_do_delete;
1321                         stats->bs_nblocks++;
1322                         goto out;
1323                 }
1324
1325                 parent = nilfs_btree_get_node(btree, path, level + 1);
1326                 pindex = path[level + 1].bp_index;
1327
1328                 if (pindex > 0) {
1329                         /* left sibling */
1330                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1331                                                           pindex - 1);
1332                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1333                         if (ret < 0)
1334                                 goto err_out_curr_node;
1335                         sib = (struct nilfs_btree_node *)bh->b_data;
1336                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1337                             nilfs_btree_node_nchildren_min(btree, sib)) {
1338                                 path[level].bp_sib_bh = bh;
1339                                 path[level].bp_op = nilfs_btree_borrow_left;
1340                                 stats->bs_nblocks++;
1341                                 goto out;
1342                         } else {
1343                                 path[level].bp_sib_bh = bh;
1344                                 path[level].bp_op = nilfs_btree_concat_left;
1345                                 stats->bs_nblocks++;
1346                                 /* continue; */
1347                         }
1348                 } else if (pindex <
1349                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1350                         /* right sibling */
1351                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1352                                                           pindex + 1);
1353                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1354                         if (ret < 0)
1355                                 goto err_out_curr_node;
1356                         sib = (struct nilfs_btree_node *)bh->b_data;
1357                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1358                             nilfs_btree_node_nchildren_min(btree, sib)) {
1359                                 path[level].bp_sib_bh = bh;
1360                                 path[level].bp_op = nilfs_btree_borrow_right;
1361                                 stats->bs_nblocks++;
1362                                 goto out;
1363                         } else {
1364                                 path[level].bp_sib_bh = bh;
1365                                 path[level].bp_op = nilfs_btree_concat_right;
1366                                 stats->bs_nblocks++;
1367                                 /* continue; */
1368                         }
1369                 } else {
1370                         /* no siblings */
1371                         /* the only child of the root node */
1372                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1373                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1374                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1375                                 path[level].bp_op = nilfs_btree_shrink;
1376                                 stats->bs_nblocks += 2;
1377                         } else {
1378                                 path[level].bp_op = nilfs_btree_do_delete;
1379                                 stats->bs_nblocks++;
1380                         }
1381
1382                         goto out;
1383
1384                 }
1385         }
1386
1387         node = nilfs_btree_get_root(btree);
1388         path[level].bp_oldreq.bpr_ptr =
1389                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1390
1391         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1392                                          &path[level].bp_oldreq);
1393         if (ret < 0)
1394                 goto err_out_child_node;
1395
1396         /* child of the root node is deleted */
1397         path[level].bp_op = nilfs_btree_do_delete;
1398         stats->bs_nblocks++;
1399
1400         /* success */
1401  out:
1402         *levelp = level;
1403         return ret;
1404
1405         /* error */
1406  err_out_curr_node:
1407         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq);
1408  err_out_child_node:
1409         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1410                 brelse(path[level].bp_sib_bh);
1411                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1412                                          &path[level].bp_oldreq);
1413         }
1414         *levelp = level;
1415         stats->bs_nblocks = 0;
1416         return ret;
1417 }
1418
1419 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1420                                       struct nilfs_btree_path *path,
1421                                       int maxlevel)
1422 {
1423         int level;
1424
1425         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1426                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1427                                           &path[level].bp_oldreq);
1428                 path[level].bp_op(btree, path, level, NULL, NULL);
1429         }
1430
1431         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1432                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1433 }
1434
1435 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1436
1437 {
1438         struct nilfs_btree *btree;
1439         struct nilfs_btree_path *path;
1440         struct nilfs_bmap_stats stats;
1441         int level, ret;
1442
1443         btree = (struct nilfs_btree *)bmap;
1444         path = nilfs_btree_alloc_path(btree);
1445         if (path == NULL)
1446                 return -ENOMEM;
1447         nilfs_btree_init_path(btree, path);
1448         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1449                                     NILFS_BTREE_LEVEL_NODE_MIN);
1450         if (ret < 0)
1451                 goto out;
1452
1453         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1454         if (ret < 0)
1455                 goto out;
1456         nilfs_btree_commit_delete(btree, path, level);
1457         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1458
1459 out:
1460         nilfs_btree_clear_path(btree, path);
1461         nilfs_btree_free_path(btree, path);
1462         return ret;
1463 }
1464
1465 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1466 {
1467         struct nilfs_btree *btree;
1468         struct nilfs_btree_path *path;
1469         int ret;
1470
1471         btree = (struct nilfs_btree *)bmap;
1472         path = nilfs_btree_alloc_path(btree);
1473         if (path == NULL)
1474                 return -ENOMEM;
1475         nilfs_btree_init_path(btree, path);
1476
1477         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1478
1479         nilfs_btree_clear_path(btree, path);
1480         nilfs_btree_free_path(btree, path);
1481
1482         return ret;
1483 }
1484
1485 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1486 {
1487         struct buffer_head *bh;
1488         struct nilfs_btree *btree;
1489         struct nilfs_btree_node *root, *node;
1490         __u64 maxkey, nextmaxkey;
1491         __u64 ptr;
1492         int nchildren, ret;
1493
1494         btree = (struct nilfs_btree *)bmap;
1495         root = nilfs_btree_get_root(btree);
1496         switch (nilfs_btree_height(btree)) {
1497         case 2:
1498                 bh = NULL;
1499                 node = root;
1500                 break;
1501         case 3:
1502                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1503                 if (nchildren > 1)
1504                         return 0;
1505                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1506                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1507                 if (ret < 0)
1508                         return ret;
1509                 node = (struct nilfs_btree_node *)bh->b_data;
1510                 break;
1511         default:
1512                 return 0;
1513         }
1514
1515         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1516         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1517         nextmaxkey = (nchildren > 1) ?
1518                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1519         if (bh != NULL)
1520                 brelse(bh);
1521
1522         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1523 }
1524
1525 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1526                                    __u64 *keys, __u64 *ptrs, int nitems)
1527 {
1528         struct buffer_head *bh;
1529         struct nilfs_btree *btree;
1530         struct nilfs_btree_node *node, *root;
1531         __le64 *dkeys;
1532         __le64 *dptrs;
1533         __u64 ptr;
1534         int nchildren, i, ret;
1535
1536         btree = (struct nilfs_btree *)bmap;
1537         root = nilfs_btree_get_root(btree);
1538         switch (nilfs_btree_height(btree)) {
1539         case 2:
1540                 bh = NULL;
1541                 node = root;
1542                 break;
1543         case 3:
1544                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1545                 WARN_ON(nchildren > 1);
1546                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1547                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1548                 if (ret < 0)
1549                         return ret;
1550                 node = (struct nilfs_btree_node *)bh->b_data;
1551                 break;
1552         default:
1553                 node = NULL;
1554                 return -EINVAL;
1555         }
1556
1557         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1558         if (nchildren < nitems)
1559                 nitems = nchildren;
1560         dkeys = nilfs_btree_node_dkeys(btree, node);
1561         dptrs = nilfs_btree_node_dptrs(btree, node);
1562         for (i = 0; i < nitems; i++) {
1563                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1564                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1565         }
1566
1567         if (bh != NULL)
1568                 brelse(bh);
1569
1570         return nitems;
1571 }
1572
1573 static int
1574 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1575                                        union nilfs_bmap_ptr_req *dreq,
1576                                        union nilfs_bmap_ptr_req *nreq,
1577                                        struct buffer_head **bhp,
1578                                        struct nilfs_bmap_stats *stats)
1579 {
1580         struct buffer_head *bh;
1581         struct nilfs_btree *btree;
1582         int ret;
1583
1584         btree = (struct nilfs_btree *)bmap;
1585         stats->bs_nblocks = 0;
1586
1587         /* for data */
1588         /* cannot find near ptr */
1589         if (btree->bt_ops->btop_find_target != NULL)
1590                 dreq->bpr_ptr
1591                         = btree->bt_ops->btop_find_target(btree, NULL, key);
1592         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq);
1593         if (ret < 0)
1594                 return ret;
1595
1596         *bhp = NULL;
1597         stats->bs_nblocks++;
1598         if (nreq != NULL) {
1599                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1600                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq);
1601                 if (ret < 0)
1602                         goto err_out_dreq;
1603
1604                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1605                 if (ret < 0)
1606                         goto err_out_nreq;
1607
1608                 *bhp = bh;
1609                 stats->bs_nblocks++;
1610         }
1611
1612         /* success */
1613         return 0;
1614
1615         /* error */
1616  err_out_nreq:
1617         nilfs_bmap_abort_alloc_ptr(bmap, nreq);
1618  err_out_dreq:
1619         nilfs_bmap_abort_alloc_ptr(bmap, dreq);
1620         stats->bs_nblocks = 0;
1621         return ret;
1622
1623 }
1624
1625 static void
1626 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1627                                       __u64 key, __u64 ptr,
1628                                       const __u64 *keys, const __u64 *ptrs,
1629                                       int n,
1630                                       union nilfs_bmap_ptr_req *dreq,
1631                                       union nilfs_bmap_ptr_req *nreq,
1632                                       struct buffer_head *bh)
1633 {
1634         struct nilfs_btree *btree;
1635         struct nilfs_btree_node *node;
1636         __u64 tmpptr;
1637
1638         /* free resources */
1639         if (bmap->b_ops->bop_clear != NULL)
1640                 bmap->b_ops->bop_clear(bmap);
1641
1642         /* ptr must be a pointer to a buffer head. */
1643         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1644
1645         /* convert and insert */
1646         btree = (struct nilfs_btree *)bmap;
1647         nilfs_btree_init(bmap);
1648         if (nreq != NULL) {
1649                 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1650                 nilfs_bmap_commit_alloc_ptr(bmap, nreq);
1651
1652                 /* create child node at level 1 */
1653                 lock_buffer(bh);
1654                 node = (struct nilfs_btree_node *)bh->b_data;
1655                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1656                 nilfs_btree_node_insert(btree, node,
1657                                         key, dreq->bpr_ptr, n);
1658                 if (!buffer_dirty(bh))
1659                         nilfs_btnode_mark_dirty(bh);
1660                 if (!nilfs_bmap_dirty(bmap))
1661                         nilfs_bmap_set_dirty(bmap);
1662
1663                 unlock_buffer(bh);
1664                 brelse(bh);
1665
1666                 /* create root node at level 2 */
1667                 node = nilfs_btree_get_root(btree);
1668                 tmpptr = nreq->bpr_ptr;
1669                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1670                                       2, 1, &keys[0], &tmpptr);
1671         } else {
1672                 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1673
1674                 /* create root node at level 1 */
1675                 node = nilfs_btree_get_root(btree);
1676                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1677                                       1, n, keys, ptrs);
1678                 nilfs_btree_node_insert(btree, node,
1679                                         key, dreq->bpr_ptr, n);
1680                 if (!nilfs_bmap_dirty(bmap))
1681                         nilfs_bmap_set_dirty(bmap);
1682         }
1683
1684         if (btree->bt_ops->btop_set_target != NULL)
1685                 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1686 }
1687
1688 /**
1689  * nilfs_btree_convert_and_insert -
1690  * @bmap:
1691  * @key:
1692  * @ptr:
1693  * @keys:
1694  * @ptrs:
1695  * @n:
1696  */
1697 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1698                                    __u64 key, __u64 ptr,
1699                                    const __u64 *keys, const __u64 *ptrs, int n)
1700 {
1701         struct buffer_head *bh;
1702         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1703         struct nilfs_bmap_stats stats;
1704         int ret;
1705
1706         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1707                 di = &dreq;
1708                 ni = NULL;
1709         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1710                            1 << bmap->b_inode->i_blkbits)) {
1711                 di = &dreq;
1712                 ni = &nreq;
1713         } else {
1714                 di = NULL;
1715                 ni = NULL;
1716                 BUG();
1717         }
1718
1719         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1720                                                      &stats);
1721         if (ret < 0)
1722                 return ret;
1723         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1724                                               di, ni, bh);
1725         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1726         return 0;
1727 }
1728
1729 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1730                                    struct nilfs_btree_path *path,
1731                                    int level,
1732                                    struct buffer_head *bh)
1733 {
1734         while ((++level < nilfs_btree_height(btree) - 1) &&
1735                !buffer_dirty(path[level].bp_bh))
1736                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1737
1738         return 0;
1739 }
1740
1741 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1742                                         struct nilfs_btree_path *path,
1743                                         int level)
1744 {
1745         struct nilfs_btree_node *parent;
1746         int ret;
1747
1748         parent = nilfs_btree_get_node(btree, path, level + 1);
1749         path[level].bp_oldreq.bpr_ptr =
1750                 nilfs_btree_node_get_ptr(btree, parent,
1751                                          path[level + 1].bp_index);
1752         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1753         ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap,
1754                                           &path[level].bp_oldreq,
1755                                           &path[level].bp_newreq);
1756         if (ret < 0)
1757                 return ret;
1758
1759         if (buffer_nilfs_node(path[level].bp_bh)) {
1760                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1761                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1762                 path[level].bp_ctxt.bh = path[level].bp_bh;
1763                 ret = nilfs_btnode_prepare_change_key(
1764                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1765                         &path[level].bp_ctxt);
1766                 if (ret < 0) {
1767                         nilfs_bmap_abort_update_v(&btree->bt_bmap,
1768                                                   &path[level].bp_oldreq,
1769                                                   &path[level].bp_newreq);
1770                         return ret;
1771                 }
1772         }
1773
1774         return 0;
1775 }
1776
1777 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1778                                         struct nilfs_btree_path *path,
1779                                         int level)
1780 {
1781         struct nilfs_btree_node *parent;
1782
1783         nilfs_bmap_commit_update_v(&btree->bt_bmap,
1784                                    &path[level].bp_oldreq,
1785                                    &path[level].bp_newreq);
1786
1787         if (buffer_nilfs_node(path[level].bp_bh)) {
1788                 nilfs_btnode_commit_change_key(
1789                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1790                         &path[level].bp_ctxt);
1791                 path[level].bp_bh = path[level].bp_ctxt.bh;
1792         }
1793         set_buffer_nilfs_volatile(path[level].bp_bh);
1794
1795         parent = nilfs_btree_get_node(btree, path, level + 1);
1796         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1797                                  path[level].bp_newreq.bpr_ptr);
1798 }
1799
1800 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1801                                        struct nilfs_btree_path *path,
1802                                        int level)
1803 {
1804         nilfs_bmap_abort_update_v(&btree->bt_bmap,
1805                                   &path[level].bp_oldreq,
1806                                   &path[level].bp_newreq);
1807         if (buffer_nilfs_node(path[level].bp_bh))
1808                 nilfs_btnode_abort_change_key(
1809                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1810                         &path[level].bp_ctxt);
1811 }
1812
1813 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1814                                            struct nilfs_btree_path *path,
1815                                            int minlevel,
1816                                            int *maxlevelp)
1817 {
1818         int level, ret;
1819
1820         level = minlevel;
1821         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1822                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1823                 if (ret < 0)
1824                         return ret;
1825         }
1826         while ((++level < nilfs_btree_height(btree) - 1) &&
1827                !buffer_dirty(path[level].bp_bh)) {
1828
1829                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1830                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1831                 if (ret < 0)
1832                         goto out;
1833         }
1834
1835         /* success */
1836         *maxlevelp = level - 1;
1837         return 0;
1838
1839         /* error */
1840  out:
1841         while (--level > minlevel)
1842                 nilfs_btree_abort_update_v(btree, path, level);
1843         if (!buffer_nilfs_volatile(path[level].bp_bh))
1844                 nilfs_btree_abort_update_v(btree, path, level);
1845         return ret;
1846 }
1847
1848 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1849                                            struct nilfs_btree_path *path,
1850                                            int minlevel,
1851                                            int maxlevel,
1852                                            struct buffer_head *bh)
1853 {
1854         int level;
1855
1856         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1857                 nilfs_btree_commit_update_v(btree, path, minlevel);
1858
1859         for (level = minlevel + 1; level <= maxlevel; level++)
1860                 nilfs_btree_commit_update_v(btree, path, level);
1861 }
1862
1863 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1864                                    struct nilfs_btree_path *path,
1865                                    int level,
1866                                    struct buffer_head *bh)
1867 {
1868         int maxlevel, ret;
1869         struct nilfs_btree_node *parent;
1870         __u64 ptr;
1871
1872         get_bh(bh);
1873         path[level].bp_bh = bh;
1874         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1875         if (ret < 0)
1876                 goto out;
1877
1878         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1879                 parent = nilfs_btree_get_node(btree, path, level + 1);
1880                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1881                                                path[level + 1].bp_index);
1882                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1883                 if (ret < 0)
1884                         goto out;
1885         }
1886
1887         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1888
1889  out:
1890         brelse(path[level].bp_bh);
1891         path[level].bp_bh = NULL;
1892         return ret;
1893 }
1894
1895 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1896                                  struct buffer_head *bh)
1897 {
1898         struct nilfs_btree *btree;
1899         struct nilfs_btree_path *path;
1900         struct nilfs_btree_node *node;
1901         __u64 key;
1902         int level, ret;
1903
1904         WARN_ON(!buffer_dirty(bh));
1905
1906         btree = (struct nilfs_btree *)bmap;
1907         path = nilfs_btree_alloc_path(btree);
1908         if (path == NULL)
1909                 return -ENOMEM;
1910         nilfs_btree_init_path(btree, path);
1911
1912         if (buffer_nilfs_node(bh)) {
1913                 node = (struct nilfs_btree_node *)bh->b_data;
1914                 key = nilfs_btree_node_get_key(btree, node, 0);
1915                 level = nilfs_btree_node_get_level(btree, node);
1916         } else {
1917                 key = nilfs_bmap_data_get_key(bmap, bh);
1918                 level = NILFS_BTREE_LEVEL_DATA;
1919         }
1920
1921         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1922         if (ret < 0) {
1923                 if (unlikely(ret == -ENOENT))
1924                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1925                                __func__, (unsigned long long)key, level);
1926                 goto out;
1927         }
1928
1929         ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1930
1931  out:
1932         nilfs_btree_clear_path(btree, path);
1933         nilfs_btree_free_path(btree, path);
1934
1935         return ret;
1936 }
1937
1938 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1939                                     struct buffer_head *bh)
1940 {
1941         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1942 }
1943
1944 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1945                                          struct list_head *lists,
1946                                          struct buffer_head *bh)
1947 {
1948         struct list_head *head;
1949         struct buffer_head *cbh;
1950         struct nilfs_btree_node *node, *cnode;
1951         __u64 key, ckey;
1952         int level;
1953
1954         get_bh(bh);
1955         node = (struct nilfs_btree_node *)bh->b_data;
1956         key = nilfs_btree_node_get_key(btree, node, 0);
1957         level = nilfs_btree_node_get_level(btree, node);
1958         list_for_each(head, &lists[level]) {
1959                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1960                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1961                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1962                 if (key < ckey)
1963                         break;
1964         }
1965         list_add_tail(&bh->b_assoc_buffers, head);
1966 }
1967
1968 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1969                                              struct list_head *listp)
1970 {
1971         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1972         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1973         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1974         struct pagevec pvec;
1975         struct buffer_head *bh, *head;
1976         pgoff_t index = 0;
1977         int level, i;
1978
1979         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1980              level < NILFS_BTREE_LEVEL_MAX;
1981              level++)
1982                 INIT_LIST_HEAD(&lists[level]);
1983
1984         pagevec_init(&pvec, 0);
1985
1986         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1987                                   PAGEVEC_SIZE)) {
1988                 for (i = 0; i < pagevec_count(&pvec); i++) {
1989                         bh = head = page_buffers(pvec.pages[i]);
1990                         do {
1991                                 if (buffer_dirty(bh))
1992                                         nilfs_btree_add_dirty_buffer(btree,
1993                                                                      lists, bh);
1994                         } while ((bh = bh->b_this_page) != head);
1995                 }
1996                 pagevec_release(&pvec);
1997                 cond_resched();
1998         }
1999
2000         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2001              level < NILFS_BTREE_LEVEL_MAX;
2002              level++)
2003                 list_splice(&lists[level], listp->prev);
2004 }
2005
2006 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2007                                 struct nilfs_btree_path *path,
2008                                 int level,
2009                                 struct buffer_head **bh,
2010                                 sector_t blocknr,
2011                                 union nilfs_binfo *binfo)
2012 {
2013         struct nilfs_btree_node *parent;
2014         __u64 key;
2015         __u64 ptr;
2016         int ret;
2017
2018         parent = nilfs_btree_get_node(btree, path, level + 1);
2019         ptr = nilfs_btree_node_get_ptr(btree, parent,
2020                                        path[level + 1].bp_index);
2021         if (buffer_nilfs_node(*bh)) {
2022                 path[level].bp_ctxt.oldkey = ptr;
2023                 path[level].bp_ctxt.newkey = blocknr;
2024                 path[level].bp_ctxt.bh = *bh;
2025                 ret = nilfs_btnode_prepare_change_key(
2026                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2027                         &path[level].bp_ctxt);
2028                 if (ret < 0)
2029                         return ret;
2030                 nilfs_btnode_commit_change_key(
2031                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2032                         &path[level].bp_ctxt);
2033                 *bh = path[level].bp_ctxt.bh;
2034         }
2035
2036         nilfs_btree_node_set_ptr(btree, parent,
2037                                  path[level + 1].bp_index, blocknr);
2038
2039         key = nilfs_btree_node_get_key(btree, parent,
2040                                        path[level + 1].bp_index);
2041         /* on-disk format */
2042         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2043         binfo->bi_dat.bi_level = level;
2044
2045         return 0;
2046 }
2047
2048 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2049                                 struct nilfs_btree_path *path,
2050                                 int level,
2051                                 struct buffer_head **bh,
2052                                 sector_t blocknr,
2053                                 union nilfs_binfo *binfo)
2054 {
2055         struct nilfs_btree_node *parent;
2056         __u64 key;
2057         __u64 ptr;
2058         union nilfs_bmap_ptr_req req;
2059         int ret;
2060
2061         parent = nilfs_btree_get_node(btree, path, level + 1);
2062         ptr = nilfs_btree_node_get_ptr(btree, parent,
2063                                        path[level + 1].bp_index);
2064         req.bpr_ptr = ptr;
2065         ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2066         if (unlikely(ret < 0))
2067                 return ret;
2068
2069         key = nilfs_btree_node_get_key(btree, parent,
2070                                        path[level + 1].bp_index);
2071         /* on-disk format */
2072         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2073         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2074
2075         return 0;
2076 }
2077
2078 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2079                               struct buffer_head **bh,
2080                               sector_t blocknr,
2081                               union nilfs_binfo *binfo)
2082 {
2083         struct nilfs_btree *btree;
2084         struct nilfs_btree_path *path;
2085         struct nilfs_btree_node *node;
2086         __u64 key;
2087         int level, ret;
2088
2089         btree = (struct nilfs_btree *)bmap;
2090         path = nilfs_btree_alloc_path(btree);
2091         if (path == NULL)
2092                 return -ENOMEM;
2093         nilfs_btree_init_path(btree, path);
2094
2095         if (buffer_nilfs_node(*bh)) {
2096                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2097                 key = nilfs_btree_node_get_key(btree, node, 0);
2098                 level = nilfs_btree_node_get_level(btree, node);
2099         } else {
2100                 key = nilfs_bmap_data_get_key(bmap, *bh);
2101                 level = NILFS_BTREE_LEVEL_DATA;
2102         }
2103
2104         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2105         if (ret < 0) {
2106                 WARN_ON(ret == -ENOENT);
2107                 goto out;
2108         }
2109
2110         ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2111                                             blocknr, binfo);
2112
2113  out:
2114         nilfs_btree_clear_path(btree, path);
2115         nilfs_btree_free_path(btree, path);
2116
2117         return ret;
2118 }
2119
2120 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2121                                  struct buffer_head **bh,
2122                                  sector_t blocknr,
2123                                  union nilfs_binfo *binfo)
2124 {
2125         struct nilfs_btree *btree;
2126         struct nilfs_btree_node *node;
2127         __u64 key;
2128         int ret;
2129
2130         btree = (struct nilfs_btree *)bmap;
2131         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2132         if (ret < 0)
2133                 return ret;
2134
2135         if (buffer_nilfs_node(*bh)) {
2136                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2137                 key = nilfs_btree_node_get_key(btree, node, 0);
2138         } else
2139                 key = nilfs_bmap_data_get_key(bmap, *bh);
2140
2141         /* on-disk format */
2142         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2143         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2144
2145         return 0;
2146 }
2147
2148 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2149 {
2150         struct buffer_head *bh;
2151         struct nilfs_btree *btree;
2152         struct nilfs_btree_path *path;
2153         __u64 ptr;
2154         int ret;
2155
2156         btree = (struct nilfs_btree *)bmap;
2157         path = nilfs_btree_alloc_path(btree);
2158         if (path == NULL)
2159                 return -ENOMEM;
2160         nilfs_btree_init_path(btree, path);
2161
2162         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2163         if (ret < 0) {
2164                 WARN_ON(ret == -ENOENT);
2165                 goto out;
2166         }
2167         ret = nilfs_btree_get_block(btree, ptr, &bh);
2168         if (ret < 0) {
2169                 WARN_ON(ret == -ENOENT);
2170                 goto out;
2171         }
2172
2173         if (!buffer_dirty(bh))
2174                 nilfs_btnode_mark_dirty(bh);
2175         brelse(bh);
2176         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2177                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2178
2179  out:
2180         nilfs_btree_clear_path(btree, path);
2181         nilfs_btree_free_path(btree, path);
2182         return ret;
2183 }
2184
2185 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2186         .bop_lookup             =       nilfs_btree_lookup,
2187         .bop_insert             =       nilfs_btree_insert,
2188         .bop_delete             =       nilfs_btree_delete,
2189         .bop_clear              =       NULL,
2190
2191         .bop_propagate          =       nilfs_btree_propagate,
2192
2193         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2194
2195         .bop_assign             =       nilfs_btree_assign,
2196         .bop_mark               =       nilfs_btree_mark,
2197
2198         .bop_last_key           =       nilfs_btree_last_key,
2199         .bop_check_insert       =       NULL,
2200         .bop_check_delete       =       nilfs_btree_check_delete,
2201         .bop_gather_data        =       nilfs_btree_gather_data,
2202 };
2203
2204 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2205         .bop_lookup             =       NULL,
2206         .bop_insert             =       NULL,
2207         .bop_delete             =       NULL,
2208         .bop_clear              =       NULL,
2209
2210         .bop_propagate          =       nilfs_btree_propagate_gc,
2211
2212         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2213
2214         .bop_assign             =       nilfs_btree_assign_gc,
2215         .bop_mark               =       NULL,
2216
2217         .bop_last_key           =       NULL,
2218         .bop_check_insert       =       NULL,
2219         .bop_check_delete       =       NULL,
2220         .bop_gather_data        =       NULL,
2221 };
2222
2223 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2224         .btop_find_target       =       nilfs_btree_find_target_v,
2225         .btop_set_target        =       nilfs_btree_set_target_v,
2226         .btop_propagate         =       nilfs_btree_propagate_v,
2227         .btop_assign            =       nilfs_btree_assign_v,
2228 };
2229
2230 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2231         .btop_find_target       =       NULL,
2232         .btop_set_target        =       NULL,
2233         .btop_propagate         =       nilfs_btree_propagate_p,
2234         .btop_assign            =       nilfs_btree_assign_p,
2235 };
2236
2237 int nilfs_btree_init(struct nilfs_bmap *bmap)
2238 {
2239         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2240
2241         bmap->b_ops = &nilfs_btree_ops;
2242         switch (bmap->b_inode->i_ino) {
2243         case NILFS_DAT_INO:
2244                 btree->bt_ops = &nilfs_btree_ops_p;
2245                 break;
2246         default:
2247                 btree->bt_ops = &nilfs_btree_ops_v;
2248                 break;
2249         }
2250
2251         return 0;
2252 }
2253
2254 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2255 {
2256         bmap->b_ops = &nilfs_btree_ops_gc;
2257 }