2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <linux/highmem.h>
22 #include "transaction.h"
23 #include "print-tree.h"
/*
 * Forward declarations for static helpers that are called earlier in this
 * file than they are defined (split_node, split_leaf, push_node_left,
 * balance_node_right and del_ptr).
 */
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26 *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28 *root, struct btrfs_key *ins_key,
29 struct btrfs_path *path, int data_size);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31 struct btrfs_root *root, struct extent_buffer *dst,
32 struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root,
35 struct extent_buffer *dst_buf,
36 struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38 struct btrfs_path *path, int level, int slot);
/*
 * Put a path into its empty state by zeroing the whole structure.
 */
40 inline void btrfs_init_path(struct btrfs_path *p)
42 memset(p, 0, sizeof(*p));
/*
 * Allocate a btrfs_path from the btrfs_path_cachep cache with GFP_NOFS and
 * initialise it through btrfs_init_path().
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing - confirm them against the full source.
 */
45 struct btrfs_path *btrfs_alloc_path(void)
47 struct btrfs_path *path;
48 path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
50 btrfs_init_path(path);
/*
 * Counterpart of btrfs_alloc_path(): release whatever the path still holds
 * (btrfs_release_path is called with a NULL root) and hand the structure
 * back to btrfs_path_cachep.
 */
56 void btrfs_free_path(struct btrfs_path *p)
58 btrfs_release_path(NULL, p);
59 kmem_cache_free(btrfs_path_cachep, p);
/*
 * Walk levels 0 .. BTRFS_MAX_LEVEL - 1 calling free_extent_buffer() on
 * p->nodes[i], then zero the path so that it can be reused.
 * NOTE(review): the handling of empty levels sits on lines missing from this
 * listing; root is not referenced by any visible statement.
 */
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
65 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
68 free_extent_buffer(p->nodes[i]);
70 memset(p, 0, sizeof(*p));
/*
 * Worker that makes a fresh copy of buf for the current transaction.
 *
 * trans, root    transaction handle and the tree the block belongs to
 * buf            block to copy
 * parent         node holding the pointer to buf; used on the path taken
 *                when buf is not root->node
 * parent_slot    slot in parent that points at buf
 * cow_ret        out parameter. NOTE(review): the store through cow_ret is
 *                not visible in this listing - presumably the new block
 * search_start   first hint handed to btrfs_alloc_free_block()
 * empty_size     second hint handed to btrfs_alloc_free_block()
 *
 * Visible steps: allocate the new block, copy cow->len bytes from buf, stamp
 * the copy with its own block number, the current transid and the objectid
 * of the owning root, fix up either the root or the parent slot, free the
 * old extent and mark the copy dirty.
 * NOTE(review): error checks and the return value are on lines that are
 * missing from this listing.
 */
73 static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
74 struct btrfs_root *root,
75 struct extent_buffer *buf,
76 struct extent_buffer *parent, int parent_slot,
77 struct extent_buffer **cow_ret,
78 u64 search_start, u64 empty_size)
80 struct extent_buffer *cow;
82 int different_trans = 0;
84 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
/* new home for the copy */
86 cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
/* both buffers are compared against root->sectorsize; the reaction to a mismatch is not shown */
90 if (buf->len != root->sectorsize || cow->len != root->sectorsize)
/* duplicate the old block, then give the copy its own identity */
93 copy_extent_buffer(cow, buf, 0, 0, cow->len);
94 btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow));
95 btrfs_set_header_generation(cow, trans->transid);
96 btrfs_set_header_owner(cow, root->root_key.objectid);
98 WARN_ON(btrfs_header_generation(buf) > trans->transid);
/* buf was last stamped by another transaction: btrfs_inc_ref() is called on it */
99 if (btrfs_header_generation(buf) != trans->transid) {
101 ret = btrfs_inc_ref(trans, root, buf);
105 clean_tree_block(trans, root, buf);
/* case 1: buf is the root of the tree */
108 if (buf == root->node) {
110 extent_buffer_get(cow);
/* the extent of the old root is only freed when it is not the commit root */
111 if (buf != root->commit_root) {
112 btrfs_free_extent(trans, root,
113 extent_buffer_blocknr(buf), 1, 1);
115 free_extent_buffer(buf);
/* case 2: buf hangs off parent, so the parent slot is pointed at the copy */
117 btrfs_set_node_blockptr(parent, parent_slot,
118 extent_buffer_blocknr(cow));
119 btrfs_mark_buffer_dirty(parent);
120 WARN_ON(btrfs_header_generation(parent) != trans->transid);
121 btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1);
123 free_extent_buffer(buf);
124 btrfs_mark_buffer_dirty(cow);
/*
 * Public entry point in front of __btrfs_cow_block().
 *
 * Two consistency checks come first, each printing a KERN_CRIT message on
 * mismatch: the transaction of the handle against
 * fs_info->running_transaction, and trans->transid against
 * fs_info->generation.
 *
 * The header generation of buf is then compared with trans->transid.
 * NOTE(review): the body of that branch is not visible - presumably the
 * block was already copied in this transaction and is handed back as is.
 *
 * Otherwise the allocation hint is the block number of buf with the low
 * 16 bits cleared (mask ~65535) and the work is delegated to
 * __btrfs_cow_block() with an empty_size of 0.
 */
129 int btrfs_cow_block(struct btrfs_trans_handle *trans,
130 struct btrfs_root *root, struct extent_buffer *buf,
131 struct extent_buffer *parent, int parent_slot,
132 struct extent_buffer **cow_ret)
135 if (trans->transaction != root->fs_info->running_transaction) {
136 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
137 root->fs_info->running_transaction->transid);
140 if (trans->transid != root->fs_info->generation) {
141 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
142 root->fs_info->generation);
145 if (btrfs_header_generation(buf) == trans->transid) {
150 search_start = extent_buffer_blocknr(buf) & ~((u64)65535);
151 return __btrfs_cow_block(trans, root, buf, parent,
152 parent_slot, cow_ret, search_start, 0);
/*
 * Proximity test for two block numbers: the visible conditions match when
 * the numbers differ and lie fewer than 8 blocks apart, in either order.
 * Equal block numbers satisfy neither visible condition.
 * NOTE(review): the return statements are missing from this listing -
 * presumably non-zero for a match and 0 otherwise.
 */
155 static int close_blocks(u64 blocknr, u64 other)
157 if (blocknr < other && other - blocknr < 8)
159 if (blocknr > other && blocknr - other < 8)
/*
 * Heuristic used by btrfs_realloc_node() for leaves.  The visible code
 * tests a defrag flag and then inspects the key type of the first, the last
 * and the middle item of the leaf, looking for BTRFS_DIR_ITEM_KEY.
 * NOTE(review): bh is not declared in any visible line while the parameter
 * is an extent_buffer; this looks like a leftover of the buffer_head
 * interface.  Confirm whether this function is compiled at all, and what
 * each branch returns.
 */
165 static int should_defrag_leaf(struct extent_buffer *eb)
168 struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb);
169 struct btrfs_disk_key *key;
172 if (buffer_defrag(bh))
175 nritems = btrfs_header_nritems(&leaf->header);
/* first item */
179 key = &leaf->items[0].key;
180 if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
/* last item */
183 key = &leaf->items[nritems-1].key;
184 if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
/* middle item */
187 key = &leaf->items[nritems/2].key;
188 if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
/*
 * Reallocate the children of parent through __btrfs_cow_block() so that
 * they are placed near each other.
 *
 * trans, root   transaction handle and tree
 * parent        node whose children are examined
 * cache_only    NOTE(review): not referenced by any visible line
 * last_ret      in: initial allocation hint (copied to search_start)
 *               out: updated with the block number of the most recent copy
 *
 * Visible flow: the same two transaction sanity checks as btrfs_cow_block(),
 * an early test on buffer_defrag_done(parent), then a loop over the slots of
 * parent.  For each slot the block pointer is compared with the neighbouring
 * slots by close_blocks(); the child is looked up or read, copied with a
 * hint of search_start and an empty_size of min(8, end_slot - i), and the
 * defrag flags on the copy are updated.
 * NOTE(review): cur_bh and tmp_bh are not declared in any visible line and
 * the flag helpers belong to the buffer_head interface while the signature
 * uses extent_buffer.  Confirm whether this function is compiled at all.
 */
195 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root, struct extent_buffer *parent,
197 int cache_only, u64 *last_ret)
201 struct btrfs_node *parent_node;
202 struct extent_buffer *cur_eb;
203 struct extent_buffer *tmp_eb;
205 u64 search_start = *last_ret;
215 if (trans->transaction != root->fs_info->running_transaction) {
216 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
217 root->fs_info->running_transaction->transid);
220 if (trans->transid != root->fs_info->generation) {
221 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
222 root->fs_info->generation);
225 if (buffer_defrag_done(parent))
228 parent_node = btrfs_buffer_node(parent);
229 parent_nritems = btrfs_header_nritems(&parent_node->header);
230 parent_level = btrfs_header_level(&parent_node->header);
233 end_slot = parent_nritems;
235 if (parent_nritems == 1)
238 for (i = start_slot; i < end_slot; i++) {
240 blocknr = btrfs_node_blockptr(parent_node, i);
242 last_block = blocknr;
/* compare with the previous slot, then with the next one */
244 other = btrfs_node_blockptr(parent_node, i - 1);
245 close = close_blocks(blocknr, other);
247 if (close && i < end_slot - 1) {
248 other = btrfs_node_blockptr(parent_node, i + 1);
249 close = close_blocks(blocknr, other);
252 last_block = blocknr;
256 cur_bh = btrfs_find_tree_block(root, blocknr);
257 if (!cur_bh || !buffer_uptodate(cur_bh) ||
258 buffer_locked(cur_bh) ||
259 (parent_level != 1 && !buffer_defrag(cur_bh)) ||
260 (parent_level == 1 && !should_defrag_leaf(cur_bh))) {
265 if (!cur_bh || !buffer_uptodate(cur_bh) ||
266 buffer_locked(cur_bh)) {
268 cur_bh = read_tree_block(root, blocknr);
/* without a caller supplied hint, start from last_block with the low 16 bits cleared */
271 if (search_start == 0)
272 search_start = last_block & ~((u64)65535);
274 err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
275 &tmp_bh, search_start,
276 min(8, end_slot - i));
/* the next allocation is hinted at the block that was just written */
281 search_start = bh_blocknr(tmp_bh);
282 *last_ret = search_start;
283 if (parent_level == 1)
284 clear_buffer_defrag(tmp_bh);
285 set_buffer_defrag_done(tmp_bh);
293 * The leaf data grows from end-to-front in the node.
294 * this returns the address of the start of the last item,
295 * which is the stop of the leaf data stack
/*
 * Returns either BTRFS_LEAF_DATA_SIZE(root) or the data offset of the last
 * item (index nr - 1) of the leaf.
 * NOTE(review): the condition selecting the first return is missing from
 * this listing - presumably the empty leaf case (nr == 0).
 */
297 static inline unsigned int leaf_data_end(struct btrfs_root *root,
298 struct extent_buffer *leaf)
300 u32 nr = btrfs_header_nritems(leaf);
302 return BTRFS_LEAF_DATA_SIZE(root);
303 return btrfs_item_offset_nr(leaf, nr - 1);
307 * compare two keys in a memcmp fashion
/*
 * Three-way comparison of an on-disk key with a cpu key.  The disk key is
 * converted with btrfs_disk_key_to_cpu() and the fields are compared in the
 * order objectid, type, offset; the first field that differs decides.
 * NOTE(review): the return statements are missing from this listing -
 * presumably 1 when disk is greater, -1 when it is smaller and 0 when equal.
 */
309 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
313 btrfs_disk_key_to_cpu(&k1, disk);
315 if (k1.objectid > k2->objectid)
317 if (k1.objectid < k2->objectid)
319 if (k1.type > k2->type)
321 if (k1.type < k2->type)
323 if (k1.offset > k2->offset)
325 if (k1.offset < k2->offset)
/*
 * Sanity checks for the interior node at path->nodes[level].  Every failed
 * check is a BUG_ON.
 *
 * Checked in the visible code:
 *  - the node is not empty
 *  - the key stored in the parent slot equals the first key of the node and
 *    the block pointer in that slot equals the block number in the header
 *    of the node
 *  - the item count does not exceed BTRFS_NODEPTRS_PER_BLOCK(root)
 *  - the key at the current slot sorts after the key at slot - 1 and before
 *    the key at slot + 1
 * NOTE(review): the guards around the parent checks and around the
 * slot - 1 check are on lines missing from this listing.
 */
330 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
333 struct extent_buffer *parent = NULL;
334 struct extent_buffer *node = path->nodes[level];
335 struct btrfs_disk_key parent_key;
336 struct btrfs_disk_key node_key;
339 struct btrfs_key cpukey;
340 u32 nritems = btrfs_header_nritems(node);
342 if (path->nodes[level + 1])
343 parent = path->nodes[level + 1];
345 slot = path->slots[level];
346 BUG_ON(nritems == 0);
/* parent and child must agree on the first key and on the block number */
348 parent_slot = path->slots[level + 1];
349 btrfs_node_key(parent, &parent_key, parent_slot);
350 btrfs_node_key(node, &node_key, 0);
351 BUG_ON(memcmp(&parent_key, &node_key,
352 sizeof(struct btrfs_disk_key)));
353 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
354 btrfs_header_blocknr(node));
356 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
/* ordering against the left neighbour */
358 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
359 btrfs_node_key(node, &node_key, slot);
360 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
/* ordering against the right neighbour */
362 if (slot < nritems - 1) {
363 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
364 btrfs_node_key(node, &node_key, slot);
365 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
/*
 * Sanity checks for the leaf at path->nodes[level].
 *
 * Checked in the visible code:
 *  - the key in the parent slot equals the first key of the leaf and the
 *    block pointer in that slot equals the block number in the leaf header
 *  - a loop over the items: key i sorts before key i + 1, and the data of
 *    item i starts where the data of item i + 1 ends
 *  - offset + size of an item equals BTRFS_LEAF_DATA_SIZE(root)
 *  - the size of the last item is not above the literal 4096
 *  - the same key order and data adjacency checks around the current slot
 *  - offset + size of item 0 equals BTRFS_LEAF_DATA_SIZE(root)
 * Most failures print the leaf and a message; three are BUG_ONs.
 *
 * NOTE(review): slot is read from path->slots[0], not path->slots[level].
 * NOTE(review): the loop bound is i < nritems - 2, so the loop itself does
 * not look at the last pair of items - confirm this is intended.
 * NOTE(review): the guards around several checks and the reaction after
 * each message are on lines missing from this listing.
 */
370 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
373 struct extent_buffer *leaf = path->nodes[level];
374 struct extent_buffer *parent = NULL;
376 struct btrfs_key cpukey;
377 struct btrfs_disk_key parent_key;
378 struct btrfs_disk_key leaf_key;
379 int slot = path->slots[0];
381 u32 nritems = btrfs_header_nritems(leaf);
383 if (path->nodes[level + 1])
384 parent = path->nodes[level + 1];
390 parent_slot = path->slots[level + 1];
391 btrfs_node_key(parent, &parent_key, parent_slot);
392 btrfs_item_key(leaf, &leaf_key, 0);
394 BUG_ON(memcmp(&parent_key, &leaf_key,
395 sizeof(struct btrfs_disk_key)));
396 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
397 btrfs_header_blocknr(leaf));
400 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
401 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
402 btrfs_item_key(leaf, &leaf_key, i);
403 if (comp_keys(&leaf_key, &cpukey) >= 0) {
404 btrfs_print_leaf(root, leaf);
405 printk("slot %d offset bad key\n", i);
408 if (btrfs_item_offset_nr(leaf, i) !=
409 btrfs_item_end_nr(leaf, i + 1)) {
410 btrfs_print_leaf(root, leaf);
411 printk("slot %d offset bad\n", i);
415 if (btrfs_item_offset_nr(leaf, i) +
416 btrfs_item_size_nr(leaf, i) !=
417 BTRFS_LEAF_DATA_SIZE(root)) {
418 btrfs_print_leaf(root, leaf);
419 printk("slot %d first offset bad\n", i);
425 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
426 btrfs_print_leaf(root, leaf);
427 printk("slot %d bad size \n", nritems - 1);
/* checks around the slot the path currently points at */
432 if (slot != 0 && slot < nritems - 1) {
433 btrfs_item_key(leaf, &leaf_key, slot);
434 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
435 if (comp_keys(&leaf_key, &cpukey) <= 0) {
436 btrfs_print_leaf(root, leaf);
437 printk("slot %d offset bad key\n", slot);
440 if (btrfs_item_offset_nr(leaf, slot - 1) !=
441 btrfs_item_end_nr(leaf, slot)) {
442 btrfs_print_leaf(root, leaf);
443 printk("slot %d offset bad\n", slot);
447 if (slot < nritems - 1) {
448 btrfs_item_key(leaf, &leaf_key, slot);
449 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
450 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
451 if (btrfs_item_offset_nr(leaf, slot) !=
452 btrfs_item_end_nr(leaf, slot + 1)) {
453 btrfs_print_leaf(root, leaf);
454 printk("slot %d offset bad\n", slot);
458 BUG_ON(btrfs_item_offset_nr(leaf, 0) +
459 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
/*
 * Front end for the two checkers.  The fsid stored in the header of the
 * buffer is compared with root->fs_info->fsid and a warning carrying
 * buf->start is printed on mismatch; the block is then handed to
 * check_leaf() or check_node().
 * NOTE(review): the condition choosing between the two checkers is missing
 * from this listing - presumably level == 0 selects check_leaf().
 */
463 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
466 struct extent_buffer *buf = path->nodes[level];
468 if (memcmp_extent_buffer(buf, root->fs_info->fsid,
469 (unsigned long)btrfs_header_fsid(buf),
471 printk("warning bad block %Lu\n", buf->start);
475 return check_leaf(root, path, level);
476 return check_node(root, path, level);
480 * search for key in the extent_buffer. The items start at offset p,
481 * and they are item_size apart. There are 'max' items in p.
483 * the slot in the array is returned via slot, and it points to
484 * the place where you would insert key if it is not found in
487 * slot may point to max if the key is bigger than all of the keys
/*
 * Binary search over fixed-size records inside an extent buffer.
 *
 * eb         buffer to search
 * p          byte offset of the first record
 * item_size  distance in bytes between two records; each record starts with
 *            a struct btrfs_disk_key
 * key        key to look for
 *
 * Each probe computes the byte offset of record mid.  If no mapping is
 * active, or the key does not lie completely inside the active mapping, the
 * old mapping is dropped and map_extent_buffer() is called for the key.
 * The key is then read either through the mapping (kaddr based pointer) or
 * by copying it into the local variable unaligned with
 * read_extent_buffer(), and compared with comp_keys().  The mapping is
 * released before the function returns.
 * NOTE(review): the loop header, the branch selecting between the mapped
 * and the copied key, the handling of the comparison result and the return
 * values are on lines missing from this listing.
 */
489 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
490 int item_size, struct btrfs_key *key,
497 struct btrfs_disk_key *tmp = NULL;
498 struct btrfs_disk_key unaligned;
499 unsigned long offset;
500 char *map_token = NULL;
502 unsigned long map_start = 0;
503 unsigned long map_len = 0;
507 mid = (low + high) / 2;
508 offset = p + mid * item_size;
510 if (!map_token || offset < map_start ||
511 (offset + sizeof(struct btrfs_disk_key)) >
512 map_start + map_len) {
514 unmap_extent_buffer(eb, map_token, KM_USER0);
517 err = map_extent_buffer(eb, offset,
518 sizeof(struct btrfs_disk_key),
520 &map_start, &map_len, KM_USER0);
523 tmp = (struct btrfs_disk_key *)(kaddr + offset -
526 read_extent_buffer(eb, &unaligned,
527 offset, sizeof(unaligned));
532 tmp = (struct btrfs_disk_key *)(kaddr + offset -
535 ret = comp_keys(tmp, key);
544 unmap_extent_buffer(eb, map_token, KM_USER0);
550 unmap_extent_buffer(eb, map_token, KM_USER0);
555 * simple bin_search frontend that does the right thing for
/*
 * Pick the record layout for generic_bin_search(): either the item array of
 * a leaf (offset of items in struct btrfs_leaf, stride sizeof(struct
 * btrfs_item)) or the pointer array of a node (offset of ptrs in struct
 * btrfs_node, stride sizeof(struct btrfs_key_ptr)).  The record count is the
 * nritems value from the header in both cases.
 * NOTE(review): the condition choosing the layout is missing from this
 * listing - presumably level == 0 selects the leaf layout.
 */
558 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
559 int level, int *slot)
562 return generic_bin_search(eb,
563 offsetof(struct btrfs_leaf, items),
564 sizeof(struct btrfs_item),
565 key, btrfs_header_nritems(eb),
568 return generic_bin_search(eb,
569 offsetof(struct btrfs_node, ptrs),
570 sizeof(struct btrfs_key_ptr),
571 key, btrfs_header_nritems(eb),
/*
 * Read the child block referenced by the given slot of parent through
 * read_tree_block().
 * NOTE(review): slots at or beyond the item count are rejected on a line
 * whose body is missing from this listing - presumably NULL is returned, as
 * is any handling of negative slots or a NULL parent.
 */
577 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
578 struct extent_buffer *parent, int slot)
582 if (slot >= btrfs_header_nritems(parent))
584 return read_tree_block(root, btrfs_node_blockptr(parent, slot));
/*
 * Rebalance the node at path->nodes[level] (called mid below) with its left
 * and right siblings; btrfs_search_slot() calls this when ins_len < 0.
 *
 * Visible flow:
 *  1. Remember the block pointer at the current slot (orig_ptr) and look up
 *     the parent and the parent slot.
 *  2. Root special case: when mid holds exactly one pointer its only child
 *     is read with read_node_slot(), mid is cleaned, both references to it
 *     are dropped and its extent is freed.
 *  3. Item-count tests: one compares mid against a quarter of
 *     BTRFS_NODEPTRS_PER_BLOCK(root), another against 2; the statements
 *     they guard are not visible.
 *  4. Read the left and right siblings through the parent and pass each to
 *     btrfs_cow_block().
 *  5. push_node_left(left, mid), then push_node_left(mid, right).  A right
 *     node that ends up empty is removed from the parent with del_ptr() and
 *     its extent is freed; otherwise its first key is copied into the
 *     parent.
 *  6. If mid is left with a single pointer, balance_node_right(mid, left)
 *     pulls pointers over from the left sibling.
 *  7. An empty mid is removed from the parent and freed; otherwise its first
 *     key is copied into the parent.
 *  8. The path is fixed up so that it points at the block that now holds the
 *     original pointer, followed by check_block() and a final comparison of
 *     the block pointer at the path position.
 * NOTE(review): error handling, most branch conditions and the return value
 * are on lines missing from this listing.
 */
587 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
588 *root, struct btrfs_path *path, int level)
590 struct extent_buffer *right = NULL;
591 struct extent_buffer *mid;
592 struct extent_buffer *left = NULL;
593 struct extent_buffer *parent = NULL;
597 int orig_slot = path->slots[level];
598 int err_on_enospc = 0;
604 mid = path->nodes[level];
605 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
607 if (level < BTRFS_MAX_LEVEL - 1)
608 parent = path->nodes[level + 1];
609 pslot = path->slots[level + 1];
612 * deal with the case where there is only one pointer in the root
613 * by promoting the node below to a root
616 struct extent_buffer *child;
617 u64 blocknr = extent_buffer_blocknr(mid);
619 if (btrfs_header_nritems(mid) != 1)
622 /* promote the child to a root */
623 child = read_node_slot(root, mid, 0);
626 path->nodes[level] = NULL;
627 clean_tree_block(trans, root, mid);
628 wait_on_tree_block_writeback(root, mid);
629 /* once for the path */
630 free_extent_buffer(mid);
631 /* once for the root ptr */
632 free_extent_buffer(mid);
633 return btrfs_free_extent(trans, root, blocknr, 1, 1);
635 if (btrfs_header_nritems(mid) >
636 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
639 if (btrfs_header_nritems(mid) < 2)
642 left = read_node_slot(root, parent, pslot - 1);
644 wret = btrfs_cow_block(trans, root, left,
645 parent, pslot - 1, &left);
651 right = read_node_slot(root, parent, pslot + 1);
653 wret = btrfs_cow_block(trans, root, right,
654 parent, pslot + 1, &right);
661 /* first, try to make some room in the middle buffer */
663 orig_slot += btrfs_header_nritems(left);
664 wret = push_node_left(trans, root, left, mid);
667 if (btrfs_header_nritems(mid) < 2)
672 * then try to empty the right most buffer into the middle
675 wret = push_node_left(trans, root, mid, right);
676 if (wret < 0 && wret != -ENOSPC)
678 if (btrfs_header_nritems(right) == 0) {
679 u64 blocknr = extent_buffer_blocknr(right);
680 clean_tree_block(trans, root, right);
681 wait_on_tree_block_writeback(root, right);
682 free_extent_buffer(right);
684 wret = del_ptr(trans, root, path, level + 1, pslot +
688 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
692 struct btrfs_disk_key right_key;
693 btrfs_node_key(right, &right_key, 0);
694 btrfs_set_node_key(parent, &right_key, pslot + 1);
695 btrfs_mark_buffer_dirty(parent);
698 if (btrfs_header_nritems(mid) == 1) {
700 * we're not allowed to leave a node with one item in the
701 * tree during a delete. A deletion from lower in the tree
702 * could try to delete the only pointer in this node.
703 * So, pull some keys from the left.
704 * There has to be a left pointer at this point because
705 * otherwise we would have pulled some pointers from the
709 wret = balance_node_right(trans, root, mid, left);
716 if (btrfs_header_nritems(mid) == 0) {
717 /* we've managed to empty the middle node, drop it */
718 u64 blocknr = extent_buffer_blocknr(mid);
719 clean_tree_block(trans, root, mid);
720 wait_on_tree_block_writeback(root, mid);
721 free_extent_buffer(mid);
723 wret = del_ptr(trans, root, path, level + 1, pslot);
726 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
730 /* update the parent key to reflect our changes */
731 struct btrfs_disk_key mid_key;
732 btrfs_node_key(mid, &mid_key, 0);
733 btrfs_set_node_key(parent, &mid_key, pslot);
734 btrfs_mark_buffer_dirty(parent);
737 /* update the path */
739 if (btrfs_header_nritems(left) > orig_slot) {
740 extent_buffer_get(left);
741 path->nodes[level] = left;
742 path->slots[level + 1] -= 1;
743 path->slots[level] = orig_slot;
745 free_extent_buffer(mid);
747 orig_slot -= btrfs_header_nritems(left);
748 path->slots[level] = orig_slot;
751 /* double check we haven't messed things up */
752 check_block(root, path, level);
754 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
758 free_extent_buffer(right);
760 free_extent_buffer(left);
764 /* returns zero if the push worked, non-zero otherwise */
/*
 * Try to make room in the node at path->nodes[level] before an insert by
 * moving pointers into a sibling; split_node() calls this before it splits.
 *
 * Left sibling first: it is read through the parent and skipped when it
 * already holds BTRFS_NODEPTRS_PER_BLOCK(root) - 1 or more pointers.
 * Otherwise it goes through btrfs_cow_block() and push_node_left().  After
 * the push the first key of mid is copied into the parent slot and the path
 * is moved to the left node when the original slot ended up there.
 *
 * Right sibling next, with the same fullness test, btrfs_cow_block() and
 * balance_node_right().  After the push the first key of the right node is
 * copied into parent slot pslot + 1 and the path is moved to the right node
 * when the original slot is at or beyond the new item count of mid.
 *
 * check_node() is run on the resulting path position.
 * NOTE(review): branch conditions, error handling and the return statements
 * are on lines missing from this listing.
 */
765 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
766 struct btrfs_root *root,
767 struct btrfs_path *path, int level)
769 struct extent_buffer *right = NULL;
770 struct extent_buffer *mid;
771 struct extent_buffer *left = NULL;
772 struct extent_buffer *parent = NULL;
776 int orig_slot = path->slots[level];
782 mid = path->nodes[level];
783 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
785 if (level < BTRFS_MAX_LEVEL - 1)
786 parent = path->nodes[level + 1];
787 pslot = path->slots[level + 1];
792 left = read_node_slot(root, parent, pslot - 1);
794 /* first, try to make some room in the middle buffer */
797 left_nr = btrfs_header_nritems(left);
798 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
801 ret = btrfs_cow_block(trans, root, left, parent,
806 wret = push_node_left(trans, root,
813 struct btrfs_disk_key disk_key;
814 orig_slot += left_nr;
815 btrfs_node_key(mid, &disk_key, 0);
816 btrfs_set_node_key(parent, &disk_key, pslot);
817 btrfs_mark_buffer_dirty(parent);
818 if (btrfs_header_nritems(left) > orig_slot) {
819 path->nodes[level] = left;
820 path->slots[level + 1] -= 1;
821 path->slots[level] = orig_slot;
822 free_extent_buffer(mid);
825 btrfs_header_nritems(left);
826 path->slots[level] = orig_slot;
827 free_extent_buffer(left);
829 check_node(root, path, level);
832 free_extent_buffer(left);
834 right= read_node_slot(root, parent, pslot + 1);
837 * then try to empty the right most buffer into the middle
841 right_nr = btrfs_header_nritems(right);
842 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
845 ret = btrfs_cow_block(trans, root, right,
851 wret = balance_node_right(trans, root,
858 struct btrfs_disk_key disk_key;
860 btrfs_node_key(right, &disk_key, 0);
861 btrfs_set_node_key(parent, &disk_key, pslot + 1);
862 btrfs_mark_buffer_dirty(parent);
864 if (btrfs_header_nritems(mid) <= orig_slot) {
865 path->nodes[level] = right;
866 path->slots[level + 1] += 1;
867 path->slots[level] = orig_slot -
868 btrfs_header_nritems(mid);
869 free_extent_buffer(mid);
871 free_extent_buffer(right);
873 check_node(root, path, level);
876 free_extent_buffer(right);
878 check_node(root, path, level);
883 * readahead one full node of leaves
/*
 * Issue readahead for blocks referenced by the node at path->nodes[level].
 *
 * Visible flow:
 *  - the block pointer at slot is looked up with btrfs_find_tree_block()
 *    and the reference is dropped again
 *  - the block pointers of slots slot .. nritems - 1 are recorded in a
 *    local radix bit set
 *  - cluster_start is seeded with search - 4 (with a wrap-around test) or
 *    with search + 4
 *  - the recorded block numbers are pulled out up to 8 at a time
 *    (ARRAY_SIZE(gang)); each one is cleared from the set and, when it is
 *    close to cluster_start according to close_blocks(), read ahead with
 *    readahead_tree_block() and made the new cluster_start
 *  - path->reada == 1 together with nread > 16 guards a statement that is
 *    not visible - presumably the loop is left at that point
 * NOTE(review): the early-exit conditions, the use of direction and the
 * loop around find_first_radix_bit() are on lines missing from this listing.
 */
885 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
888 struct extent_buffer *node;
896 int direction = path->reada;
897 struct radix_tree_root found;
898 unsigned long gang[8];
899 struct extent_buffer *eb;
904 if (!path->nodes[level])
907 node = path->nodes[level];
908 search = btrfs_node_blockptr(node, slot);
909 eb = btrfs_find_tree_block(root, search);
911 free_extent_buffer(eb);
915 init_bit_radix(&found);
916 nritems = btrfs_header_nritems(node);
917 for (i = slot; i < nritems; i++) {
918 blocknr = btrfs_node_blockptr(node, i);
919 set_radix_bit(&found, blocknr);
922 cluster_start = search - 4;
/* true only when the unsigned subtraction above wrapped around */
923 if (cluster_start > search)
926 cluster_start = search + 4;
928 ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang));
931 for (i = 0; i < ret; i++) {
933 clear_radix_bit(&found, blocknr);
934 if (path->reada == 1 && nread > 16)
936 if (close_blocks(cluster_start, blocknr)) {
937 readahead_tree_block(root, blocknr);
939 cluster_start = blocknr;
945 * look for key in the tree. path is filled in with nodes along the way
946 * if key is found, we return zero and you can find the item in the leaf
947 * level of the path (level 0)
949 * If the key isn't found, the path points to the slot where it should
950 * be inserted, and 1 is returned. If there are other errors during the
951 * search a negative error number is returned.
953 * if ins_len > 0, nodes and leaves will be split as we walk down the
954 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
/*
 * Walk the tree from the root towards the leaf looking for key and record
 * the visited blocks and slots in p.
 *
 * Preconditions flagged by WARN_ON in the visible code: lowest_level and
 * ins_len are not both set, p->nodes[0] is NULL on entry, and
 * fs_info->fs_mutex is held.
 *
 * Visible per-level steps:
 *  - take a reference on the current block b and read its level
 *  - pass b through btrfs_cow_block(); BUG_ON(!cow && ins_len) enforces
 *    that an insert or delete always copies
 *  - run check_block() and bin_search() and store the slot in p
 *  - interior blocks: when ins_len > 0 and b holds at least
 *    BTRFS_NODEPTRS_PER_BLOCK(root) - 1 pointers, split_node() is called;
 *    when ins_len < 0, balance_level() is called; b and slot are reloaded
 *    from p afterwards.  The walk stops at lowest_level; otherwise
 *    readahead may be issued and the child block is read
 *  - leaf: when ins_len > 0 and the free space is below
 *    sizeof(struct btrfs_item) + ins_len, split_leaf() is called
 * NOTE(review): the outer loop, the conditions guarding the copy and the
 * readahead, the error paths and the return statements are on lines missing
 * from this listing.
 */
957 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
958 *root, struct btrfs_key *key, struct btrfs_path *p, int
961 struct extent_buffer *b;
966 int should_reada = p->reada;
969 lowest_level = p->lowest_level;
970 WARN_ON(lowest_level && ins_len);
971 WARN_ON(p->nodes[0] != NULL);
972 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
975 extent_buffer_get(b);
977 level = btrfs_header_level(b);
980 wret = btrfs_cow_block(trans, root, b,
985 free_extent_buffer(b);
989 BUG_ON(!cow && ins_len);
990 if (level != btrfs_header_level(b))
992 level = btrfs_header_level(b);
994 ret = check_block(root, p, level);
997 ret = bin_search(b, key, level, &slot);
1001 p->slots[level] = slot;
1002 if (ins_len > 0 && btrfs_header_nritems(b) >=
1003 BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1004 int sret = split_node(trans, root, p, level);
1008 b = p->nodes[level];
1009 slot = p->slots[level];
1010 } else if (ins_len < 0) {
1011 int sret = balance_level(trans, root, p,
1015 b = p->nodes[level];
1018 slot = p->slots[level];
1019 BUG_ON(btrfs_header_nritems(b) == 1);
1021 /* this is only true while dropping a snapshot */
1022 if (level == lowest_level)
1024 blocknr = btrfs_node_blockptr(b, slot);
1026 reada_for_search(root, p, level, slot);
1027 b = read_tree_block(root, btrfs_node_blockptr(b, slot));
1029 p->slots[level] = slot;
1030 if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1031 sizeof(struct btrfs_item) + ins_len) {
1032 int sret = split_leaf(trans, root, key,
1045 * adjust the pointers going up the tree, starting at level
1046 * making sure the right key of each node points to 'key'.
1047 * This is used after shifting pointers to the left, so it stops
1048 * fixing up pointers when a given leaf/node is not in slot 0 of the
1051 * If this fails to write a tree block, it returns -1, but continues
1052 * fixing up the blocks in ram so the tree is consistent.
/*
 * Walk up the path starting at the given level and, for every level that
 * has a node in the path, store key in the slot recorded for that level and
 * mark the node dirty.
 * NOTE(review): the assignment of t, the reaction to an empty path level
 * and the test that ends the walk are on lines missing from this listing;
 * trans and root are not referenced by any visible statement.
 */
1054 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1055 struct btrfs_root *root, struct btrfs_path *path,
1056 struct btrfs_disk_key *key, int level)
1060 struct extent_buffer *t;
1062 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1063 int tslot = path->slots[i];
1064 if (!path->nodes[i])
1067 btrfs_set_node_key(t, key, tslot);
1068 btrfs_mark_buffer_dirty(path->nodes[i]);
1076 * try to push data from one node into the next node left in the
1079 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1080 * error, and > 0 if there was no room in the left hand block.
/*
 * Move key pointers from the front of src to the end of dst.
 *
 * The number moved is the free room in dst (BTRFS_NODEPTRS_PER_BLOCK(root)
 * minus its item count), capped at the item count of src, so src may end up
 * empty.  The pointers are appended to dst, the remainder of src is shifted
 * down to slot 0, both item counts are updated and both buffers are marked
 * dirty.
 * NOTE(review): the statement executed when dst has no room
 * (push_items <= 0) and the final return are missing from this listing.
 */
1082 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1083 *root, struct extent_buffer *dst,
1084 struct extent_buffer *src)
1091 src_nritems = btrfs_header_nritems(src);
1092 dst_nritems = btrfs_header_nritems(dst);
1093 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1095 if (push_items <= 0) {
1099 if (src_nritems < push_items)
1100 push_items = src_nritems;
/* append the first push_items pointers of src to dst */
1102 copy_extent_buffer(dst, src,
1103 btrfs_node_key_ptr_offset(dst_nritems),
1104 btrfs_node_key_ptr_offset(0),
1105 push_items * sizeof(struct btrfs_key_ptr));
/* close the gap at the front of src when something is left in it */
1107 if (push_items < src_nritems) {
1108 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1109 btrfs_node_key_ptr_offset(push_items),
1110 (src_nritems - push_items) *
1111 sizeof(struct btrfs_key_ptr));
1113 btrfs_set_header_nritems(src, src_nritems - push_items);
1114 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1115 btrfs_mark_buffer_dirty(src);
1116 btrfs_mark_buffer_dirty(dst);
1121 * try to push data from one node into the next node right in the
1124 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1125 * error, and > 0 if there was no room in the right hand block.
1127 * this will only push up to 1/2 the contents of the left node over
/*
 * Move key pointers from the end of src to the front of dst.
 *
 * The number moved starts as the free room in dst and is capped at
 * max_push = src_nritems / 2 + 1.  The existing pointers of dst are shifted
 * up by push_items slots, the last push_items pointers of src are copied to
 * slot 0 of dst, both item counts are updated and both buffers are marked
 * dirty.
 * NOTE(review): the statements executed when dst has no room or when
 * max_push >= src_nritems, the length argument of the memmove and the final
 * return are on lines missing from this listing.
 */
1129 static int balance_node_right(struct btrfs_trans_handle *trans,
1130 struct btrfs_root *root,
1131 struct extent_buffer *dst,
1132 struct extent_buffer *src)
1140 src_nritems = btrfs_header_nritems(src);
1141 dst_nritems = btrfs_header_nritems(dst);
1142 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1143 if (push_items <= 0)
1146 max_push = src_nritems / 2 + 1;
1147 /* don't try to empty the node */
1148 if (max_push >= src_nritems)
1151 if (max_push < push_items)
1152 push_items = max_push;
1154 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1155 btrfs_node_key_ptr_offset(0),
1157 sizeof(struct btrfs_key_ptr));
1159 copy_extent_buffer(dst, src,
1160 btrfs_node_key_ptr_offset(0),
1161 btrfs_node_key_ptr_offset(src_nritems - push_items),
1162 push_items * sizeof(struct btrfs_key_ptr));
1164 btrfs_set_header_nritems(src, src_nritems - push_items);
1165 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1167 btrfs_mark_buffer_dirty(src);
1168 btrfs_mark_buffer_dirty(dst);
1173 * helper function to insert a new root level in the tree.
1174 * A new node is allocated, and a single item is inserted to
1175 * point to the existing root
1177 * returns zero on success or < 0 on failure.
/*
 * Grow the tree by one level.
 *
 * Preconditions enforced by BUG_ON: the path has no node at level yet and
 * the node at level - 1 is the current root->node.
 *
 * A block c is allocated with the block number of the current root as the
 * hint, zeroed over root->nodesize bytes and given a header: one item, the
 * requested level, its own block number, the current transid, the objectid
 * of the owning root and the filesystem fsid.  Its single slot receives the
 * first key of the old root and the block number of the old root.  The
 * reference on the old root->node is dropped, an extra reference is taken
 * on c and c is stored in the path at level with slot 0.
 * NOTE(review): the allocation-failure check, the branch choosing between
 * btrfs_item_key() and btrfs_node_key(), the assignment that makes c the
 * new root->node and the return statement are on lines missing from this
 * listing.
 */
1179 static int insert_new_root(struct btrfs_trans_handle *trans,
1180 struct btrfs_root *root,
1181 struct btrfs_path *path, int level)
1183 struct extent_buffer *lower;
1184 struct extent_buffer *c;
1185 struct btrfs_disk_key lower_key;
1187 BUG_ON(path->nodes[level]);
1188 BUG_ON(path->nodes[level-1] != root->node);
1190 c = btrfs_alloc_free_block(trans, root,
1191 extent_buffer_blocknr(root->node), 0);
1194 memset_extent_buffer(c, 0, 0, root->nodesize);
1195 btrfs_set_header_nritems(c, 1);
1196 btrfs_set_header_level(c, level);
1197 btrfs_set_header_blocknr(c, extent_buffer_blocknr(c));
1198 btrfs_set_header_generation(c, trans->transid);
1199 btrfs_set_header_owner(c, root->root_key.objectid);
1200 lower = path->nodes[level-1];
1202 write_extent_buffer(c, root->fs_info->fsid,
1203 (unsigned long)btrfs_header_fsid(c),
/* the first key of the old root becomes the only key of the new root */
1206 btrfs_item_key(lower, &lower_key, 0);
1208 btrfs_node_key(lower, &lower_key, 0);
1209 btrfs_set_node_key(c, &lower_key, 0);
1210 btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower));
1212 btrfs_mark_buffer_dirty(c);
1214 /* the super has an extra ref to root->node */
1215 free_extent_buffer(root->node);
1217 extent_buffer_get(c);
1218 path->nodes[level] = c;
1219 path->slots[level] = 0;
1224 * worker function to insert a single pointer in a node.
1225 * the node should have enough room for the pointer already
1227 * slot and level indicate where you want the key to go, and
1228 * blocknr is the block the key points to.
1230 * returns zero on success and < 0 on any error
/*
 * Insert one key pointer (key, blocknr) at the given slot of the node at
 * path->nodes[level], which must exist (BUG_ON otherwise).
 *
 * When the slot is not the append position, the pointers from slot onwards
 * are shifted up by one.  The key and block number are then written, the
 * item count is incremented, the node is marked dirty and check_node() is
 * run on it.
 * NOTE(review): the statement executed when the node is already full
 * (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) and the return value are on
 * lines missing from this listing.
 */
1232 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1233 *root, struct btrfs_path *path, struct btrfs_disk_key
1234 *key, u64 blocknr, int slot, int level)
1236 struct extent_buffer *lower;
1239 BUG_ON(!path->nodes[level]);
1240 lower = path->nodes[level];
1241 nritems = btrfs_header_nritems(lower);
1244 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
/* open a one-slot gap unless the new pointer goes at the very end */
1246 if (slot != nritems) {
1247 memmove_extent_buffer(lower,
1248 btrfs_node_key_ptr_offset(slot + 1),
1249 btrfs_node_key_ptr_offset(slot),
1250 (nritems - slot) * sizeof(struct btrfs_key_ptr));
1252 btrfs_set_node_key(lower, key, slot);
1253 btrfs_set_node_blockptr(lower, slot, blocknr);
1254 btrfs_set_header_nritems(lower, nritems + 1);
1255 btrfs_mark_buffer_dirty(lower);
1256 check_node(root, path, level);
1261 * split the node at the specified level in path in two.
1262 * The path is corrected to point to the appropriate node after the split
1264 * Before splitting this tries to make some room in the node by pushing
1265 * left and right, if either one works, it returns right away.
1267 * returns 0 on success and < 0 on failure
/*
 * Split the node at path->nodes[level] into two.
 *
 * Visible flow:
 *  - when the node is root->node, a new root is added above it with
 *    insert_new_root(); otherwise push_nodes_for_insert() tries to shed
 *    pointers to the siblings first and the split is avoided when that
 *    leaves fewer than BTRFS_NODEPTRS_PER_BLOCK(root) - 1 pointers
 *  - a new block is allocated with the block number of the node as hint and
 *    its header is filled in (flags and level copied from the node, own
 *    block number, current transid, owner objectid, fsid)
 *  - mid = (c_nritems + 1) / 2; the pointers from mid onwards are copied to
 *    slot 0 of the new block and both item counts are set
 *  - the first key of the new block is inserted into the parent one slot
 *    after the slot of the node, through insert_ptr()
 *  - the path follows the half that now holds the original slot: a slot at
 *    or above mid moves to the new block, and the reference on the other
 *    block is dropped
 * NOTE(review): several conditions, the error paths and the return
 * statements are on lines missing from this listing.
 */
1269 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1270 *root, struct btrfs_path *path, int level)
1272 struct extent_buffer *c;
1273 struct extent_buffer *split;
1274 struct btrfs_disk_key disk_key;
1280 c = path->nodes[level];
1281 if (c == root->node) {
1282 /* trying to split the root, lets make a new one */
1283 ret = insert_new_root(trans, root, path, level + 1);
1287 ret = push_nodes_for_insert(trans, root, path, level);
1288 c = path->nodes[level];
1289 if (!ret && btrfs_header_nritems(c) <
1290 BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1296 c_nritems = btrfs_header_nritems(c);
1297 split = btrfs_alloc_free_block(trans, root,
1298 extent_buffer_blocknr(c), 0);
1300 return PTR_ERR(split);
1302 btrfs_set_header_flags(split, btrfs_header_flags(c));
1303 btrfs_set_header_level(split, btrfs_header_level(c));
1304 btrfs_set_header_blocknr(split, extent_buffer_blocknr(split));
1305 btrfs_set_header_generation(split, trans->transid);
1306 btrfs_set_header_owner(split, root->root_key.objectid);
1307 write_extent_buffer(split, root->fs_info->fsid,
1308 (unsigned long)btrfs_header_fsid(split),
/* the upper half of the pointers moves to the new block */
1311 mid = (c_nritems + 1) / 2;
1313 copy_extent_buffer(split, c,
1314 btrfs_node_key_ptr_offset(0),
1315 btrfs_node_key_ptr_offset(mid),
1316 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1317 btrfs_set_header_nritems(split, c_nritems - mid);
1318 btrfs_set_header_nritems(c, mid);
1321 btrfs_mark_buffer_dirty(c);
1322 btrfs_mark_buffer_dirty(split);
/* link the new block into the parent right after the original node */
1324 btrfs_node_key(split, &disk_key, 0);
1325 wret = insert_ptr(trans, root, path, &disk_key,
1326 extent_buffer_blocknr(split),
1327 path->slots[level + 1] + 1,
1332 if (path->slots[level] >= mid) {
1333 path->slots[level] -= mid;
1334 free_extent_buffer(c);
1335 path->nodes[level] = split;
1336 path->slots[level + 1] += 1;
1338 free_extent_buffer(split);
1344 * how many bytes are required to store the items in a leaf. start
1345 * and nr indicate which items in the leaf to check. This totals up the
1346 * space used both by the item structs and the item data
/*
 * Bytes used by nr items of leaf l starting at item start: the data span
 * from the end of item start down to the offset of the last item
 * considered, plus nr item headers.  The last item considered is clamped to
 * the last item of the leaf (end = min(nritems, start + nr) - 1).
 * NOTE(review): the item header term uses nr itself, not the clamped count.
 * NOTE(review): an early exit and the return statement are on lines missing
 * from this listing.
 */
1348 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1351 int nritems = btrfs_header_nritems(l);
1352 int end = min(nritems, start + nr) - 1;
1356 data_len = btrfs_item_end_nr(l, start);
1357 data_len = data_len - btrfs_item_offset_nr(l, end);
1358 data_len += sizeof(struct btrfs_item) * nr;
1359 WARN_ON(data_len < 0);
1364 * The space between the end of the leaf items and
1365 * the start of the leaf data. IOW, how much room
1366 * the leaf has left for both items and data
/*
 * Free space in a leaf: BTRFS_LEAF_DATA_SIZE(root) minus the space used by
 * all of its items as reported by leaf_space_used().  A diagnostic line
 * with the result, the data size, the used space and the item count can be
 * printed.
 * NOTE(review): the condition guarding the printk - presumably a negative
 * result - and the return statement are on lines missing from this listing.
 */
1368 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1370 int nritems = btrfs_header_nritems(leaf);
1372 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1374 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1375 ret, BTRFS_LEAF_DATA_SIZE(root),
1376 leaf_space_used(leaf, 0, nritems), nritems);
1382 * push some data in the path leaf to the right, trying to free up at
1383 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1385 * returns 1 if the push failed because the other node didn't have enough
1386 * room, 0 if everything worked out and < 0 if there were major errors.
/*
 * Move items from the tail of the path's leaf (left) into its right sibling.
 * The sibling is located through the parent at path->nodes[1]; data_size is
 * the size of the pending insertion that room is being made for.
 */
1388 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1389 *root, struct btrfs_path *path, int data_size)
1391 struct extent_buffer *left = path->nodes[0];
1392 struct extent_buffer *right;
1393 struct extent_buffer *upper;
1394 struct btrfs_disk_key disk_key;
1400 struct btrfs_item *item;
/* slot of the path's leaf inside its parent */
1406 slot = path->slots[1];
/* the sibling is found through the parent, so a parent must exist */
1407 if (!path->nodes[1]) {
1410 upper = path->nodes[1];
/* the leaf is the last pointer in upper, so there is no slot + 1 sibling */
1411 if (slot >= btrfs_header_nritems(upper) - 1)
/* read the right sibling and drop it again if it cannot hold the new item */
1414 right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1));
1415 free_space = btrfs_leaf_free_space(root, right);
1416 if (free_space < data_size + sizeof(struct btrfs_item)) {
1417 free_extent_buffer(right);
1421 /* cow and double check */
1422 ret = btrfs_cow_block(trans, root, right, upper,
1425 free_extent_buffer(right);
/* the free-space test is repeated on the buffer returned by the cow */
1428 free_space = btrfs_leaf_free_space(root, right);
1429 if (free_space < data_size + sizeof(struct btrfs_item)) {
1430 free_extent_buffer(right);
1434 left_nritems = btrfs_header_nritems(left);
1435 if (left_nritems == 0) {
1436 free_extent_buffer(right);
/*
 * Walk left from its last item down to slot 1 (slot 0 is never a candidate),
 * accumulating in push_space the bytes that would move.  When the walk
 * reaches the path's slot, the pending insertion (data_size plus one header)
 * is accounted for as well.
 */
1440 for (i = left_nritems - 1; i >= 1; i--) {
1441 item = btrfs_item_nr(left, i);
1442 if (path->slots[0] == i)
1443 push_space += data_size + sizeof(*item);
1444 if (btrfs_item_size(left, item) + sizeof(*item) + push_space >
1448 push_space += btrfs_item_size(left, item) + sizeof(*item);
1451 if (push_items == 0) {
1452 free_extent_buffer(right);
1456 if (push_items == left_nritems)
1459 /* push left to right */
1460 right_nritems = btrfs_header_nritems(right);
/* push_space is reused: now the number of data bytes owned by the pushed items */
1461 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1462 push_space -= leaf_data_end(root, left);
1464 /* make room in the right data area */
1465 data_end = leaf_data_end(root, right);
1466 memmove_extent_buffer(right,
1467 btrfs_leaf_data(right) + data_end - push_space,
1468 btrfs_leaf_data(right) + data_end,
1469 BTRFS_LEAF_DATA_SIZE(root) - data_end);
1471 /* copy from the left data area */
1472 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1473 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1474 btrfs_leaf_data(left) + leaf_data_end(root, left),
/* slide right's existing item headers up by push_items slots */
1477 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1478 btrfs_item_nr_offset(0),
1479 right_nritems * sizeof(struct btrfs_item));
1481 /* copy the items from left to right */
1482 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1483 btrfs_item_nr_offset(left_nritems - push_items),
1484 push_items * sizeof(struct btrfs_item));
1486 /* update the item pointers */
1487 right_nritems += push_items;
1488 btrfs_set_header_nritems(right, right_nritems);
/*
 * Recompute every offset in right from the top of the data area downward,
 * using each item's size; push_space tracks the running offset.
 */
1489 push_space = BTRFS_LEAF_DATA_SIZE(root);
1490 for (i = 0; i < right_nritems; i++) {
1491 item = btrfs_item_nr(right, i);
1492 btrfs_set_item_offset(right, item, push_space -
1493 btrfs_item_size(right, item));
1494 push_space = btrfs_item_offset(right, item);
1496 left_nritems -= push_items;
1497 btrfs_set_header_nritems(left, left_nritems);
1499 btrfs_mark_buffer_dirty(left);
1500 btrfs_mark_buffer_dirty(right);
/* right has a new first item, so its key in the parent is rewritten */
1502 btrfs_item_key(right, &disk_key, 0);
1503 btrfs_set_node_key(upper, &disk_key, slot + 1);
1504 btrfs_mark_buffer_dirty(upper);
1506 /* then fixup the leaf pointer in the path */
/*
 * If the path's slot was among the pushed items it now lives in right:
 * rebase the slot, swap the path's leaf (releasing the old one) and advance
 * the parent slot.  The other release of right below covers the case where
 * the path stays on left.
 */
1507 if (path->slots[0] >= left_nritems) {
1508 path->slots[0] -= left_nritems;
1509 free_extent_buffer(path->nodes[0]);
1510 path->nodes[0] = right;
1511 path->slots[1] += 1;
1513 free_extent_buffer(right);
1516 check_node(root, path, 1);
1520 * push some data in the path leaf to the left, trying to free up at
1521 * least data_size bytes. returns zero if the push worked, nonzero otherwise
/*
 * Move items from the front of the path's leaf (right) into its left sibling.
 * The sibling is read through the parent at path->nodes[1]; data_size is the
 * size of the pending insertion that room is being made for.
 */
1523 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1524 *root, struct btrfs_path *path, int data_size)
1526 struct btrfs_disk_key disk_key;
1527 struct extent_buffer *right = path->nodes[0];
1528 struct extent_buffer *left;
1534 struct btrfs_item *item;
1535 u32 old_left_nritems;
/* slot of the path's leaf inside its parent */
1540 slot = path->slots[1];
/* the sibling is found through the parent, so a parent must exist */
1543 if (!path->nodes[1])
/* read the left sibling and drop it again if it cannot hold the new item */
1546 left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1548 free_space = btrfs_leaf_free_space(root, left);
1549 if (free_space < data_size + sizeof(struct btrfs_item)) {
1550 free_extent_buffer(left);
1554 /* cow and double check */
1555 ret = btrfs_cow_block(trans, root, left,
1556 path->nodes[1], slot - 1, &left);
1558 /* we hit -ENOSPC, but it isn't fatal here */
1559 free_extent_buffer(left);
/* the free-space test is repeated on the buffer returned by the cow */
1562 free_space = btrfs_leaf_free_space(root, left);
1563 if (free_space < data_size + sizeof(struct btrfs_item)) {
1564 free_extent_buffer(left);
1568 right_nritems = btrfs_header_nritems(right);
1569 if (right_nritems == 0) {
1570 free_extent_buffer(left);
/*
 * Walk right from slot 0 up to, but not including, its last item,
 * accumulating in push_space the bytes that would move.  When the walk
 * reaches the path's slot, the pending insertion (data_size plus one header)
 * is accounted for as well.
 */
1574 for (i = 0; i < right_nritems - 1; i++) {
1575 item = btrfs_item_nr(right, i);
1576 if (path->slots[0] == i)
1577 push_space += data_size + sizeof(*item);
1578 if (btrfs_item_size(right, item) + sizeof(*item) + push_space >
1582 push_space += btrfs_item_size(right, item) + sizeof(*item);
1584 if (push_items == 0) {
1585 free_extent_buffer(left);
1588 if (push_items == btrfs_header_nritems(right))
1591 /* push data from right to left */
/* item headers are appended after left's current last header */
1592 copy_extent_buffer(left, right,
1593 btrfs_item_nr_offset(btrfs_header_nritems(left)),
1594 btrfs_item_nr_offset(0),
1595 push_items * sizeof(struct btrfs_item));
/* push_space is reused: now the data bytes owned by the first push_items items */
1597 push_space = BTRFS_LEAF_DATA_SIZE(root) -
1598 btrfs_item_offset_nr(right, push_items -1);
/* that data lands directly below left's current data end */
1600 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1601 leaf_data_end(root, left) - push_space,
1602 btrfs_leaf_data(right) +
1603 btrfs_item_offset_nr(right, push_items - 1),
1605 old_left_nritems = btrfs_header_nritems(left);
/*
 * NOTE(review): old_left_nritems is declared u32 above, so this condition
 * can never be true.
 */
1606 BUG_ON(old_left_nritems < 0);
/*
 * The copied headers still carry offsets relative to right; shift each one
 * down by the amount of data left already held.
 */
1608 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1610 item = btrfs_item_nr(left, i);
1611 ioff = btrfs_item_offset(left, item);
1612 btrfs_set_item_offset(left, item,
1613 ioff - (BTRFS_LEAF_DATA_SIZE(root) -
1614 btrfs_item_offset_nr(left, old_left_nritems - 1)));
1616 btrfs_set_header_nritems(left, old_left_nritems + push_items);
1618 /* fixup right node */
/* data that stays in right is moved up against the end of the data area */
1619 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1620 leaf_data_end(root, right);
1621 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1622 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1623 btrfs_leaf_data(right) +
1624 leaf_data_end(root, right), push_space);
/* remaining item headers slide down to slot 0 */
1626 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1627 btrfs_item_nr_offset(push_items),
1628 (btrfs_header_nritems(right) - push_items) *
1629 sizeof(struct btrfs_item));
1631 right_nritems = btrfs_header_nritems(right) - push_items;
1632 btrfs_set_header_nritems(right, right_nritems);
/*
 * Recompute every offset in right from the top of the data area downward,
 * using each item's size; push_space tracks the running offset.
 */
1633 push_space = BTRFS_LEAF_DATA_SIZE(root);
1635 for (i = 0; i < right_nritems; i++) {
1636 item = btrfs_item_nr(right, i);
1637 btrfs_set_item_offset(right, item, push_space -
1638 btrfs_item_size(right, item));
1639 push_space = btrfs_item_offset(right, item);
1642 btrfs_mark_buffer_dirty(left);
1643 btrfs_mark_buffer_dirty(right);
/* right has a new first item; its key is handed to fixup_low_keys() */
1645 btrfs_item_key(right, &disk_key, 0);
1646 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1650 /* then fixup the leaf pointer in the path */
/*
 * If the path's slot was among the pushed items it now lives in left:
 * rebase the slot past left's old items, swap the path's leaf (releasing the
 * old one) and step the parent slot back.  Otherwise left is released and
 * the slot is rebased within right.
 */
1651 if (path->slots[0] < push_items) {
1652 path->slots[0] += old_left_nritems;
1653 free_extent_buffer(path->nodes[0]);
1654 path->nodes[0] = left;
1655 path->slots[1] -= 1;
1657 free_extent_buffer(left);
1658 path->slots[0] -= push_items;
1660 BUG_ON(path->slots[0] < 0);
1662 check_node(root, path, 1);
1667 * split the path's leaf in two, making sure there is at least data_size
1668 * available for the resulting leaf level of the path.
1670 * returns 0 if all went well and < 0 on failure.
/*
 * Make room in the path's leaf for an item of data_size bytes keyed by
 * ins_key.  Pushing into the left and right siblings is tried first; if that
 * is not enough, a new leaf (right) is allocated and linked into the parent
 * after the current leaf.
 */
1672 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1673 *root, struct btrfs_key *ins_key,
1674 struct btrfs_path *path, int data_size)
1676 struct extent_buffer *l;
1680 struct extent_buffer *right;
/* bytes the new item needs: its data plus one item header */
1681 int space_needed = data_size + sizeof(struct btrfs_item);
1687 int double_split = 0;
1688 struct btrfs_disk_key disk_key;
1690 /* first try to make some room by pushing left and right */
1691 wret = push_leaf_left(trans, root, path, data_size);
1695 wret = push_leaf_right(trans, root, path, data_size);
1701 /* did the pushes work? */
1702 if (btrfs_leaf_free_space(root, l) >=
1703 sizeof(struct btrfs_item) + data_size)
/* a split needs a parent to hold the new pointer; create one if missing */
1706 if (!path->nodes[1]) {
1707 ret = insert_new_root(trans, root, path, 1);
1711 slot = path->slots[0];
1712 nritems = btrfs_header_nritems(l);
/* split point: items mid..nritems-1 are candidates to move to right */
1713 mid = (nritems + 1)/ 2;
/* allocate the new leaf and initialise its header as a level-0 block */
1715 right = btrfs_alloc_free_block(trans, root,
1716 extent_buffer_blocknr(l), 0);
1718 return PTR_ERR(right);
1720 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1721 btrfs_set_header_blocknr(right, extent_buffer_blocknr(right));
1722 btrfs_set_header_generation(right, trans->transid);
1723 btrfs_set_header_owner(right, root->root_key.objectid);
1724 btrfs_set_header_level(right, 0);
1725 write_extent_buffer(right, root->fs_info->fsid,
1726 (unsigned long)btrfs_header_fsid(right),
/* would the upper half (items from mid on) plus the new item overflow a leaf? */
1731 leaf_space_used(l, mid, nritems - mid) + space_needed >
1732 BTRFS_LEAF_DATA_SIZE(root)) {
/*
 * Insertion point is past every existing item: link right in as an empty
 * leaf keyed by ins_key, right after l in the parent, and point the path
 * at it.
 */
1733 if (slot >= nritems) {
1734 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1735 btrfs_set_header_nritems(right, 0);
1736 wret = insert_ptr(trans, root, path,
1738 extent_buffer_blocknr(right),
1739 path->slots[1] + 1, 1);
1742 free_extent_buffer(path->nodes[0]);
1743 path->nodes[0] = right;
1745 path->slots[1] += 1;
/* would the lower part (items 0..mid) plus the new item overflow a leaf? */
1752 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1753 BTRFS_LEAF_DATA_SIZE(root)) {
/* again right is linked in empty, keyed by ins_key, and becomes the path's leaf */
1755 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1756 btrfs_set_header_nritems(right, 0);
1757 wret = insert_ptr(trans, root, path,
1759 extent_buffer_blocknr(right),
1763 free_extent_buffer(path->nodes[0]);
1764 path->nodes[0] = right;
/* path sits in the parent's first slot: hand the new key to fixup_low_keys() */
1766 if (path->slots[1] == 0) {
1767 wret = fixup_low_keys(trans, root,
1768 path, &disk_key, 1);
/* regular split: nritems now counts only the items that move to right */
1778 nritems = nritems - mid;
1779 btrfs_set_header_nritems(right, nritems);
/* bytes of item data owned by items mid..end of l */
1780 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
/* copy the item headers, then the data packed against the end of right */
1782 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1783 btrfs_item_nr_offset(mid),
1784 nritems * sizeof(struct btrfs_item));
1786 copy_extent_buffer(right, l,
1787 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1788 data_copy_size, btrfs_leaf_data(l) +
1789 leaf_data_end(root, l), data_copy_size);
/* every copied offset moves up by the room the lower items used in l */
1791 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1792 btrfs_item_end_nr(l, mid);
1794 for (i = 0; i < nritems; i++) {
1795 struct btrfs_item *item = btrfs_item_nr(right, i);
1796 u32 ioff = btrfs_item_offset(right, item);
1797 btrfs_set_item_offset(right, item, ioff + rt_data_off);
/* l keeps only its first mid items */
1800 btrfs_set_header_nritems(l, mid);
/* link right into the parent just after l, keyed by right's first item */
1802 btrfs_item_key(right, &disk_key, 0);
1803 wret = insert_ptr(trans, root, path, &disk_key,
1804 extent_buffer_blocknr(right), path->slots[1] + 1, 1);
1808 btrfs_mark_buffer_dirty(right);
1809 btrfs_mark_buffer_dirty(l);
1810 BUG_ON(path->slots[0] != slot);
/*
 * Path fixup: either the path moves over to right (slot rebased by mid,
 * parent slot advanced, old leaf released) or the reference on right is
 * dropped.
 */
1813 free_extent_buffer(path->nodes[0]);
1814 path->nodes[0] = right;
1815 path->slots[0] -= mid;
1816 path->slots[1] += 1;
1818 free_extent_buffer(right);
1820 BUG_ON(path->slots[0] < 0);
1821 check_node(root, path, 1);
1822 check_leaf(root, path, 0);
/*
 * A further empty leaf is allocated, initialised and linked in under
 * ins_key, then made the path's leaf.
 * NOTE(review): presumably the double_split case -- confirm.
 */
1827 right = btrfs_alloc_free_block(trans, root,
1828 extent_buffer_blocknr(l), 0);
1830 return PTR_ERR(right);
1832 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1833 btrfs_set_header_blocknr(right, extent_buffer_blocknr(right));
1834 btrfs_set_header_generation(right, trans->transid);
1835 btrfs_set_header_owner(right, root->root_key.objectid);
1836 btrfs_set_header_level(right, 0);
1837 write_extent_buffer(right, root->fs_info->fsid,
1838 (unsigned long)btrfs_header_fsid(right),
1841 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1842 btrfs_set_header_nritems(right, 0);
1843 wret = insert_ptr(trans, root, path,
1845 extent_buffer_blocknr(right),
1849 if (path->slots[1] == 0) {
1850 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1854 free_extent_buffer(path->nodes[0]);
1855 path->nodes[0] = right;
1857 check_node(root, path, 1);
1858 check_leaf(root, path, 0);
/*
 * Shrink the item at path->slots[0] in the path's leaf to new_size bytes.
 * new_size must be strictly smaller than the current size.  The bytes at the
 * start of the item's data are kept; the freed space is returned to the leaf
 * by sliding the kept data and all later items' data toward the end of the
 * data area.
 */
1862 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1863 struct btrfs_root *root,
1864 struct btrfs_path *path,
1870 struct extent_buffer *leaf;
1871 struct btrfs_item *item;
1873 unsigned int data_end;
1874 unsigned int old_data_start;
1875 unsigned int old_size;
1876 unsigned int size_diff;
1879 slot_orig = path->slots[0];
1880 leaf = path->nodes[0];
1882 nritems = btrfs_header_nritems(leaf);
1883 data_end = leaf_data_end(root, leaf);
1885 slot = path->slots[0];
1886 old_data_start = btrfs_item_offset_nr(leaf, slot);
1887 old_size = btrfs_item_size_nr(leaf, slot);
/* only a real shrink is accepted; equal sizes trip the BUG_ON too */
1888 BUG_ON(old_size <= new_size);
1889 size_diff = old_size - new_size;
1892 BUG_ON(slot >= nritems);
1895 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1897 /* first correct the data pointers */
/* this item and every later one start size_diff bytes higher */
1898 for (i = slot; i < nritems; i++) {
1900 item = btrfs_item_nr(leaf, i);
1901 ioff = btrfs_item_offset(leaf, item);
1902 btrfs_set_item_offset(leaf, item, ioff + size_diff);
1904 /* shift the data */
/* moves [data_end, old_data_start + new_size) up by size_diff */
1905 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
1906 data_end + size_diff, btrfs_leaf_data(leaf) +
1907 data_end, old_data_start + new_size - data_end);
1909 item = btrfs_item_nr(leaf, slot);
1910 btrfs_set_item_size(leaf, item, new_size);
1911 btrfs_mark_buffer_dirty(leaf);
/* sanity check: dump the leaf if the accounting went negative */
1914 if (btrfs_leaf_free_space(root, leaf) < 0) {
1915 btrfs_print_leaf(root, leaf);
1918 check_leaf(root, path, 0);
/*
 * Grow the item at path->slots[0] in the path's leaf by data_size bytes.
 * The item's data and the data of all later items are moved data_size bytes
 * lower in the leaf, and the item's recorded size is increased accordingly.
 */
1922 int btrfs_extend_item(struct btrfs_trans_handle *trans,
1923 struct btrfs_root *root, struct btrfs_path *path,
1929 struct extent_buffer *leaf;
1930 struct btrfs_item *item;
1932 unsigned int data_end;
1933 unsigned int old_data;
1934 unsigned int old_size;
1937 slot_orig = path->slots[0];
1938 leaf = path->nodes[0];
1940 nritems = btrfs_header_nritems(leaf);
1941 data_end = leaf_data_end(root, leaf);
/* the leaf must already have data_size bytes free; dump it otherwise */
1943 if (btrfs_leaf_free_space(root, leaf) < data_size) {
1944 btrfs_print_leaf(root, leaf);
1947 slot = path->slots[0];
/* end of this item's data: everything from data_end up to here moves */
1948 old_data = btrfs_item_end_nr(leaf, slot);
1951 BUG_ON(slot >= nritems);
1954 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1956 /* first correct the data pointers */
/* this item and every later one start data_size bytes lower */
1957 for (i = slot; i < nritems; i++) {
1959 item = btrfs_item_nr(leaf, i);
1960 ioff = btrfs_item_offset(leaf, item);
1961 btrfs_set_item_offset(leaf, item, ioff - data_size);
1964 /* shift the data */
/* moves [data_end, old_data) down by data_size */
1965 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
1966 data_end - data_size, btrfs_leaf_data(leaf) +
1967 data_end, old_data - data_end);
1969 data_end = old_data;
1970 old_size = btrfs_item_size_nr(leaf, slot);
1971 item = btrfs_item_nr(leaf, slot);
1972 btrfs_set_item_size(leaf, item, old_size + data_size);
1973 btrfs_mark_buffer_dirty(leaf);
/* sanity check: dump the leaf if the accounting went negative */
1976 if (btrfs_leaf_free_space(root, leaf) < 0) {
1977 btrfs_print_leaf(root, leaf);
1980 check_leaf(root, path, 0);
1985 * Given a key and a data size, insert an empty item of that size into the tree.
1986 * This does all the path init required, making room in the tree if needed.
/*
 * Search the tree for cpu_key and create a new item of data_size bytes at
 * the slot the search leaves in path.  Only the item header (key, offset,
 * size) is set up here; no item data is written by this function.
 */
1988 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
1989 struct btrfs_root *root,
1990 struct btrfs_path *path,
1991 struct btrfs_key *cpu_key, u32 data_size)
1993 struct extent_buffer *leaf;
1994 struct btrfs_item *item;
1999 unsigned int data_end;
2000 struct btrfs_disk_key disk_key;
2002 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2004 /* create a root if there isn't one */
/* the search is given data_size so it can make room on the way down */
2008 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2015 slot_orig = path->slots[0];
2016 leaf = path->nodes[0];
2018 nritems = btrfs_header_nritems(leaf);
2019 data_end = leaf_data_end(root, leaf);
/* the leaf must have room for the data plus one item header */
2021 if (btrfs_leaf_free_space(root, leaf) <
2022 sizeof(struct btrfs_item) + data_size) {
2026 slot = path->slots[0];
/* inserting before existing items: open a gap in both headers and data */
2029 if (slot != nritems) {
2031 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
/* sanity check: an item's end can never lie below the leaf's data end */
2033 if (old_data < data_end) {
2034 btrfs_print_leaf(root, leaf);
2035 printk("slot %d old_data %d data_end %d\n",
2036 slot, old_data, data_end);
2040 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2042 /* first correct the data pointers */
2043 for (i = slot; i < nritems; i++) {
2045 item = btrfs_item_nr(leaf, i);
2046 ioff = btrfs_item_offset(leaf, item);
2047 btrfs_set_item_offset(leaf, item, ioff - data_size);
2050 /* shift the items */
2051 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2052 btrfs_item_nr_offset(slot),
2053 (nritems - slot) * sizeof(struct btrfs_item));
2055 /* shift the data */
2056 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2057 data_end - data_size, btrfs_leaf_data(leaf) +
2058 data_end, old_data - data_end);
/* the new item's data ends where the displaced item's data used to end */
2059 data_end = old_data;
2062 /* setup the item for the new data */
2063 btrfs_set_item_key(leaf, &disk_key, slot);
2064 item = btrfs_item_nr(leaf, slot);
2065 btrfs_set_item_offset(leaf, item, data_end - data_size);
2066 btrfs_set_item_size(leaf, item, data_size);
2067 btrfs_set_header_nritems(leaf, nritems + 1);
2068 btrfs_mark_buffer_dirty(leaf);
2072 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
/* sanity check: dump the leaf if the accounting went negative */
2074 if (btrfs_leaf_free_space(root, leaf) < 0) {
2075 btrfs_print_leaf(root, leaf);
2078 check_leaf(root, path, 0);
2084 * Given a key and some data, insert an item into the tree.
2085 * This does all the path init required, making room in the tree if needed.
/*
 * Convenience wrapper: allocate a temporary path, create an empty item for
 * cpu_key with btrfs_insert_empty_item(), copy data_size bytes from data into
 * it, and free the path again.
 */
2087 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2088 *root, struct btrfs_key *cpu_key, void *data, u32
2092 struct btrfs_path *path;
2093 struct extent_buffer *leaf;
/* NOTE(review): confirm the allocation result is checked before use */
2096 path = btrfs_alloc_path();
2098 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
/* the path now points at the new item; write the payload into its data area */
2100 leaf = path->nodes[0];
2101 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2102 write_extent_buffer(leaf, data, ptr, data_size);
2103 btrfs_mark_buffer_dirty(leaf);
2105 btrfs_free_path(path);
2110 * delete the pointer from a given node.
2112 * If the delete empties a node, the node is removed from the tree,
2113 * continuing all the way to the root if required. The root is converted into
2114 * a leaf if all the nodes are emptied.
/*
 * Remove the key/pointer pair at slot from the node at path->nodes[level],
 * closing the gap in the node's key_ptr array.
 */
2116 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2117 struct btrfs_path *path, int level, int slot)
2119 struct extent_buffer *parent = path->nodes[level];
2124 nritems = btrfs_header_nritems(parent);
/* unless the last pointer is being removed, slide the later ones down by one */
2125 if (slot != nritems -1) {
2126 memmove_extent_buffer(parent,
2127 btrfs_node_key_ptr_offset(slot),
2128 btrfs_node_key_ptr_offset(slot + 1),
2129 sizeof(struct btrfs_key_ptr) *
2130 (nritems - slot - 1));
2133 btrfs_set_header_nritems(parent, nritems);
/* an emptied root must have been a level-1 node; it becomes a leaf */
2134 if (nritems == 0 && parent == root->node) {
2135 BUG_ON(btrfs_header_level(root->node) != 1);
2136 /* just turn the root into a leaf and break */
2137 btrfs_set_header_level(root->node, 0);
/* the node's first key changed: pass the new one to fixup_low_keys() */
2138 } else if (slot == 0) {
2139 struct btrfs_disk_key disk_key;
2141 btrfs_node_key(parent, &disk_key, 0);
2142 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2146 btrfs_mark_buffer_dirty(parent);
2151 * delete the item at the leaf level in path. If that empties
2152 * the leaf, remove it from the tree
/*
 * Delete the item at path->slots[0] from the path's leaf.  The item's data
 * and header are squeezed out of the leaf; an emptied leaf is removed from
 * its parent and its extent freed, and a leaf left less than a third full
 * tries to push its remaining items into its siblings.
 */
2154 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2155 struct btrfs_path *path)
2158 struct extent_buffer *leaf;
2159 struct btrfs_item *item;
2166 leaf = path->nodes[0];
2167 slot = path->slots[0];
/* offset and size of the doomed item's data */
2168 doff = btrfs_item_offset_nr(leaf, slot);
2169 dsize = btrfs_item_size_nr(leaf, slot);
2170 nritems = btrfs_header_nritems(leaf);
/* not the last item: close the hole in the data area and in the headers */
2172 if (slot != nritems - 1) {
2174 int data_end = leaf_data_end(root, leaf);
2176 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2178 btrfs_leaf_data(leaf) + data_end,
/* every later item's data moved up by the deleted item's size */
2181 for (i = slot + 1; i < nritems; i++) {
2183 item = btrfs_item_nr(leaf, i);
2184 ioff = btrfs_item_offset(leaf, item);
2185 btrfs_set_item_offset(leaf, item, ioff + dsize);
2187 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2188 btrfs_item_nr_offset(slot + 1),
2189 sizeof(struct btrfs_item) *
2190 (nritems - slot - 1));
2192 btrfs_set_header_nritems(leaf, nritems - 1);
2195 /* delete the leaf if we've emptied it */
/* a leaf that is also the root stays in place; otherwise unlink and free it */
2197 if (leaf == root->node) {
2198 btrfs_set_header_level(leaf, 0);
2200 clean_tree_block(trans, root, leaf);
2201 wait_on_tree_block_writeback(root, leaf);
2202 wret = del_ptr(trans, root, path, 1, path->slots[1]);
2205 wret = btrfs_free_extent(trans, root,
2206 extent_buffer_blocknr(leaf),
/* bytes still occupied by the leaf's items */
2212 int used = leaf_space_used(leaf, 0, nritems);
2214 struct btrfs_disk_key disk_key;
/* the leaf's first key is read and handed to fixup_low_keys() */
2216 btrfs_item_key(leaf, &disk_key, 0);
2217 wret = fixup_low_keys(trans, root, path,
2223 /* delete the leaf if it is mostly empty */
2224 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2225 /* push_leaf_left fixes the path.
2226 * make sure the path still points to our leaf
2227 * for possible call to del_ptr below
/* remember the parent slot before the pushes can change path->slots[1] */
2229 slot = path->slots[1];
/*
 * Hold an extra reference: push_leaf_left() may release the path's
 * reference when it repoints path->nodes[0].
 */
2230 extent_buffer_get(leaf);
2232 wret = push_leaf_left(trans, root, path, 1);
2233 if (wret < 0 && wret != -ENOSPC)
/* only try the right side if the path is still on this leaf and it has items */
2236 if (path->nodes[0] == leaf &&
2237 btrfs_header_nritems(leaf)) {
2238 wret = push_leaf_right(trans, root, path, 1);
2239 if (wret < 0 && wret != -ENOSPC)
/* the pushes drained the leaf: unlink it from the parent and free its block */
2243 if (btrfs_header_nritems(leaf) == 0) {
/* block number saved before the reference on leaf is dropped */
2244 u64 blocknr = extent_buffer_blocknr(leaf);
2246 clean_tree_block(trans, root, leaf);
2247 wait_on_tree_block_writeback(root, leaf);
2249 wret = del_ptr(trans, root, path, 1, slot);
2253 free_extent_buffer(leaf);
2254 wret = btrfs_free_extent(trans, root, blocknr,
2259 btrfs_mark_buffer_dirty(leaf);
2260 free_extent_buffer(leaf);
2263 btrfs_mark_buffer_dirty(leaf);
2270 * walk up the tree as far as required to find the next leaf.
2271 * returns 0 if it found something or 1 if there are no greater leaves.
2272 * returns < 0 on io errors.
2274 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2279 struct extent_buffer *c;
2280 struct extent_buffer *next = NULL;
2282 while(level < BTRFS_MAX_LEVEL) {
2283 if (!path->nodes[level])
2286 slot = path->slots[level] + 1;
2287 c = path->nodes[level];
2288 if (slot >= btrfs_header_nritems(c)) {
2293 blocknr = btrfs_node_blockptr(c, slot);
2295 free_extent_buffer(next);
2298 reada_for_search(root, path, level, slot);
2300 next = read_tree_block(root, blocknr);
2303 path->slots[level] = slot;
2306 c = path->nodes[level];
2307 free_extent_buffer(c);
2308 path->nodes[level] = next;
2309 path->slots[level] = 0;
2313 reada_for_search(root, path, level, 0);
2314 next = read_tree_block(root, btrfs_node_blockptr(next, 0));