]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/ctree.c
Btrfs: fix check_node and check_leaf to use less cpu
[karo-tx-linux.git] / fs / btrfs / ctree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "transaction.h"
5
6 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
7                       *root, struct btrfs_path *path, int level);
8 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
9                       *root, struct btrfs_key *ins_key,
10                       struct btrfs_path *path, int data_size);
11 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
12                           *root, struct buffer_head *dst, struct buffer_head
13                           *src);
14 static int balance_node_right(struct btrfs_trans_handle *trans, struct
15                               btrfs_root *root, struct buffer_head *dst_buf,
16                               struct buffer_head *src_buf);
17 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
18                    struct btrfs_path *path, int level, int slot);
19
20 inline void btrfs_init_path(struct btrfs_path *p)
21 {
22         memset(p, 0, sizeof(*p));
23 }
24
25 struct btrfs_path *btrfs_alloc_path(void)
26 {
27         struct btrfs_path *path;
28         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
29         if (path)
30                 btrfs_init_path(path);
31         return path;
32 }
33
34 void btrfs_free_path(struct btrfs_path *p)
35 {
36         btrfs_release_path(NULL, p);
37         kmem_cache_free(btrfs_path_cachep, p);
38 }
39
40 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
41 {
42         int i;
43         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
44                 if (!p->nodes[i])
45                         break;
46                 btrfs_block_release(root, p->nodes[i]);
47         }
48         memset(p, 0, sizeof(*p));
49 }
50
51 static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
52                            *root, struct buffer_head *buf, struct buffer_head
53                            *parent, int parent_slot, struct buffer_head
54                            **cow_ret)
55 {
56         struct buffer_head *cow;
57         struct btrfs_node *cow_node;
58
59         if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
60                                     trans->transid) {
61                 *cow_ret = buf;
62                 return 0;
63         }
64         cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
65         cow_node = btrfs_buffer_node(cow);
66         if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
67                 WARN_ON(1);
68         memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
69         btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
70         btrfs_set_header_generation(&cow_node->header, trans->transid);
71         btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
72         btrfs_inc_ref(trans, root, buf);
73         if (buf == root->node) {
74                 root->node = cow;
75                 get_bh(cow);
76                 if (buf != root->commit_root) {
77                         btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
78                 }
79                 btrfs_block_release(root, buf);
80         } else {
81                 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
82                                         bh_blocknr(cow));
83                 btrfs_mark_buffer_dirty(parent);
84                 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
85         }
86         btrfs_block_release(root, buf);
87         mark_buffer_dirty(cow);
88         *cow_ret = cow;
89         return 0;
90 }
91
92 /*
93  * The leaf data grows from end-to-front in the node.
94  * this returns the address of the start of the last item,
95  * which is the stop of the leaf data stack
96  */
97 static inline unsigned int leaf_data_end(struct btrfs_root *root,
98                                          struct btrfs_leaf *leaf)
99 {
100         u32 nr = btrfs_header_nritems(&leaf->header);
101         if (nr == 0)
102                 return BTRFS_LEAF_DATA_SIZE(root);
103         return btrfs_item_offset(leaf->items + nr - 1);
104 }
105
106 /*
107  * compare two keys in a memcmp fashion
108  */
109 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
110 {
111         struct btrfs_key k1;
112
113         btrfs_disk_key_to_cpu(&k1, disk);
114
115         if (k1.objectid > k2->objectid)
116                 return 1;
117         if (k1.objectid < k2->objectid)
118                 return -1;
119         if (k1.flags > k2->flags)
120                 return 1;
121         if (k1.flags < k2->flags)
122                 return -1;
123         if (k1.offset > k2->offset)
124                 return 1;
125         if (k1.offset < k2->offset)
126                 return -1;
127         return 0;
128 }
129
130 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
131                       int level)
132 {
133         struct btrfs_node *parent = NULL;
134         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
135         int parent_slot;
136         int slot;
137         struct btrfs_key cpukey;
138         u32 nritems = btrfs_header_nritems(&node->header);
139
140         if (path->nodes[level + 1])
141                 parent = btrfs_buffer_node(path->nodes[level + 1]);
142         parent_slot = path->slots[level + 1];
143         slot = path->slots[level];
144         BUG_ON(nritems == 0);
145         if (parent) {
146                 struct btrfs_disk_key *parent_key;
147                 parent_key = &parent->ptrs[parent_slot].key;
148                 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
149                               sizeof(struct btrfs_disk_key)));
150                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
151                        btrfs_header_blocknr(&node->header));
152         }
153         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
154         if (slot != 0) {
155                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key);
156                 BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0);
157         }
158         if (slot < nritems - 1) {
159                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key);
160                 BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0);
161         }
162         return 0;
163 }
164
165 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
166                       int level)
167 {
168         struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
169         struct btrfs_node *parent = NULL;
170         int parent_slot;
171         int slot = path->slots[0];
172         struct btrfs_key cpukey;
173
174         u32 nritems = btrfs_header_nritems(&leaf->header);
175
176         if (path->nodes[level + 1])
177                 parent = btrfs_buffer_node(path->nodes[level + 1]);
178         parent_slot = path->slots[level + 1];
179         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
180
181         if (nritems == 0)
182                 return 0;
183
184         if (parent) {
185                 struct btrfs_disk_key *parent_key;
186                 parent_key = &parent->ptrs[parent_slot].key;
187                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
188                        sizeof(struct btrfs_disk_key)));
189                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
190                        btrfs_header_blocknr(&leaf->header));
191         }
192         if (slot != 0) {
193                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key);
194                 BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0);
195                 BUG_ON(btrfs_item_offset(leaf->items + slot - 1) !=
196                         btrfs_item_end(leaf->items + slot));
197         }
198         if (slot < nritems - 1) {
199                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key);
200                 BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0);
201                 BUG_ON(btrfs_item_offset(leaf->items + slot) !=
202                         btrfs_item_end(leaf->items + slot + 1));
203         }
204         BUG_ON(btrfs_item_offset(leaf->items) +
205                btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root));
206         return 0;
207 }
208
209 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
210                         int level)
211 {
212         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
213         if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid,
214                    sizeof(node->header.fsid)))
215                 BUG();
216         if (level == 0)
217                 return check_leaf(root, path, level);
218         return check_node(root, path, level);
219 }
220
221 /*
222  * search for key in the array p.  items p are item_size apart
223  * and there are 'max' items in p
224  * the slot in the array is returned via slot, and it points to
225  * the place where you would insert key if it is not found in
226  * the array.
227  *
228  * slot may point to max if the key is bigger than all of the keys
229  */
230 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
231                        int max, int *slot)
232 {
233         int low = 0;
234         int high = max;
235         int mid;
236         int ret;
237         struct btrfs_disk_key *tmp;
238
239         while(low < high) {
240                 mid = (low + high) / 2;
241                 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
242                 ret = comp_keys(tmp, key);
243
244                 if (ret < 0)
245                         low = mid + 1;
246                 else if (ret > 0)
247                         high = mid;
248                 else {
249                         *slot = mid;
250                         return 0;
251                 }
252         }
253         *slot = low;
254         return 1;
255 }
256
257 /*
258  * simple bin_search frontend that does the right thing for
259  * leaves vs nodes
260  */
261 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
262 {
263         if (btrfs_is_leaf(c)) {
264                 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
265                 return generic_bin_search((void *)l->items,
266                                           sizeof(struct btrfs_item),
267                                           key, btrfs_header_nritems(&c->header),
268                                           slot);
269         } else {
270                 return generic_bin_search((void *)c->ptrs,
271                                           sizeof(struct btrfs_key_ptr),
272                                           key, btrfs_header_nritems(&c->header),
273                                           slot);
274         }
275         return -1;
276 }
277
278 static struct buffer_head *read_node_slot(struct btrfs_root *root,
279                                    struct buffer_head *parent_buf,
280                                    int slot)
281 {
282         struct btrfs_node *node = btrfs_buffer_node(parent_buf);
283         if (slot < 0)
284                 return NULL;
285         if (slot >= btrfs_header_nritems(&node->header))
286                 return NULL;
287         return read_tree_block(root, btrfs_node_blockptr(node, slot));
288 }
289
290 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
291                          *root, struct btrfs_path *path, int level)
292 {
293         struct buffer_head *right_buf;
294         struct buffer_head *mid_buf;
295         struct buffer_head *left_buf;
296         struct buffer_head *parent_buf = NULL;
297         struct btrfs_node *right = NULL;
298         struct btrfs_node *mid;
299         struct btrfs_node *left = NULL;
300         struct btrfs_node *parent = NULL;
301         int ret = 0;
302         int wret;
303         int pslot;
304         int orig_slot = path->slots[level];
305         u64 orig_ptr;
306
307         if (level == 0)
308                 return 0;
309
310         mid_buf = path->nodes[level];
311         mid = btrfs_buffer_node(mid_buf);
312         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
313
314         if (level < BTRFS_MAX_LEVEL - 1)
315                 parent_buf = path->nodes[level + 1];
316         pslot = path->slots[level + 1];
317
318         /*
319          * deal with the case where there is only one pointer in the root
320          * by promoting the node below to a root
321          */
322         if (!parent_buf) {
323                 struct buffer_head *child;
324                 u64 blocknr = bh_blocknr(mid_buf);
325
326                 if (btrfs_header_nritems(&mid->header) != 1)
327                         return 0;
328
329                 /* promote the child to a root */
330                 child = read_node_slot(root, mid_buf, 0);
331                 BUG_ON(!child);
332                 root->node = child;
333                 path->nodes[level] = NULL;
334                 clean_tree_block(trans, root, mid_buf);
335                 wait_on_buffer(mid_buf);
336                 /* once for the path */
337                 btrfs_block_release(root, mid_buf);
338                 /* once for the root ptr */
339                 btrfs_block_release(root, mid_buf);
340                 return btrfs_free_extent(trans, root, blocknr, 1, 1);
341         }
342         parent = btrfs_buffer_node(parent_buf);
343
344         if (btrfs_header_nritems(&mid->header) >
345             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
346                 return 0;
347
348         left_buf = read_node_slot(root, parent_buf, pslot - 1);
349         right_buf = read_node_slot(root, parent_buf, pslot + 1);
350
351         /* first, try to make some room in the middle buffer */
352         if (left_buf) {
353                 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
354                                 &left_buf);
355                 left = btrfs_buffer_node(left_buf);
356                 orig_slot += btrfs_header_nritems(&left->header);
357                 wret = push_node_left(trans, root, left_buf, mid_buf);
358                 if (wret < 0)
359                         ret = wret;
360         }
361
362         /*
363          * then try to empty the right most buffer into the middle
364          */
365         if (right_buf) {
366                 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
367                                 &right_buf);
368                 right = btrfs_buffer_node(right_buf);
369                 wret = push_node_left(trans, root, mid_buf, right_buf);
370                 if (wret < 0)
371                         ret = wret;
372                 if (btrfs_header_nritems(&right->header) == 0) {
373                         u64 blocknr = bh_blocknr(right_buf);
374                         clean_tree_block(trans, root, right_buf);
375                         wait_on_buffer(right_buf);
376                         btrfs_block_release(root, right_buf);
377                         right_buf = NULL;
378                         right = NULL;
379                         wret = del_ptr(trans, root, path, level + 1, pslot +
380                                        1);
381                         if (wret)
382                                 ret = wret;
383                         wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
384                         if (wret)
385                                 ret = wret;
386                 } else {
387                         btrfs_memcpy(root, parent,
388                                      &parent->ptrs[pslot + 1].key,
389                                      &right->ptrs[0].key,
390                                      sizeof(struct btrfs_disk_key));
391                         btrfs_mark_buffer_dirty(parent_buf);
392                 }
393         }
394         if (btrfs_header_nritems(&mid->header) == 1) {
395                 /*
396                  * we're not allowed to leave a node with one item in the
397                  * tree during a delete.  A deletion from lower in the tree
398                  * could try to delete the only pointer in this node.
399                  * So, pull some keys from the left.
400                  * There has to be a left pointer at this point because
401                  * otherwise we would have pulled some pointers from the
402                  * right
403                  */
404                 BUG_ON(!left_buf);
405                 wret = balance_node_right(trans, root, mid_buf, left_buf);
406                 if (wret < 0)
407                         ret = wret;
408                 BUG_ON(wret == 1);
409         }
410         if (btrfs_header_nritems(&mid->header) == 0) {
411                 /* we've managed to empty the middle node, drop it */
412                 u64 blocknr = bh_blocknr(mid_buf);
413                 clean_tree_block(trans, root, mid_buf);
414                 wait_on_buffer(mid_buf);
415                 btrfs_block_release(root, mid_buf);
416                 mid_buf = NULL;
417                 mid = NULL;
418                 wret = del_ptr(trans, root, path, level + 1, pslot);
419                 if (wret)
420                         ret = wret;
421                 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
422                 if (wret)
423                         ret = wret;
424         } else {
425                 /* update the parent key to reflect our changes */
426                 btrfs_memcpy(root, parent,
427                              &parent->ptrs[pslot].key, &mid->ptrs[0].key,
428                              sizeof(struct btrfs_disk_key));
429                 btrfs_mark_buffer_dirty(parent_buf);
430         }
431
432         /* update the path */
433         if (left_buf) {
434                 if (btrfs_header_nritems(&left->header) > orig_slot) {
435                         get_bh(left_buf);
436                         path->nodes[level] = left_buf;
437                         path->slots[level + 1] -= 1;
438                         path->slots[level] = orig_slot;
439                         if (mid_buf)
440                                 btrfs_block_release(root, mid_buf);
441                 } else {
442                         orig_slot -= btrfs_header_nritems(&left->header);
443                         path->slots[level] = orig_slot;
444                 }
445         }
446         /* double check we haven't messed things up */
447         check_block(root, path, level);
448         if (orig_ptr !=
449             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
450                                 path->slots[level]))
451                 BUG();
452
453         if (right_buf)
454                 btrfs_block_release(root, right_buf);
455         if (left_buf)
456                 btrfs_block_release(root, left_buf);
457         return ret;
458 }
459
460 /* returns zero if the push worked, non-zero otherwise */
461 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
462                                 struct btrfs_root *root,
463                                 struct btrfs_path *path, int level)
464 {
465         struct buffer_head *right_buf;
466         struct buffer_head *mid_buf;
467         struct buffer_head *left_buf;
468         struct buffer_head *parent_buf = NULL;
469         struct btrfs_node *right = NULL;
470         struct btrfs_node *mid;
471         struct btrfs_node *left = NULL;
472         struct btrfs_node *parent = NULL;
473         int ret = 0;
474         int wret;
475         int pslot;
476         int orig_slot = path->slots[level];
477         u64 orig_ptr;
478
479         if (level == 0)
480                 return 1;
481
482         mid_buf = path->nodes[level];
483         mid = btrfs_buffer_node(mid_buf);
484         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
485
486         if (level < BTRFS_MAX_LEVEL - 1)
487                 parent_buf = path->nodes[level + 1];
488         pslot = path->slots[level + 1];
489
490         if (!parent_buf)
491                 return 1;
492         parent = btrfs_buffer_node(parent_buf);
493
494         left_buf = read_node_slot(root, parent_buf, pslot - 1);
495
496         /* first, try to make some room in the middle buffer */
497         if (left_buf) {
498                 u32 left_nr;
499                 left = btrfs_buffer_node(left_buf);
500                 left_nr = btrfs_header_nritems(&left->header);
501                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
502                         wret = 1;
503                 } else {
504                         btrfs_cow_block(trans, root, left_buf, parent_buf,
505                                         pslot - 1, &left_buf);
506                         left = btrfs_buffer_node(left_buf);
507                         wret = push_node_left(trans, root, left_buf, mid_buf);
508                 }
509                 if (wret < 0)
510                         ret = wret;
511                 if (wret == 0) {
512                         orig_slot += left_nr;
513                         btrfs_memcpy(root, parent,
514                                      &parent->ptrs[pslot].key,
515                                      &mid->ptrs[0].key,
516                                      sizeof(struct btrfs_disk_key));
517                         btrfs_mark_buffer_dirty(parent_buf);
518                         if (btrfs_header_nritems(&left->header) > orig_slot) {
519                                 path->nodes[level] = left_buf;
520                                 path->slots[level + 1] -= 1;
521                                 path->slots[level] = orig_slot;
522                                 btrfs_block_release(root, mid_buf);
523                         } else {
524                                 orig_slot -=
525                                         btrfs_header_nritems(&left->header);
526                                 path->slots[level] = orig_slot;
527                                 btrfs_block_release(root, left_buf);
528                         }
529                         check_node(root, path, level);
530                         return 0;
531                 }
532                 btrfs_block_release(root, left_buf);
533         }
534         right_buf = read_node_slot(root, parent_buf, pslot + 1);
535
536         /*
537          * then try to empty the right most buffer into the middle
538          */
539         if (right_buf) {
540                 u32 right_nr;
541                 right = btrfs_buffer_node(right_buf);
542                 right_nr = btrfs_header_nritems(&right->header);
543                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
544                         wret = 1;
545                 } else {
546                         btrfs_cow_block(trans, root, right_buf,
547                                         parent_buf, pslot + 1, &right_buf);
548                         right = btrfs_buffer_node(right_buf);
549                         wret = balance_node_right(trans, root,
550                                                   right_buf, mid_buf);
551                 }
552                 if (wret < 0)
553                         ret = wret;
554                 if (wret == 0) {
555                         btrfs_memcpy(root, parent,
556                                      &parent->ptrs[pslot + 1].key,
557                                      &right->ptrs[0].key,
558                                      sizeof(struct btrfs_disk_key));
559                         btrfs_mark_buffer_dirty(parent_buf);
560                         if (btrfs_header_nritems(&mid->header) <= orig_slot) {
561                                 path->nodes[level] = right_buf;
562                                 path->slots[level + 1] += 1;
563                                 path->slots[level] = orig_slot -
564                                         btrfs_header_nritems(&mid->header);
565                                 btrfs_block_release(root, mid_buf);
566                         } else {
567                                 btrfs_block_release(root, right_buf);
568                         }
569                         check_node(root, path, level);
570                         return 0;
571                 }
572                 btrfs_block_release(root, right_buf);
573         }
574         check_node(root, path, level);
575         return 1;
576 }
577
578 /*
579  * look for key in the tree.  path is filled in with nodes along the way
580  * if key is found, we return zero and you can find the item in the leaf
581  * level of the path (level 0)
582  *
583  * If the key isn't found, the path points to the slot where it should
584  * be inserted, and 1 is returned.  If there are other errors during the
585  * search a negative error number is returned.
586  *
587  * if ins_len > 0, nodes and leaves will be split as we walk down the
588  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
589  * possible)
590  */
591 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
592                       *root, struct btrfs_key *key, struct btrfs_path *p, int
593                       ins_len, int cow)
594 {
595         struct buffer_head *b;
596         struct buffer_head *cow_buf;
597         struct btrfs_node *c;
598         int slot;
599         int ret;
600         int level;
601
602         WARN_ON(p->nodes[0] != NULL);
603         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
604 again:
605         b = root->node;
606         get_bh(b);
607         while (b) {
608                 c = btrfs_buffer_node(b);
609                 level = btrfs_header_level(&c->header);
610                 if (cow) {
611                         int wret;
612                         wret = btrfs_cow_block(trans, root, b,
613                                                p->nodes[level + 1],
614                                                p->slots[level + 1],
615                                                &cow_buf);
616                         b = cow_buf;
617                         c = btrfs_buffer_node(b);
618                 }
619                 BUG_ON(!cow && ins_len);
620                 if (level != btrfs_header_level(&c->header))
621                         WARN_ON(1);
622                 level = btrfs_header_level(&c->header);
623                 p->nodes[level] = b;
624                 ret = check_block(root, p, level);
625                 if (ret)
626                         return -1;
627                 ret = bin_search(c, key, &slot);
628                 if (!btrfs_is_leaf(c)) {
629                         if (ret && slot > 0)
630                                 slot -= 1;
631                         p->slots[level] = slot;
632                         if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
633                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
634                                 int sret = split_node(trans, root, p, level);
635                                 BUG_ON(sret > 0);
636                                 if (sret)
637                                         return sret;
638                                 b = p->nodes[level];
639                                 c = btrfs_buffer_node(b);
640                                 slot = p->slots[level];
641                         } else if (ins_len < 0) {
642                                 int sret = balance_level(trans, root, p,
643                                                          level);
644                                 if (sret)
645                                         return sret;
646                                 b = p->nodes[level];
647                                 if (!b)
648                                         goto again;
649                                 c = btrfs_buffer_node(b);
650                                 slot = p->slots[level];
651                                 BUG_ON(btrfs_header_nritems(&c->header) == 1);
652                         }
653                         b = read_tree_block(root, btrfs_node_blockptr(c, slot));
654                 } else {
655                         struct btrfs_leaf *l = (struct btrfs_leaf *)c;
656                         p->slots[level] = slot;
657                         if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
658                             sizeof(struct btrfs_item) + ins_len) {
659                                 int sret = split_leaf(trans, root, key,
660                                                       p, ins_len);
661                                 BUG_ON(sret > 0);
662                                 if (sret)
663                                         return sret;
664                         }
665                         return ret;
666                 }
667         }
668         return 1;
669 }
670
671 /*
672  * adjust the pointers going up the tree, starting at level
673  * making sure the right key of each node is points to 'key'.
674  * This is used after shifting pointers to the left, so it stops
675  * fixing up pointers when a given leaf/node is not in slot 0 of the
676  * higher levels
677  *
678  * If this fails to write a tree block, it returns -1, but continues
679  * fixing up the blocks in ram so the tree is consistent.
680  */
681 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
682                           *root, struct btrfs_path *path, struct btrfs_disk_key
683                           *key, int level)
684 {
685         int i;
686         int ret = 0;
687         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
688                 struct btrfs_node *t;
689                 int tslot = path->slots[i];
690                 if (!path->nodes[i])
691                         break;
692                 t = btrfs_buffer_node(path->nodes[i]);
693                 btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key));
694                 btrfs_mark_buffer_dirty(path->nodes[i]);
695                 if (tslot != 0)
696                         break;
697         }
698         return ret;
699 }
700
701 /*
702  * try to push data from one node into the next node left in the
703  * tree.
704  *
705  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
706  * error, and > 0 if there was no room in the left hand block.
707  */
708 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
709                           *root, struct buffer_head *dst_buf, struct
710                           buffer_head *src_buf)
711 {
712         struct btrfs_node *src = btrfs_buffer_node(src_buf);
713         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
714         int push_items = 0;
715         int src_nritems;
716         int dst_nritems;
717         int ret = 0;
718
719         src_nritems = btrfs_header_nritems(&src->header);
720         dst_nritems = btrfs_header_nritems(&dst->header);
721         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
722         if (push_items <= 0) {
723                 return 1;
724         }
725
726         if (src_nritems < push_items)
727                 push_items = src_nritems;
728
729         btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs,
730                      push_items * sizeof(struct btrfs_key_ptr));
731         if (push_items < src_nritems) {
732                 btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items,
733                         (src_nritems - push_items) *
734                         sizeof(struct btrfs_key_ptr));
735         }
736         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
737         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
738         btrfs_mark_buffer_dirty(src_buf);
739         btrfs_mark_buffer_dirty(dst_buf);
740         return ret;
741 }
742
743 /*
744  * try to push data from one node into the next node right in the
745  * tree.
746  *
747  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
748  * error, and > 0 if there was no room in the right hand block.
749  *
750  * this will  only push up to 1/2 the contents of the left node over
751  */
752 static int balance_node_right(struct btrfs_trans_handle *trans, struct
753                               btrfs_root *root, struct buffer_head *dst_buf,
754                               struct buffer_head *src_buf)
755 {
756         struct btrfs_node *src = btrfs_buffer_node(src_buf);
757         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
758         int push_items = 0;
759         int max_push;
760         int src_nritems;
761         int dst_nritems;
762         int ret = 0;
763
764         src_nritems = btrfs_header_nritems(&src->header);
765         dst_nritems = btrfs_header_nritems(&dst->header);
766         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
767         if (push_items <= 0) {
768                 return 1;
769         }
770
771         max_push = src_nritems / 2 + 1;
772         /* don't try to empty the node */
773         if (max_push > src_nritems)
774                 return 1;
775         if (max_push < push_items)
776                 push_items = max_push;
777
778         btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs,
779                       dst_nritems * sizeof(struct btrfs_key_ptr));
780
781         btrfs_memcpy(root, dst, dst->ptrs,
782                      src->ptrs + src_nritems - push_items,
783                      push_items * sizeof(struct btrfs_key_ptr));
784
785         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
786         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
787
788         btrfs_mark_buffer_dirty(src_buf);
789         btrfs_mark_buffer_dirty(dst_buf);
790         return ret;
791 }
792
793 /*
794  * helper function to insert a new root level in the tree.
795  * A new node is allocated, and a single item is inserted to
796  * point to the existing root
797  *
798  * returns zero on success or < 0 on failure.
799  */
800 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
801                            *root, struct btrfs_path *path, int level)
802 {
803         struct buffer_head *t;
804         struct btrfs_node *lower;
805         struct btrfs_node *c;
806         struct btrfs_disk_key *lower_key;
807
808         BUG_ON(path->nodes[level]);
809         BUG_ON(path->nodes[level-1] != root->node);
810
811         t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr);
812         c = btrfs_buffer_node(t);
813         memset(c, 0, root->blocksize);
814         btrfs_set_header_nritems(&c->header, 1);
815         btrfs_set_header_level(&c->header, level);
816         btrfs_set_header_blocknr(&c->header, bh_blocknr(t));
817         btrfs_set_header_generation(&c->header, trans->transid);
818         btrfs_set_header_owner(&c->header, root->root_key.objectid);
819         lower = btrfs_buffer_node(path->nodes[level-1]);
820         memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
821                sizeof(c->header.fsid));
822         if (btrfs_is_leaf(lower))
823                 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
824         else
825                 lower_key = &lower->ptrs[0].key;
826         btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
827                      sizeof(struct btrfs_disk_key));
828         btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1]));
829
830         btrfs_mark_buffer_dirty(t);
831
832         /* the super has an extra ref to root->node */
833         btrfs_block_release(root, root->node);
834         root->node = t;
835         get_bh(t);
836         path->nodes[level] = t;
837         path->slots[level] = 0;
838         return 0;
839 }
840
841 /*
842  * worker function to insert a single pointer in a node.
843  * the node should have enough room for the pointer already
844  *
845  * slot and level indicate where you want the key to go, and
846  * blocknr is the block the key points to.
847  *
848  * returns zero on success and < 0 on any error
849  */
850 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
851                       *root, struct btrfs_path *path, struct btrfs_disk_key
852                       *key, u64 blocknr, int slot, int level)
853 {
854         struct btrfs_node *lower;
855         int nritems;
856
857         BUG_ON(!path->nodes[level]);
858         lower = btrfs_buffer_node(path->nodes[level]);
859         nritems = btrfs_header_nritems(&lower->header);
860         if (slot > nritems)
861                 BUG();
862         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
863                 BUG();
864         if (slot != nritems) {
865                 btrfs_memmove(root, lower, lower->ptrs + slot + 1,
866                               lower->ptrs + slot,
867                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
868         }
869         btrfs_memcpy(root, lower, &lower->ptrs[slot].key,
870                      key, sizeof(struct btrfs_disk_key));
871         btrfs_set_node_blockptr(lower, slot, blocknr);
872         btrfs_set_header_nritems(&lower->header, nritems + 1);
873         btrfs_mark_buffer_dirty(path->nodes[level]);
874         return 0;
875 }
876
877 /*
878  * split the node at the specified level in path in two.
879  * The path is corrected to point to the appropriate node after the split
880  *
881  * Before splitting this tries to make some room in the node by pushing
882  * left and right, if either one works, it returns right away.
883  *
884  * returns 0 on success and < 0 on failure
885  */
886 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
887                       *root, struct btrfs_path *path, int level)
888 {
889         struct buffer_head *t;
890         struct btrfs_node *c;
891         struct buffer_head *split_buffer;
892         struct btrfs_node *split;
893         int mid;
894         int ret;
895         int wret;
896         u32 c_nritems;
897
898         t = path->nodes[level];
899         c = btrfs_buffer_node(t);
900         if (t == root->node) {
901                 /* trying to split the root, lets make a new one */
902                 ret = insert_new_root(trans, root, path, level + 1);
903                 if (ret)
904                         return ret;
905         } else {
906                 ret = push_nodes_for_insert(trans, root, path, level);
907                 t = path->nodes[level];
908                 c = btrfs_buffer_node(t);
909                 if (!ret &&
910                     btrfs_header_nritems(&c->header) <
911                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
912                         return 0;
913         }
914
915         c_nritems = btrfs_header_nritems(&c->header);
916         split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr);
917         split = btrfs_buffer_node(split_buffer);
918         btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
919         btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
920         btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer));
921         btrfs_set_header_generation(&split->header, trans->transid);
922         btrfs_set_header_owner(&split->header, root->root_key.objectid);
923         memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
924                sizeof(split->header.fsid));
925         mid = (c_nritems + 1) / 2;
926         btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid,
927                      (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
928         btrfs_set_header_nritems(&split->header, c_nritems - mid);
929         btrfs_set_header_nritems(&c->header, mid);
930         ret = 0;
931
932         btrfs_mark_buffer_dirty(t);
933         btrfs_mark_buffer_dirty(split_buffer);
934         wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
935                           bh_blocknr(split_buffer), path->slots[level + 1] + 1,
936                           level + 1);
937         if (wret)
938                 ret = wret;
939
940         if (path->slots[level] >= mid) {
941                 path->slots[level] -= mid;
942                 btrfs_block_release(root, t);
943                 path->nodes[level] = split_buffer;
944                 path->slots[level + 1] += 1;
945         } else {
946                 btrfs_block_release(root, split_buffer);
947         }
948         return ret;
949 }
950
951 /*
952  * how many bytes are required to store the items in a leaf.  start
953  * and nr indicate which items in the leaf to check.  This totals up the
954  * space used both by the item structs and the item data
955  */
956 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
957 {
958         int data_len;
959         int nritems = btrfs_header_nritems(&l->header);
960         int end = min(nritems, start + nr) - 1;
961
962         if (!nr)
963                 return 0;
964         data_len = btrfs_item_end(l->items + start);
965         data_len = data_len - btrfs_item_offset(l->items + end);
966         data_len += sizeof(struct btrfs_item) * nr;
967         WARN_ON(data_len < 0);
968         return data_len;
969 }
970
971 /*
972  * The space between the end of the leaf items and
973  * the start of the leaf data.  IOW, how much room
974  * the leaf has left for both items and data
975  */
976 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
977 {
978         int nritems = btrfs_header_nritems(&leaf->header);
979         return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
980 }
981
982 /*
983  * push some data in the path leaf to the right, trying to free up at
984  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
985  *
986  * returns 1 if the push failed because the other node didn't have enough
987  * room, 0 if everything worked out and < 0 if there were major errors.
988  */
989 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
990                            *root, struct btrfs_path *path, int data_size)
991 {
992         struct buffer_head *left_buf = path->nodes[0];
993         struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
994         struct btrfs_leaf *right;
995         struct buffer_head *right_buf;
996         struct buffer_head *upper;
997         struct btrfs_node *upper_node;
998         int slot;
999         int i;
1000         int free_space;
1001         int push_space = 0;
1002         int push_items = 0;
1003         struct btrfs_item *item;
1004         u32 left_nritems;
1005         u32 right_nritems;
1006
1007         slot = path->slots[1];
1008         if (!path->nodes[1]) {
1009                 return 1;
1010         }
1011         upper = path->nodes[1];
1012         upper_node = btrfs_buffer_node(upper);
1013         if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
1014                 return 1;
1015         }
1016         right_buf = read_tree_block(root,
1017                     btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
1018         right = btrfs_buffer_leaf(right_buf);
1019         free_space = btrfs_leaf_free_space(root, right);
1020         if (free_space < data_size + sizeof(struct btrfs_item)) {
1021                 btrfs_block_release(root, right_buf);
1022                 return 1;
1023         }
1024         /* cow and double check */
1025         btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
1026         right = btrfs_buffer_leaf(right_buf);
1027         free_space = btrfs_leaf_free_space(root, right);
1028         if (free_space < data_size + sizeof(struct btrfs_item)) {
1029                 btrfs_block_release(root, right_buf);
1030                 return 1;
1031         }
1032
1033         left_nritems = btrfs_header_nritems(&left->header);
1034         if (left_nritems == 0) {
1035                 btrfs_block_release(root, right_buf);
1036                 return 1;
1037         }
1038         for (i = left_nritems - 1; i >= 1; i--) {
1039                 item = left->items + i;
1040                 if (path->slots[0] == i)
1041                         push_space += data_size + sizeof(*item);
1042                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1043                     free_space)
1044                         break;
1045                 push_items++;
1046                 push_space += btrfs_item_size(item) + sizeof(*item);
1047         }
1048         if (push_items == 0) {
1049                 btrfs_block_release(root, right_buf);
1050                 return 1;
1051         }
1052         if (push_items == left_nritems)
1053                 WARN_ON(1);
1054         right_nritems = btrfs_header_nritems(&right->header);
1055         /* push left to right */
1056         push_space = btrfs_item_end(left->items + left_nritems - push_items);
1057         push_space -= leaf_data_end(root, left);
1058         /* make room in the right data area */
1059         btrfs_memmove(root, right, btrfs_leaf_data(right) +
1060                       leaf_data_end(root, right) - push_space,
1061                       btrfs_leaf_data(right) +
1062                       leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) -
1063                       leaf_data_end(root, right));
1064         /* copy from the left data area */
1065         btrfs_memcpy(root, right, btrfs_leaf_data(right) +
1066                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1067                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1068                      push_space);
1069         btrfs_memmove(root, right, right->items + push_items, right->items,
1070                 right_nritems * sizeof(struct btrfs_item));
1071         /* copy the items from left to right */
1072         btrfs_memcpy(root, right, right->items, left->items +
1073                      left_nritems - push_items,
1074                      push_items * sizeof(struct btrfs_item));
1075
1076         /* update the item pointers */
1077         right_nritems += push_items;
1078         btrfs_set_header_nritems(&right->header, right_nritems);
1079         push_space = BTRFS_LEAF_DATA_SIZE(root);
1080         for (i = 0; i < right_nritems; i++) {
1081                 btrfs_set_item_offset(right->items + i, push_space -
1082                                       btrfs_item_size(right->items + i));
1083                 push_space = btrfs_item_offset(right->items + i);
1084         }
1085         left_nritems -= push_items;
1086         btrfs_set_header_nritems(&left->header, left_nritems);
1087
1088         btrfs_mark_buffer_dirty(left_buf);
1089         btrfs_mark_buffer_dirty(right_buf);
1090
1091         btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key,
1092                 &right->items[0].key, sizeof(struct btrfs_disk_key));
1093         btrfs_mark_buffer_dirty(upper);
1094
1095         /* then fixup the leaf pointer in the path */
1096         if (path->slots[0] >= left_nritems) {
1097                 path->slots[0] -= left_nritems;
1098                 btrfs_block_release(root, path->nodes[0]);
1099                 path->nodes[0] = right_buf;
1100                 path->slots[1] += 1;
1101         } else {
1102                 btrfs_block_release(root, right_buf);
1103         }
1104         return 0;
1105 }
1106 /*
1107  * push some data in the path leaf to the left, trying to free up at
1108  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1109  */
1110 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1111                           *root, struct btrfs_path *path, int data_size)
1112 {
1113         struct buffer_head *right_buf = path->nodes[0];
1114         struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
1115         struct buffer_head *t;
1116         struct btrfs_leaf *left;
1117         int slot;
1118         int i;
1119         int free_space;
1120         int push_space = 0;
1121         int push_items = 0;
1122         struct btrfs_item *item;
1123         u32 old_left_nritems;
1124         int ret = 0;
1125         int wret;
1126
1127         slot = path->slots[1];
1128         if (slot == 0) {
1129                 return 1;
1130         }
1131         if (!path->nodes[1]) {
1132                 return 1;
1133         }
1134         t = read_tree_block(root,
1135             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
1136         left = btrfs_buffer_leaf(t);
1137         free_space = btrfs_leaf_free_space(root, left);
1138         if (free_space < data_size + sizeof(struct btrfs_item)) {
1139                 btrfs_block_release(root, t);
1140                 return 1;
1141         }
1142
1143         /* cow and double check */
1144         btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
1145         left = btrfs_buffer_leaf(t);
1146         free_space = btrfs_leaf_free_space(root, left);
1147         if (free_space < data_size + sizeof(struct btrfs_item)) {
1148                 btrfs_block_release(root, t);
1149                 return 1;
1150         }
1151
1152         if (btrfs_header_nritems(&right->header) == 0) {
1153                 btrfs_block_release(root, t);
1154                 return 1;
1155         }
1156
1157         for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) {
1158                 item = right->items + i;
1159                 if (path->slots[0] == i)
1160                         push_space += data_size + sizeof(*item);
1161                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1162                     free_space)
1163                         break;
1164                 push_items++;
1165                 push_space += btrfs_item_size(item) + sizeof(*item);
1166         }
1167         if (push_items == 0) {
1168                 btrfs_block_release(root, t);
1169                 return 1;
1170         }
1171         if (push_items == btrfs_header_nritems(&right->header))
1172                 WARN_ON(1);
1173         /* push data from right to left */
1174         btrfs_memcpy(root, left, left->items +
1175                      btrfs_header_nritems(&left->header),
1176                      right->items, push_items * sizeof(struct btrfs_item));
1177         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1178                      btrfs_item_offset(right->items + push_items -1);
1179         btrfs_memcpy(root, left, btrfs_leaf_data(left) +
1180                      leaf_data_end(root, left) - push_space,
1181                      btrfs_leaf_data(right) +
1182                      btrfs_item_offset(right->items + push_items - 1),
1183                      push_space);
1184         old_left_nritems = btrfs_header_nritems(&left->header);
1185         BUG_ON(old_left_nritems < 0);
1186
1187         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1188                 u32 ioff = btrfs_item_offset(left->items + i);
1189                 btrfs_set_item_offset(left->items + i, ioff -
1190                                      (BTRFS_LEAF_DATA_SIZE(root) -
1191                                       btrfs_item_offset(left->items +
1192                                                         old_left_nritems - 1)));
1193         }
1194         btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1195
1196         /* fixup right node */
1197         push_space = btrfs_item_offset(right->items + push_items - 1) -
1198                      leaf_data_end(root, right);
1199         btrfs_memmove(root, right, btrfs_leaf_data(right) +
1200                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1201                       btrfs_leaf_data(right) +
1202                       leaf_data_end(root, right), push_space);
1203         btrfs_memmove(root, right, right->items, right->items + push_items,
1204                 (btrfs_header_nritems(&right->header) - push_items) *
1205                 sizeof(struct btrfs_item));
1206         btrfs_set_header_nritems(&right->header,
1207                                  btrfs_header_nritems(&right->header) -
1208                                  push_items);
1209         push_space = BTRFS_LEAF_DATA_SIZE(root);
1210
1211         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1212                 btrfs_set_item_offset(right->items + i, push_space -
1213                                       btrfs_item_size(right->items + i));
1214                 push_space = btrfs_item_offset(right->items + i);
1215         }
1216
1217         btrfs_mark_buffer_dirty(t);
1218         btrfs_mark_buffer_dirty(right_buf);
1219         wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1220         if (wret)
1221                 ret = wret;
1222
1223         /* then fixup the leaf pointer in the path */
1224         if (path->slots[0] < push_items) {
1225                 path->slots[0] += old_left_nritems;
1226                 btrfs_block_release(root, path->nodes[0]);
1227                 path->nodes[0] = t;
1228                 path->slots[1] -= 1;
1229         } else {
1230                 btrfs_block_release(root, t);
1231                 path->slots[0] -= push_items;
1232         }
1233         BUG_ON(path->slots[0] < 0);
1234         return ret;
1235 }
1236
1237 /*
1238  * split the path's leaf in two, making sure there is at least data_size
1239  * available for the resulting leaf level of the path.
1240  *
1241  * returns 0 if all went well and < 0 on failure.
1242  */
1243 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1244                       *root, struct btrfs_key *ins_key,
1245                       struct btrfs_path *path, int data_size)
1246 {
1247         struct buffer_head *l_buf;
1248         struct btrfs_leaf *l;
1249         u32 nritems;
1250         int mid;
1251         int slot;
1252         struct btrfs_leaf *right;
1253         struct buffer_head *right_buffer;
1254         int space_needed = data_size + sizeof(struct btrfs_item);
1255         int data_copy_size;
1256         int rt_data_off;
1257         int i;
1258         int ret = 0;
1259         int wret;
1260         int double_split = 0;
1261         struct btrfs_disk_key disk_key;
1262
1263         /* first try to make some room by pushing left and right */
1264         wret = push_leaf_left(trans, root, path, data_size);
1265         if (wret < 0)
1266                 return wret;
1267         if (wret) {
1268                 wret = push_leaf_right(trans, root, path, data_size);
1269                 if (wret < 0)
1270                         return wret;
1271         }
1272         l_buf = path->nodes[0];
1273         l = btrfs_buffer_leaf(l_buf);
1274
1275         /* did the pushes work? */
1276         if (btrfs_leaf_free_space(root, l) >=
1277             sizeof(struct btrfs_item) + data_size)
1278                 return 0;
1279
1280         if (!path->nodes[1]) {
1281                 ret = insert_new_root(trans, root, path, 1);
1282                 if (ret)
1283                         return ret;
1284         }
1285         slot = path->slots[0];
1286         nritems = btrfs_header_nritems(&l->header);
1287         mid = (nritems + 1)/ 2;
1288         right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
1289         BUG_ON(!right_buffer);
1290         right = btrfs_buffer_leaf(right_buffer);
1291         memset(&right->header, 0, sizeof(right->header));
1292         btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
1293         btrfs_set_header_generation(&right->header, trans->transid);
1294         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1295         btrfs_set_header_level(&right->header, 0);
1296         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1297                sizeof(right->header.fsid));
1298         if (mid <= slot) {
1299                 if (nritems == 1 ||
1300                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1301                         BTRFS_LEAF_DATA_SIZE(root)) {
1302                         if (slot >= nritems) {
1303                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1304                                 btrfs_set_header_nritems(&right->header, 0);
1305                                 wret = insert_ptr(trans, root, path,
1306                                                   &disk_key,
1307                                                   bh_blocknr(right_buffer),
1308                                                   path->slots[1] + 1, 1);
1309                                 if (wret)
1310                                         ret = wret;
1311                                 btrfs_block_release(root, path->nodes[0]);
1312                                 path->nodes[0] = right_buffer;
1313                                 path->slots[0] = 0;
1314                                 path->slots[1] += 1;
1315                                 return ret;
1316                         }
1317                         mid = slot;
1318                         double_split = 1;
1319                 }
1320         } else {
1321                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1322                         BTRFS_LEAF_DATA_SIZE(root)) {
1323                         if (slot == 0) {
1324                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1325                                 btrfs_set_header_nritems(&right->header, 0);
1326                                 wret = insert_ptr(trans, root, path,
1327                                                   &disk_key,
1328                                                   bh_blocknr(right_buffer),
1329                                                   path->slots[1] - 1, 1);
1330                                 if (wret)
1331                                         ret = wret;
1332                                 btrfs_block_release(root, path->nodes[0]);
1333                                 path->nodes[0] = right_buffer;
1334                                 path->slots[0] = 0;
1335                                 path->slots[1] -= 1;
1336                                 if (path->slots[1] == 0) {
1337                                         wret = fixup_low_keys(trans, root,
1338                                                    path, &disk_key, 1);
1339                                         if (wret)
1340                                                 ret = wret;
1341                                 }
1342                                 return ret;
1343                         }
1344                         mid = slot;
1345                         double_split = 1;
1346                 }
1347         }
1348         btrfs_set_header_nritems(&right->header, nritems - mid);
1349         data_copy_size = btrfs_item_end(l->items + mid) -
1350                          leaf_data_end(root, l);
1351         btrfs_memcpy(root, right, right->items, l->items + mid,
1352                      (nritems - mid) * sizeof(struct btrfs_item));
1353         btrfs_memcpy(root, right,
1354                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1355                      data_copy_size, btrfs_leaf_data(l) +
1356                      leaf_data_end(root, l), data_copy_size);
1357         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1358                       btrfs_item_end(l->items + mid);
1359
1360         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1361                 u32 ioff = btrfs_item_offset(right->items + i);
1362                 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1363         }
1364
1365         btrfs_set_header_nritems(&l->header, mid);
1366         ret = 0;
1367         wret = insert_ptr(trans, root, path, &right->items[0].key,
1368                           bh_blocknr(right_buffer), path->slots[1] + 1, 1);
1369         if (wret)
1370                 ret = wret;
1371         btrfs_mark_buffer_dirty(right_buffer);
1372         btrfs_mark_buffer_dirty(l_buf);
1373         BUG_ON(path->slots[0] != slot);
1374         if (mid <= slot) {
1375                 btrfs_block_release(root, path->nodes[0]);
1376                 path->nodes[0] = right_buffer;
1377                 path->slots[0] -= mid;
1378                 path->slots[1] += 1;
1379         } else
1380                 btrfs_block_release(root, right_buffer);
1381         BUG_ON(path->slots[0] < 0);
1382
1383         if (!double_split)
1384                 return ret;
1385         right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
1386         BUG_ON(!right_buffer);
1387         right = btrfs_buffer_leaf(right_buffer);
1388         memset(&right->header, 0, sizeof(right->header));
1389         btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
1390         btrfs_set_header_generation(&right->header, trans->transid);
1391         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1392         btrfs_set_header_level(&right->header, 0);
1393         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1394                sizeof(right->header.fsid));
1395         btrfs_cpu_key_to_disk(&disk_key, ins_key);
1396         btrfs_set_header_nritems(&right->header, 0);
1397         wret = insert_ptr(trans, root, path,
1398                           &disk_key,
1399                           bh_blocknr(right_buffer),
1400                           path->slots[1], 1);
1401         if (wret)
1402                 ret = wret;
1403         if (path->slots[1] == 0) {
1404                 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1405                 if (wret)
1406                         ret = wret;
1407         }
1408         btrfs_block_release(root, path->nodes[0]);
1409         path->nodes[0] = right_buffer;
1410         path->slots[0] = 0;
1411         check_node(root, path, 1);
1412         check_leaf(root, path, 0);
1413         return ret;
1414 }
1415
1416 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1417                         struct btrfs_root *root,
1418                         struct btrfs_path *path,
1419                         u32 new_size)
1420 {
1421         int ret = 0;
1422         int slot;
1423         int slot_orig;
1424         struct btrfs_leaf *leaf;
1425         struct buffer_head *leaf_buf;
1426         u32 nritems;
1427         unsigned int data_end;
1428         unsigned int old_data_start;
1429         unsigned int old_size;
1430         unsigned int size_diff;
1431         int i;
1432
1433         slot_orig = path->slots[0];
1434         leaf_buf = path->nodes[0];
1435         leaf = btrfs_buffer_leaf(leaf_buf);
1436
1437         nritems = btrfs_header_nritems(&leaf->header);
1438         data_end = leaf_data_end(root, leaf);
1439
1440         slot = path->slots[0];
1441         old_data_start = btrfs_item_offset(leaf->items + slot);
1442         old_size = btrfs_item_size(leaf->items + slot);
1443         BUG_ON(old_size <= new_size);
1444         size_diff = old_size - new_size;
1445
1446         BUG_ON(slot < 0);
1447         BUG_ON(slot >= nritems);
1448
1449         /*
1450          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1451          */
1452         /* first correct the data pointers */
1453         for (i = slot; i < nritems; i++) {
1454                 u32 ioff = btrfs_item_offset(leaf->items + i);
1455                 btrfs_set_item_offset(leaf->items + i,
1456                                       ioff + size_diff);
1457         }
1458         /* shift the data */
1459         btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1460                       data_end + size_diff, btrfs_leaf_data(leaf) +
1461                       data_end, old_data_start + new_size - data_end);
1462         btrfs_set_item_size(leaf->items + slot, new_size);
1463         btrfs_mark_buffer_dirty(leaf_buf);
1464
1465         ret = 0;
1466         if (btrfs_leaf_free_space(root, leaf) < 0)
1467                 BUG();
1468         check_leaf(root, path, 0);
1469         return ret;
1470 }
1471
1472 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1473                       *root, struct btrfs_path *path, u32 data_size)
1474 {
1475         int ret = 0;
1476         int slot;
1477         int slot_orig;
1478         struct btrfs_leaf *leaf;
1479         struct buffer_head *leaf_buf;
1480         u32 nritems;
1481         unsigned int data_end;
1482         unsigned int old_data;
1483         unsigned int old_size;
1484         int i;
1485
1486         slot_orig = path->slots[0];
1487         leaf_buf = path->nodes[0];
1488         leaf = btrfs_buffer_leaf(leaf_buf);
1489
1490         nritems = btrfs_header_nritems(&leaf->header);
1491         data_end = leaf_data_end(root, leaf);
1492
1493         if (btrfs_leaf_free_space(root, leaf) < data_size)
1494                 BUG();
1495         slot = path->slots[0];
1496         old_data = btrfs_item_end(leaf->items + slot);
1497
1498         BUG_ON(slot < 0);
1499         BUG_ON(slot >= nritems);
1500
1501         /*
1502          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1503          */
1504         /* first correct the data pointers */
1505         for (i = slot; i < nritems; i++) {
1506                 u32 ioff = btrfs_item_offset(leaf->items + i);
1507                 btrfs_set_item_offset(leaf->items + i,
1508                                       ioff - data_size);
1509         }
1510         /* shift the data */
1511         btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1512                       data_end - data_size, btrfs_leaf_data(leaf) +
1513                       data_end, old_data - data_end);
1514         data_end = old_data;
1515         old_size = btrfs_item_size(leaf->items + slot);
1516         btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1517         btrfs_mark_buffer_dirty(leaf_buf);
1518
1519         ret = 0;
1520         if (btrfs_leaf_free_space(root, leaf) < 0)
1521                 BUG();
1522         check_leaf(root, path, 0);
1523         return ret;
1524 }
1525
1526 /*
1527  * Given a key and some data, insert an item into the tree.
1528  * This does all the path init required, making room in the tree if needed.
1529  */
1530 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1531                             *root, struct btrfs_path *path, struct btrfs_key
1532                             *cpu_key, u32 data_size)
1533 {
1534         int ret = 0;
1535         int slot;
1536         int slot_orig;
1537         struct btrfs_leaf *leaf;
1538         struct buffer_head *leaf_buf;
1539         u32 nritems;
1540         unsigned int data_end;
1541         struct btrfs_disk_key disk_key;
1542
1543         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1544
1545         /* create a root if there isn't one */
1546         if (!root->node)
1547                 BUG();
1548         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1549         if (ret == 0) {
1550                 return -EEXIST;
1551         }
1552         if (ret < 0)
1553                 goto out;
1554
1555         slot_orig = path->slots[0];
1556         leaf_buf = path->nodes[0];
1557         leaf = btrfs_buffer_leaf(leaf_buf);
1558
1559         nritems = btrfs_header_nritems(&leaf->header);
1560         data_end = leaf_data_end(root, leaf);
1561
1562         if (btrfs_leaf_free_space(root, leaf) <
1563             sizeof(struct btrfs_item) + data_size) {
1564                 BUG();
1565         }
1566         slot = path->slots[0];
1567         BUG_ON(slot < 0);
1568         if (slot != nritems) {
1569                 int i;
1570                 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1571
1572                 /*
1573                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
1574                  */
1575                 /* first correct the data pointers */
1576                 for (i = slot; i < nritems; i++) {
1577                         u32 ioff = btrfs_item_offset(leaf->items + i);
1578                         btrfs_set_item_offset(leaf->items + i,
1579                                               ioff - data_size);
1580                 }
1581
1582                 /* shift the items */
1583                 btrfs_memmove(root, leaf, leaf->items + slot + 1,
1584                               leaf->items + slot,
1585                               (nritems - slot) * sizeof(struct btrfs_item));
1586
1587                 /* shift the data */
1588                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1589                               data_end - data_size, btrfs_leaf_data(leaf) +
1590                               data_end, old_data - data_end);
1591                 data_end = old_data;
1592         }
1593         /* setup the item for the new data */
1594         btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key,
1595                      sizeof(struct btrfs_disk_key));
1596         btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1597         btrfs_set_item_size(leaf->items + slot, data_size);
1598         btrfs_set_header_nritems(&leaf->header, nritems + 1);
1599         btrfs_mark_buffer_dirty(leaf_buf);
1600
1601         ret = 0;
1602         if (slot == 0)
1603                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1604
1605         if (btrfs_leaf_free_space(root, leaf) < 0)
1606                 BUG();
1607         check_leaf(root, path, 0);
1608 out:
1609         return ret;
1610 }
1611
1612 /*
1613  * Given a key and some data, insert an item into the tree.
1614  * This does all the path init required, making room in the tree if needed.
1615  */
1616 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1617                       *root, struct btrfs_key *cpu_key, void *data, u32
1618                       data_size)
1619 {
1620         int ret = 0;
1621         struct btrfs_path *path;
1622         u8 *ptr;
1623
1624         path = btrfs_alloc_path();
1625         BUG_ON(!path);
1626         btrfs_init_path(path);
1627         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
1628         if (!ret) {
1629                 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1630                                      path->slots[0], u8);
1631                 btrfs_memcpy(root, path->nodes[0]->b_data,
1632                              ptr, data, data_size);
1633                 btrfs_mark_buffer_dirty(path->nodes[0]);
1634         }
1635         btrfs_release_path(root, path);
1636         btrfs_free_path(path);
1637         return ret;
1638 }
1639
1640 /*
1641  * delete the pointer from a given node.
1642  *
1643  * If the delete empties a node, the node is removed from the tree,
1644  * continuing all the way the root if required.  The root is converted into
1645  * a leaf if all the nodes are emptied.
1646  */
1647 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1648                    struct btrfs_path *path, int level, int slot)
1649 {
1650         struct btrfs_node *node;
1651         struct buffer_head *parent = path->nodes[level];
1652         u32 nritems;
1653         int ret = 0;
1654         int wret;
1655
1656         node = btrfs_buffer_node(parent);
1657         nritems = btrfs_header_nritems(&node->header);
1658         if (slot != nritems -1) {
1659                 btrfs_memmove(root, node, node->ptrs + slot,
1660                               node->ptrs + slot + 1,
1661                               sizeof(struct btrfs_key_ptr) *
1662                               (nritems - slot - 1));
1663         }
1664         nritems--;
1665         btrfs_set_header_nritems(&node->header, nritems);
1666         if (nritems == 0 && parent == root->node) {
1667                 struct btrfs_header *header = btrfs_buffer_header(root->node);
1668                 BUG_ON(btrfs_header_level(header) != 1);
1669                 /* just turn the root into a leaf and break */
1670                 btrfs_set_header_level(header, 0);
1671         } else if (slot == 0) {
1672                 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1673                                       level + 1);
1674                 if (wret)
1675                         ret = wret;
1676         }
1677         btrfs_mark_buffer_dirty(parent);
1678         return ret;
1679 }
1680
1681 /*
1682  * delete the item at the leaf level in path.  If that empties
1683  * the leaf, remove it from the tree
1684  */
1685 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1686                    struct btrfs_path *path)
1687 {
1688         int slot;
1689         struct btrfs_leaf *leaf;
1690         struct buffer_head *leaf_buf;
1691         int doff;
1692         int dsize;
1693         int ret = 0;
1694         int wret;
1695         u32 nritems;
1696
1697         leaf_buf = path->nodes[0];
1698         leaf = btrfs_buffer_leaf(leaf_buf);
1699         slot = path->slots[0];
1700         doff = btrfs_item_offset(leaf->items + slot);
1701         dsize = btrfs_item_size(leaf->items + slot);
1702         nritems = btrfs_header_nritems(&leaf->header);
1703
1704         if (slot != nritems - 1) {
1705                 int i;
1706                 int data_end = leaf_data_end(root, leaf);
1707                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1708                               data_end + dsize,
1709                               btrfs_leaf_data(leaf) + data_end,
1710                               doff - data_end);
1711                 for (i = slot + 1; i < nritems; i++) {
1712                         u32 ioff = btrfs_item_offset(leaf->items + i);
1713                         btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1714                 }
1715                 btrfs_memmove(root, leaf, leaf->items + slot,
1716                               leaf->items + slot + 1,
1717                               sizeof(struct btrfs_item) *
1718                               (nritems - slot - 1));
1719         }
1720         btrfs_set_header_nritems(&leaf->header, nritems - 1);
1721         nritems--;
1722         /* delete the leaf if we've emptied it */
1723         if (nritems == 0) {
1724                 if (leaf_buf == root->node) {
1725                         btrfs_set_header_level(&leaf->header, 0);
1726                 } else {
1727                         clean_tree_block(trans, root, leaf_buf);
1728                         wait_on_buffer(leaf_buf);
1729                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
1730                         if (wret)
1731                                 ret = wret;
1732                         wret = btrfs_free_extent(trans, root,
1733                                                  bh_blocknr(leaf_buf), 1, 1);
1734                         if (wret)
1735                                 ret = wret;
1736                 }
1737         } else {
1738                 int used = leaf_space_used(leaf, 0, nritems);
1739                 if (slot == 0) {
1740                         wret = fixup_low_keys(trans, root, path,
1741                                               &leaf->items[0].key, 1);
1742                         if (wret)
1743                                 ret = wret;
1744                 }
1745
1746                 /* delete the leaf if it is mostly empty */
1747                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1748                         /* push_leaf_left fixes the path.
1749                          * make sure the path still points to our leaf
1750                          * for possible call to del_ptr below
1751                          */
1752                         slot = path->slots[1];
1753                         get_bh(leaf_buf);
1754                         wret = push_leaf_left(trans, root, path, 1);
1755                         if (wret < 0)
1756                                 ret = wret;
1757                         if (path->nodes[0] == leaf_buf &&
1758                             btrfs_header_nritems(&leaf->header)) {
1759                                 wret = push_leaf_right(trans, root, path, 1);
1760                                 if (wret < 0)
1761                                         ret = wret;
1762                         }
1763                         if (btrfs_header_nritems(&leaf->header) == 0) {
1764                                 u64 blocknr = bh_blocknr(leaf_buf);
1765                                 clean_tree_block(trans, root, leaf_buf);
1766                                 wait_on_buffer(leaf_buf);
1767                                 wret = del_ptr(trans, root, path, 1, slot);
1768                                 if (wret)
1769                                         ret = wret;
1770                                 btrfs_block_release(root, leaf_buf);
1771                                 wret = btrfs_free_extent(trans, root, blocknr,
1772                                                          1, 1);
1773                                 if (wret)
1774                                         ret = wret;
1775                         } else {
1776                                 btrfs_mark_buffer_dirty(leaf_buf);
1777                                 btrfs_block_release(root, leaf_buf);
1778                         }
1779                 } else {
1780                         btrfs_mark_buffer_dirty(leaf_buf);
1781                 }
1782         }
1783         return ret;
1784 }
1785
1786 /*
1787  * walk up the tree as far as required to find the next leaf.
1788  * returns 0 if it found something or 1 if there are no greater leaves.
1789  * returns < 0 on io errors.
1790  */
1791 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1792 {
1793         int slot;
1794         int level = 1;
1795         u64 blocknr;
1796         struct buffer_head *c;
1797         struct btrfs_node *c_node;
1798         struct buffer_head *next = NULL;
1799
1800         while(level < BTRFS_MAX_LEVEL) {
1801                 if (!path->nodes[level])
1802                         return 1;
1803                 slot = path->slots[level] + 1;
1804                 c = path->nodes[level];
1805                 c_node = btrfs_buffer_node(c);
1806                 if (slot >= btrfs_header_nritems(&c_node->header)) {
1807                         level++;
1808                         continue;
1809                 }
1810                 blocknr = btrfs_node_blockptr(c_node, slot);
1811                 if (next)
1812                         btrfs_block_release(root, next);
1813                 next = read_tree_block(root, blocknr);
1814                 break;
1815         }
1816         path->slots[level] = slot;
1817         while(1) {
1818                 level--;
1819                 c = path->nodes[level];
1820                 btrfs_block_release(root, c);
1821                 path->nodes[level] = next;
1822                 path->slots[level] = 0;
1823                 if (!level)
1824                         break;
1825                 next = read_tree_block(root,
1826                        btrfs_node_blockptr(btrfs_buffer_node(next), 0));
1827         }
1828         return 0;
1829 }