]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/ctree.c
Btrfs: Back port to 2.6.18-el kernels
[mv-sheeva.git] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
/*
 * Forward declarations for file-local helpers that are referenced before
 * their definitions.
 */
static int split_node(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      struct btrfs_path *path, int level);

static int split_leaf(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      struct btrfs_key *ins_key,
                      struct btrfs_path *path,
                      int data_size, int extend);

static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct extent_buffer *dst,
                          struct extent_buffer *src);

static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);

static int del_ptr(struct btrfs_trans_handle *trans,
                   struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 int btrfs_copy_root(struct btrfs_trans_handle *trans,
74                       struct btrfs_root *root,
75                       struct extent_buffer *buf,
76                       struct extent_buffer **cow_ret, u64 new_root_objectid)
77 {
78         struct extent_buffer *cow;
79         u32 nritems;
80         int ret = 0;
81         int level;
82         struct btrfs_key first_key;
83         struct btrfs_root new_root;
84
85         memcpy(&new_root, root, sizeof(new_root));
86         new_root.root_key.objectid = new_root_objectid;
87
88         WARN_ON(root->ref_cows && trans->transid !=
89                 root->fs_info->running_transaction->transid);
90         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
91
92         level = btrfs_header_level(buf);
93         nritems = btrfs_header_nritems(buf);
94         if (nritems) {
95                 if (level == 0)
96                         btrfs_item_key_to_cpu(buf, &first_key, 0);
97                 else
98                         btrfs_node_key_to_cpu(buf, &first_key, 0);
99         } else {
100                 first_key.objectid = 0;
101         }
102         cow = __btrfs_alloc_free_block(trans, &new_root, buf->len,
103                                        new_root_objectid,
104                                        trans->transid, first_key.objectid,
105                                        level, buf->start, 0);
106         if (IS_ERR(cow))
107                 return PTR_ERR(cow);
108
109         copy_extent_buffer(cow, buf, 0, 0, cow->len);
110         btrfs_set_header_bytenr(cow, cow->start);
111         btrfs_set_header_generation(cow, trans->transid);
112         btrfs_set_header_owner(cow, new_root_objectid);
113
114         WARN_ON(btrfs_header_generation(buf) > trans->transid);
115         ret = btrfs_inc_ref(trans, &new_root, buf);
116         if (ret)
117                 return ret;
118
119         btrfs_mark_buffer_dirty(cow);
120         *cow_ret = cow;
121         return 0;
122 }
123
124 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
125                              struct btrfs_root *root,
126                              struct extent_buffer *buf,
127                              struct extent_buffer *parent, int parent_slot,
128                              struct extent_buffer **cow_ret,
129                              u64 search_start, u64 empty_size)
130 {
131         u64 root_gen;
132         struct extent_buffer *cow;
133         u32 nritems;
134         int ret = 0;
135         int different_trans = 0;
136         int level;
137         struct btrfs_key first_key;
138
139         if (root->ref_cows) {
140                 root_gen = trans->transid;
141         } else {
142                 root_gen = 0;
143         }
144
145         WARN_ON(root->ref_cows && trans->transid !=
146                 root->fs_info->running_transaction->transid);
147         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
148
149         level = btrfs_header_level(buf);
150         nritems = btrfs_header_nritems(buf);
151         if (nritems) {
152                 if (level == 0)
153                         btrfs_item_key_to_cpu(buf, &first_key, 0);
154                 else
155                         btrfs_node_key_to_cpu(buf, &first_key, 0);
156         } else {
157                 first_key.objectid = 0;
158         }
159         cow = __btrfs_alloc_free_block(trans, root, buf->len,
160                                      root->root_key.objectid,
161                                      root_gen, first_key.objectid, level,
162                                      search_start, empty_size);
163         if (IS_ERR(cow))
164                 return PTR_ERR(cow);
165
166         copy_extent_buffer(cow, buf, 0, 0, cow->len);
167         btrfs_set_header_bytenr(cow, cow->start);
168         btrfs_set_header_generation(cow, trans->transid);
169         btrfs_set_header_owner(cow, root->root_key.objectid);
170
171         WARN_ON(btrfs_header_generation(buf) > trans->transid);
172         if (btrfs_header_generation(buf) != trans->transid) {
173                 different_trans = 1;
174                 ret = btrfs_inc_ref(trans, root, buf);
175                 if (ret)
176                         return ret;
177         } else {
178                 clean_tree_block(trans, root, buf);
179         }
180
181         if (buf == root->node) {
182                 root_gen = btrfs_header_generation(buf);
183                 root->node = cow;
184                 extent_buffer_get(cow);
185                 if (buf != root->commit_root) {
186                         btrfs_free_extent(trans, root, buf->start,
187                                           buf->len, root->root_key.objectid,
188                                           root_gen, 0, 0, 1);
189                 }
190                 free_extent_buffer(buf);
191         } else {
192                 root_gen = btrfs_header_generation(parent);
193                 btrfs_set_node_blockptr(parent, parent_slot,
194                                         cow->start);
195                 WARN_ON(trans->transid == 0);
196                 btrfs_set_node_ptr_generation(parent, parent_slot,
197                                               trans->transid);
198                 btrfs_mark_buffer_dirty(parent);
199                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
200                 btrfs_free_extent(trans, root, buf->start, buf->len,
201                                   btrfs_header_owner(parent), root_gen,
202                                   0, 0, 1);
203         }
204         free_extent_buffer(buf);
205         btrfs_mark_buffer_dirty(cow);
206         *cow_ret = cow;
207         return 0;
208 }
209
210 int btrfs_cow_block(struct btrfs_trans_handle *trans,
211                     struct btrfs_root *root, struct extent_buffer *buf,
212                     struct extent_buffer *parent, int parent_slot,
213                     struct extent_buffer **cow_ret)
214 {
215         u64 search_start;
216         int ret;
217         if (trans->transaction != root->fs_info->running_transaction) {
218                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
219                        root->fs_info->running_transaction->transid);
220                 WARN_ON(1);
221         }
222         if (trans->transid != root->fs_info->generation) {
223                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
224                        root->fs_info->generation);
225                 WARN_ON(1);
226         }
227         if (btrfs_header_generation(buf) == trans->transid) {
228                 *cow_ret = buf;
229                 return 0;
230         }
231
232         search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
233         ret = __btrfs_cow_block(trans, root, buf, parent,
234                                  parent_slot, cow_ret, search_start, 0);
235         return ret;
236 }
237
238 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
239 {
240         if (blocknr < other && other - (blocknr + blocksize) < 32768)
241                 return 1;
242         if (blocknr > other && blocknr - (other + blocksize) < 32768)
243                 return 1;
244         return 0;
245 }
246
247 /*
248  * compare two keys in a memcmp fashion
249  */
250 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
251 {
252         struct btrfs_key k1;
253
254         btrfs_disk_key_to_cpu(&k1, disk);
255
256         if (k1.objectid > k2->objectid)
257                 return 1;
258         if (k1.objectid < k2->objectid)
259                 return -1;
260         if (k1.type > k2->type)
261                 return 1;
262         if (k1.type < k2->type)
263                 return -1;
264         if (k1.offset > k2->offset)
265                 return 1;
266         if (k1.offset < k2->offset)
267                 return -1;
268         return 0;
269 }
270
271
272 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
273                        struct btrfs_root *root, struct extent_buffer *parent,
274                        int start_slot, int cache_only, u64 *last_ret,
275                        struct btrfs_key *progress)
276 {
277         struct extent_buffer *cur;
278         struct extent_buffer *tmp;
279         u64 blocknr;
280         u64 search_start = *last_ret;
281         u64 last_block = 0;
282         u64 other;
283         u32 parent_nritems;
284         int end_slot;
285         int i;
286         int err = 0;
287         int parent_level;
288         int uptodate;
289         u32 blocksize;
290         int progress_passed = 0;
291         struct btrfs_disk_key disk_key;
292
293         parent_level = btrfs_header_level(parent);
294         if (cache_only && parent_level != 1)
295                 return 0;
296
297         if (trans->transaction != root->fs_info->running_transaction) {
298                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
299                        root->fs_info->running_transaction->transid);
300                 WARN_ON(1);
301         }
302         if (trans->transid != root->fs_info->generation) {
303                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
304                        root->fs_info->generation);
305                 WARN_ON(1);
306         }
307
308         parent_nritems = btrfs_header_nritems(parent);
309         blocksize = btrfs_level_size(root, parent_level - 1);
310         end_slot = parent_nritems;
311
312         if (parent_nritems == 1)
313                 return 0;
314
315         for (i = start_slot; i < end_slot; i++) {
316                 int close = 1;
317
318                 if (!parent->map_token) {
319                         map_extent_buffer(parent,
320                                         btrfs_node_key_ptr_offset(i),
321                                         sizeof(struct btrfs_key_ptr),
322                                         &parent->map_token, &parent->kaddr,
323                                         &parent->map_start, &parent->map_len,
324                                         KM_USER1);
325                 }
326                 btrfs_node_key(parent, &disk_key, i);
327                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
328                         continue;
329
330                 progress_passed = 1;
331                 blocknr = btrfs_node_blockptr(parent, i);
332                 if (last_block == 0)
333                         last_block = blocknr;
334
335                 if (i > 0) {
336                         other = btrfs_node_blockptr(parent, i - 1);
337                         close = close_blocks(blocknr, other, blocksize);
338                 }
339                 if (close && i < end_slot - 2) {
340                         other = btrfs_node_blockptr(parent, i + 1);
341                         close = close_blocks(blocknr, other, blocksize);
342                 }
343                 if (close) {
344                         last_block = blocknr;
345                         continue;
346                 }
347                 if (parent->map_token) {
348                         unmap_extent_buffer(parent, parent->map_token,
349                                             KM_USER1);
350                         parent->map_token = NULL;
351                 }
352
353                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
354                 if (cur)
355                         uptodate = btrfs_buffer_uptodate(cur);
356                 else
357                         uptodate = 0;
358                 if (!cur || !uptodate) {
359                         if (cache_only) {
360                                 free_extent_buffer(cur);
361                                 continue;
362                         }
363                         if (!cur) {
364                                 cur = read_tree_block(root, blocknr,
365                                                          blocksize);
366                         } else if (!uptodate) {
367                                 btrfs_read_buffer(cur);
368                         }
369                 }
370                 if (search_start == 0)
371                         search_start = last_block;
372
373                 err = __btrfs_cow_block(trans, root, cur, parent, i,
374                                         &tmp, search_start,
375                                         min(16 * blocksize,
376                                             (end_slot - i) * blocksize));
377                 if (err) {
378                         free_extent_buffer(cur);
379                         break;
380                 }
381                 search_start = tmp->start;
382                 last_block = tmp->start;
383                 *last_ret = search_start;
384                 if (parent_level == 1)
385                         btrfs_clear_buffer_defrag(tmp);
386                 free_extent_buffer(tmp);
387         }
388         if (parent->map_token) {
389                 unmap_extent_buffer(parent, parent->map_token,
390                                     KM_USER1);
391                 parent->map_token = NULL;
392         }
393         return err;
394 }
395
396 /*
397  * The leaf data grows from end-to-front in the node.
398  * this returns the address of the start of the last item,
399  * which is the stop of the leaf data stack
400  */
401 static inline unsigned int leaf_data_end(struct btrfs_root *root,
402                                          struct extent_buffer *leaf)
403 {
404         u32 nr = btrfs_header_nritems(leaf);
405         if (nr == 0)
406                 return BTRFS_LEAF_DATA_SIZE(root);
407         return btrfs_item_offset_nr(leaf, nr - 1);
408 }
409
410 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
411                       int level)
412 {
413         struct extent_buffer *parent = NULL;
414         struct extent_buffer *node = path->nodes[level];
415         struct btrfs_disk_key parent_key;
416         struct btrfs_disk_key node_key;
417         int parent_slot;
418         int slot;
419         struct btrfs_key cpukey;
420         u32 nritems = btrfs_header_nritems(node);
421
422         if (path->nodes[level + 1])
423                 parent = path->nodes[level + 1];
424
425         slot = path->slots[level];
426         BUG_ON(nritems == 0);
427         if (parent) {
428                 parent_slot = path->slots[level + 1];
429                 btrfs_node_key(parent, &parent_key, parent_slot);
430                 btrfs_node_key(node, &node_key, 0);
431                 BUG_ON(memcmp(&parent_key, &node_key,
432                               sizeof(struct btrfs_disk_key)));
433                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
434                        btrfs_header_bytenr(node));
435         }
436         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
437         if (slot != 0) {
438                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
439                 btrfs_node_key(node, &node_key, slot);
440                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
441         }
442         if (slot < nritems - 1) {
443                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
444                 btrfs_node_key(node, &node_key, slot);
445                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
446         }
447         return 0;
448 }
449
450 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
451                       int level)
452 {
453         struct extent_buffer *leaf = path->nodes[level];
454         struct extent_buffer *parent = NULL;
455         int parent_slot;
456         struct btrfs_key cpukey;
457         struct btrfs_disk_key parent_key;
458         struct btrfs_disk_key leaf_key;
459         int slot = path->slots[0];
460
461         u32 nritems = btrfs_header_nritems(leaf);
462
463         if (path->nodes[level + 1])
464                 parent = path->nodes[level + 1];
465
466         if (nritems == 0)
467                 return 0;
468
469         if (parent) {
470                 parent_slot = path->slots[level + 1];
471                 btrfs_node_key(parent, &parent_key, parent_slot);
472                 btrfs_item_key(leaf, &leaf_key, 0);
473
474                 BUG_ON(memcmp(&parent_key, &leaf_key,
475                        sizeof(struct btrfs_disk_key)));
476                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
477                        btrfs_header_bytenr(leaf));
478         }
479 #if 0
480         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
481                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
482                 btrfs_item_key(leaf, &leaf_key, i);
483                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
484                         btrfs_print_leaf(root, leaf);
485                         printk("slot %d offset bad key\n", i);
486                         BUG_ON(1);
487                 }
488                 if (btrfs_item_offset_nr(leaf, i) !=
489                         btrfs_item_end_nr(leaf, i + 1)) {
490                         btrfs_print_leaf(root, leaf);
491                         printk("slot %d offset bad\n", i);
492                         BUG_ON(1);
493                 }
494                 if (i == 0) {
495                         if (btrfs_item_offset_nr(leaf, i) +
496                                btrfs_item_size_nr(leaf, i) !=
497                                BTRFS_LEAF_DATA_SIZE(root)) {
498                                 btrfs_print_leaf(root, leaf);
499                                 printk("slot %d first offset bad\n", i);
500                                 BUG_ON(1);
501                         }
502                 }
503         }
504         if (nritems > 0) {
505                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
506                                 btrfs_print_leaf(root, leaf);
507                                 printk("slot %d bad size \n", nritems - 1);
508                                 BUG_ON(1);
509                 }
510         }
511 #endif
512         if (slot != 0 && slot < nritems - 1) {
513                 btrfs_item_key(leaf, &leaf_key, slot);
514                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
515                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
516                         btrfs_print_leaf(root, leaf);
517                         printk("slot %d offset bad key\n", slot);
518                         BUG_ON(1);
519                 }
520                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
521                        btrfs_item_end_nr(leaf, slot)) {
522                         btrfs_print_leaf(root, leaf);
523                         printk("slot %d offset bad\n", slot);
524                         BUG_ON(1);
525                 }
526         }
527         if (slot < nritems - 1) {
528                 btrfs_item_key(leaf, &leaf_key, slot);
529                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
530                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
531                 if (btrfs_item_offset_nr(leaf, slot) !=
532                         btrfs_item_end_nr(leaf, slot + 1)) {
533                         btrfs_print_leaf(root, leaf);
534                         printk("slot %d offset bad\n", slot);
535                         BUG_ON(1);
536                 }
537         }
538         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
539                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
540         return 0;
541 }
542
/*
 * Consistency check entry point for the block at path->nodes[level].
 *
 * The checks are currently switched off: the function returns 0 before
 * doing anything.  The code after the early return (an fsid comparison
 * under "#if 0" and the dispatch to check_leaf()/check_node()) is kept
 * so the checks can be re-enabled by removing the early return.
 */
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
                        int level)
{
        /* checking disabled */
        return 0;
#if 0
        struct extent_buffer *buf = path->nodes[level];

        if (memcmp_extent_buffer(buf, root->fs_info->fsid,
                                 (unsigned long)btrfs_header_fsid(buf),
                                 BTRFS_FSID_SIZE)) {
                printk("warning bad block %Lu\n", buf->start);
                return 1;
        }
#endif
        return level == 0 ? check_leaf(root, path, level) :
                            check_node(root, path, level);
}
561
562 /*
563  * search for key in the extent_buffer.  The items start at offset p,
564  * and they are item_size apart.  There are 'max' items in p.
565  *
566  * the slot in the array is returned via slot, and it points to
567  * the place where you would insert key if it is not found in
568  * the array.
569  *
570  * slot may point to max if the key is bigger than all of the keys
571  */
572 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
573                               int item_size, struct btrfs_key *key,
574                               int max, int *slot)
575 {
576         int low = 0;
577         int high = max;
578         int mid;
579         int ret;
580         struct btrfs_disk_key *tmp = NULL;
581         struct btrfs_disk_key unaligned;
582         unsigned long offset;
583         char *map_token = NULL;
584         char *kaddr = NULL;
585         unsigned long map_start = 0;
586         unsigned long map_len = 0;
587         int err;
588
589         while(low < high) {
590                 mid = (low + high) / 2;
591                 offset = p + mid * item_size;
592
593                 if (!map_token || offset < map_start ||
594                     (offset + sizeof(struct btrfs_disk_key)) >
595                     map_start + map_len) {
596                         if (map_token) {
597                                 unmap_extent_buffer(eb, map_token, KM_USER0);
598                                 map_token = NULL;
599                         }
600                         err = map_extent_buffer(eb, offset,
601                                                 sizeof(struct btrfs_disk_key),
602                                                 &map_token, &kaddr,
603                                                 &map_start, &map_len, KM_USER0);
604
605                         if (!err) {
606                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
607                                                         map_start);
608                         } else {
609                                 read_extent_buffer(eb, &unaligned,
610                                                    offset, sizeof(unaligned));
611                                 tmp = &unaligned;
612                         }
613
614                 } else {
615                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
616                                                         map_start);
617                 }
618                 ret = comp_keys(tmp, key);
619
620                 if (ret < 0)
621                         low = mid + 1;
622                 else if (ret > 0)
623                         high = mid;
624                 else {
625                         *slot = mid;
626                         if (map_token)
627                                 unmap_extent_buffer(eb, map_token, KM_USER0);
628                         return 0;
629                 }
630         }
631         *slot = low;
632         if (map_token)
633                 unmap_extent_buffer(eb, map_token, KM_USER0);
634         return 1;
635 }
636
637 /*
638  * simple bin_search frontend that does the right thing for
639  * leaves vs nodes
640  */
641 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
642                       int level, int *slot)
643 {
644         if (level == 0) {
645                 return generic_bin_search(eb,
646                                           offsetof(struct btrfs_leaf, items),
647                                           sizeof(struct btrfs_item),
648                                           key, btrfs_header_nritems(eb),
649                                           slot);
650         } else {
651                 return generic_bin_search(eb,
652                                           offsetof(struct btrfs_node, ptrs),
653                                           sizeof(struct btrfs_key_ptr),
654                                           key, btrfs_header_nritems(eb),
655                                           slot);
656         }
657         return -1;
658 }
659
660 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
661                                    struct extent_buffer *parent, int slot)
662 {
663         if (slot < 0)
664                 return NULL;
665         if (slot >= btrfs_header_nritems(parent))
666                 return NULL;
667         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
668                        btrfs_level_size(root, btrfs_header_level(parent) - 1));
669 }
670
671 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
672                          *root, struct btrfs_path *path, int level)
673 {
674         struct extent_buffer *right = NULL;
675         struct extent_buffer *mid;
676         struct extent_buffer *left = NULL;
677         struct extent_buffer *parent = NULL;
678         int ret = 0;
679         int wret;
680         int pslot;
681         int orig_slot = path->slots[level];
682         int err_on_enospc = 0;
683         u64 orig_ptr;
684
685         if (level == 0)
686                 return 0;
687
688         mid = path->nodes[level];
689         WARN_ON(btrfs_header_generation(mid) != trans->transid);
690
691         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
692
693         if (level < BTRFS_MAX_LEVEL - 1)
694                 parent = path->nodes[level + 1];
695         pslot = path->slots[level + 1];
696
697         /*
698          * deal with the case where there is only one pointer in the root
699          * by promoting the node below to a root
700          */
701         if (!parent) {
702                 struct extent_buffer *child;
703
704                 if (btrfs_header_nritems(mid) != 1)
705                         return 0;
706
707                 /* promote the child to a root */
708                 child = read_node_slot(root, mid, 0);
709                 BUG_ON(!child);
710                 root->node = child;
711                 path->nodes[level] = NULL;
712                 clean_tree_block(trans, root, mid);
713                 wait_on_tree_block_writeback(root, mid);
714                 /* once for the path */
715                 free_extent_buffer(mid);
716                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
717                                         root->root_key.objectid,
718                                         btrfs_header_generation(mid), 0, 0, 1);
719                 /* once for the root ptr */
720                 free_extent_buffer(mid);
721                 return ret;
722         }
723         if (btrfs_header_nritems(mid) >
724             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
725                 return 0;
726
727         if (btrfs_header_nritems(mid) < 2)
728                 err_on_enospc = 1;
729
730         left = read_node_slot(root, parent, pslot - 1);
731         if (left) {
732                 wret = btrfs_cow_block(trans, root, left,
733                                        parent, pslot - 1, &left);
734                 if (wret) {
735                         ret = wret;
736                         goto enospc;
737                 }
738         }
739         right = read_node_slot(root, parent, pslot + 1);
740         if (right) {
741                 wret = btrfs_cow_block(trans, root, right,
742                                        parent, pslot + 1, &right);
743                 if (wret) {
744                         ret = wret;
745                         goto enospc;
746                 }
747         }
748
749         /* first, try to make some room in the middle buffer */
750         if (left) {
751                 orig_slot += btrfs_header_nritems(left);
752                 wret = push_node_left(trans, root, left, mid);
753                 if (wret < 0)
754                         ret = wret;
755                 if (btrfs_header_nritems(mid) < 2)
756                         err_on_enospc = 1;
757         }
758
759         /*
760          * then try to empty the right most buffer into the middle
761          */
762         if (right) {
763                 wret = push_node_left(trans, root, mid, right);
764                 if (wret < 0 && wret != -ENOSPC)
765                         ret = wret;
766                 if (btrfs_header_nritems(right) == 0) {
767                         u64 bytenr = right->start;
768                         u64 generation = btrfs_header_generation(parent);
769                         u32 blocksize = right->len;
770
771                         clean_tree_block(trans, root, right);
772                         wait_on_tree_block_writeback(root, right);
773                         free_extent_buffer(right);
774                         right = NULL;
775                         wret = del_ptr(trans, root, path, level + 1, pslot +
776                                        1);
777                         if (wret)
778                                 ret = wret;
779                         wret = btrfs_free_extent(trans, root, bytenr,
780                                                  blocksize,
781                                                  btrfs_header_owner(parent),
782                                                  generation, 0, 0, 1);
783                         if (wret)
784                                 ret = wret;
785                 } else {
786                         struct btrfs_disk_key right_key;
787                         btrfs_node_key(right, &right_key, 0);
788                         btrfs_set_node_key(parent, &right_key, pslot + 1);
789                         btrfs_mark_buffer_dirty(parent);
790                 }
791         }
792         if (btrfs_header_nritems(mid) == 1) {
793                 /*
794                  * we're not allowed to leave a node with one item in the
795                  * tree during a delete.  A deletion from lower in the tree
796                  * could try to delete the only pointer in this node.
797                  * So, pull some keys from the left.
798                  * There has to be a left pointer at this point because
799                  * otherwise we would have pulled some pointers from the
800                  * right
801                  */
802                 BUG_ON(!left);
803                 wret = balance_node_right(trans, root, mid, left);
804                 if (wret < 0) {
805                         ret = wret;
806                         goto enospc;
807                 }
808                 BUG_ON(wret == 1);
809         }
810         if (btrfs_header_nritems(mid) == 0) {
811                 /* we've managed to empty the middle node, drop it */
812                 u64 root_gen = btrfs_header_generation(parent);
813                 u64 bytenr = mid->start;
814                 u32 blocksize = mid->len;
815                 clean_tree_block(trans, root, mid);
816                 wait_on_tree_block_writeback(root, mid);
817                 free_extent_buffer(mid);
818                 mid = NULL;
819                 wret = del_ptr(trans, root, path, level + 1, pslot);
820                 if (wret)
821                         ret = wret;
822                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
823                                          btrfs_header_owner(parent),
824                                          root_gen, 0, 0, 1);
825                 if (wret)
826                         ret = wret;
827         } else {
828                 /* update the parent key to reflect our changes */
829                 struct btrfs_disk_key mid_key;
830                 btrfs_node_key(mid, &mid_key, 0);
831                 btrfs_set_node_key(parent, &mid_key, pslot);
832                 btrfs_mark_buffer_dirty(parent);
833         }
834
835         /* update the path */
836         if (left) {
837                 if (btrfs_header_nritems(left) > orig_slot) {
838                         extent_buffer_get(left);
839                         path->nodes[level] = left;
840                         path->slots[level + 1] -= 1;
841                         path->slots[level] = orig_slot;
842                         if (mid)
843                                 free_extent_buffer(mid);
844                 } else {
845                         orig_slot -= btrfs_header_nritems(left);
846                         path->slots[level] = orig_slot;
847                 }
848         }
849         /* double check we haven't messed things up */
850         check_block(root, path, level);
851         if (orig_ptr !=
852             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
853                 BUG();
854 enospc:
855         if (right)
856                 free_extent_buffer(right);
857         if (left)
858                 free_extent_buffer(left);
859         return ret;
860 }
861
862 /* returns zero if the push worked, non-zero otherwise */
863 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
864                                 struct btrfs_root *root,
865                                 struct btrfs_path *path, int level)
866 {
867         struct extent_buffer *right = NULL;
868         struct extent_buffer *mid;
869         struct extent_buffer *left = NULL;
870         struct extent_buffer *parent = NULL;
871         int ret = 0;
872         int wret;
873         int pslot;
874         int orig_slot = path->slots[level];
875         u64 orig_ptr;
876
877         if (level == 0)
878                 return 1;
879
880         mid = path->nodes[level];
881         WARN_ON(btrfs_header_generation(mid) != trans->transid);
882         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
883
884         if (level < BTRFS_MAX_LEVEL - 1)
885                 parent = path->nodes[level + 1];
886         pslot = path->slots[level + 1];
887
888         if (!parent)
889                 return 1;
890
891         left = read_node_slot(root, parent, pslot - 1);
892
893         /* first, try to make some room in the middle buffer */
894         if (left) {
895                 u32 left_nr;
896                 left_nr = btrfs_header_nritems(left);
897                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
898                         wret = 1;
899                 } else {
900                         ret = btrfs_cow_block(trans, root, left, parent,
901                                               pslot - 1, &left);
902                         if (ret)
903                                 wret = 1;
904                         else {
905                                 wret = push_node_left(trans, root,
906                                                       left, mid);
907                         }
908                 }
909                 if (wret < 0)
910                         ret = wret;
911                 if (wret == 0) {
912                         struct btrfs_disk_key disk_key;
913                         orig_slot += left_nr;
914                         btrfs_node_key(mid, &disk_key, 0);
915                         btrfs_set_node_key(parent, &disk_key, pslot);
916                         btrfs_mark_buffer_dirty(parent);
917                         if (btrfs_header_nritems(left) > orig_slot) {
918                                 path->nodes[level] = left;
919                                 path->slots[level + 1] -= 1;
920                                 path->slots[level] = orig_slot;
921                                 free_extent_buffer(mid);
922                         } else {
923                                 orig_slot -=
924                                         btrfs_header_nritems(left);
925                                 path->slots[level] = orig_slot;
926                                 free_extent_buffer(left);
927                         }
928                         return 0;
929                 }
930                 free_extent_buffer(left);
931         }
932         right= read_node_slot(root, parent, pslot + 1);
933
934         /*
935          * then try to empty the right most buffer into the middle
936          */
937         if (right) {
938                 u32 right_nr;
939                 right_nr = btrfs_header_nritems(right);
940                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
941                         wret = 1;
942                 } else {
943                         ret = btrfs_cow_block(trans, root, right,
944                                               parent, pslot + 1,
945                                               &right);
946                         if (ret)
947                                 wret = 1;
948                         else {
949                                 wret = balance_node_right(trans, root,
950                                                           right, mid);
951                         }
952                 }
953                 if (wret < 0)
954                         ret = wret;
955                 if (wret == 0) {
956                         struct btrfs_disk_key disk_key;
957
958                         btrfs_node_key(right, &disk_key, 0);
959                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
960                         btrfs_mark_buffer_dirty(parent);
961
962                         if (btrfs_header_nritems(mid) <= orig_slot) {
963                                 path->nodes[level] = right;
964                                 path->slots[level + 1] += 1;
965                                 path->slots[level] = orig_slot -
966                                         btrfs_header_nritems(mid);
967                                 free_extent_buffer(mid);
968                         } else {
969                                 free_extent_buffer(right);
970                         }
971                         return 0;
972                 }
973                 free_extent_buffer(right);
974         }
975         return 1;
976 }
977
978 /*
979  * readahead one full node of leaves
980  */
981 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
982                              int level, int slot)
983 {
984         struct extent_buffer *node;
985         u32 nritems;
986         u64 search;
987         u64 lowest_read;
988         u64 highest_read;
989         u64 nread = 0;
990         int direction = path->reada;
991         struct extent_buffer *eb;
992         u32 nr;
993         u32 blocksize;
994         u32 nscan = 0;
995
996         if (level != 1)
997                 return;
998
999         if (!path->nodes[level])
1000                 return;
1001
1002         node = path->nodes[level];
1003         search = btrfs_node_blockptr(node, slot);
1004         blocksize = btrfs_level_size(root, level - 1);
1005         eb = btrfs_find_tree_block(root, search, blocksize);
1006         if (eb) {
1007                 free_extent_buffer(eb);
1008                 return;
1009         }
1010
1011         highest_read = search;
1012         lowest_read = search;
1013
1014         nritems = btrfs_header_nritems(node);
1015         nr = slot;
1016         while(1) {
1017                 if (direction < 0) {
1018                         if (nr == 0)
1019                                 break;
1020                         nr--;
1021                 } else if (direction > 0) {
1022                         nr++;
1023                         if (nr >= nritems)
1024                                 break;
1025                 }
1026                 search = btrfs_node_blockptr(node, nr);
1027                 if ((search >= lowest_read && search <= highest_read) ||
1028                     (search < lowest_read && lowest_read - search <= 32768) ||
1029                     (search > highest_read && search - highest_read <= 32768)) {
1030                         readahead_tree_block(root, search, blocksize);
1031                         nread += blocksize;
1032                 }
1033                 nscan++;
1034                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1035                         break;
1036                 if(nread > (1024 * 1024) || nscan > 128)
1037                         break;
1038
1039                 if (search < lowest_read)
1040                         lowest_read = search;
1041                 if (search > highest_read)
1042                         highest_read = search;
1043         }
1044 }
1045 /*
1046  * look for key in the tree.  path is filled in with nodes along the way
1047  * if key is found, we return zero and you can find the item in the leaf
1048  * level of the path (level 0)
1049  *
1050  * If the key isn't found, the path points to the slot where it should
1051  * be inserted, and 1 is returned.  If there are other errors during the
1052  * search a negative error number is returned.
1053  *
1054  * if ins_len > 0, nodes and leaves will be split as we walk down the
1055  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1056  * possible)
1057  */
1058 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1059                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1060                       ins_len, int cow)
1061 {
1062         struct extent_buffer *b;
1063         u64 bytenr;
1064         u64 ptr_gen;
1065         int slot;
1066         int ret;
1067         int level;
1068         int should_reada = p->reada;
1069         u8 lowest_level = 0;
1070
1071         lowest_level = p->lowest_level;
1072         WARN_ON(lowest_level && ins_len);
1073         WARN_ON(p->nodes[0] != NULL);
1074         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1075 again:
1076         b = root->node;
1077         extent_buffer_get(b);
1078         while (b) {
1079                 level = btrfs_header_level(b);
1080                 if (cow) {
1081                         int wret;
1082                         wret = btrfs_cow_block(trans, root, b,
1083                                                p->nodes[level + 1],
1084                                                p->slots[level + 1],
1085                                                &b);
1086                         if (wret) {
1087                                 free_extent_buffer(b);
1088                                 return wret;
1089                         }
1090                 }
1091                 BUG_ON(!cow && ins_len);
1092                 if (level != btrfs_header_level(b))
1093                         WARN_ON(1);
1094                 level = btrfs_header_level(b);
1095                 p->nodes[level] = b;
1096                 ret = check_block(root, p, level);
1097                 if (ret)
1098                         return -1;
1099                 ret = bin_search(b, key, level, &slot);
1100                 if (level != 0) {
1101                         if (ret && slot > 0)
1102                                 slot -= 1;
1103                         p->slots[level] = slot;
1104                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1105                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1106                                 int sret = split_node(trans, root, p, level);
1107                                 BUG_ON(sret > 0);
1108                                 if (sret)
1109                                         return sret;
1110                                 b = p->nodes[level];
1111                                 slot = p->slots[level];
1112                         } else if (ins_len < 0) {
1113                                 int sret = balance_level(trans, root, p,
1114                                                          level);
1115                                 if (sret)
1116                                         return sret;
1117                                 b = p->nodes[level];
1118                                 if (!b) {
1119                                         btrfs_release_path(NULL, p);
1120                                         goto again;
1121                                 }
1122                                 slot = p->slots[level];
1123                                 BUG_ON(btrfs_header_nritems(b) == 1);
1124                         }
1125                         /* this is only true while dropping a snapshot */
1126                         if (level == lowest_level)
1127                                 break;
1128                         bytenr = btrfs_node_blockptr(b, slot);
1129                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1130                         if (should_reada)
1131                                 reada_for_search(root, p, level, slot);
1132                         b = read_tree_block(root, bytenr,
1133                                             btrfs_level_size(root, level - 1));
1134                         if (ptr_gen != btrfs_header_generation(b)) {
1135                                 printk("block %llu bad gen wanted %llu "
1136                                        "found %llu\n",
1137                                 (unsigned long long)b->start,
1138                                 (unsigned long long)ptr_gen,
1139                                 (unsigned long long)btrfs_header_generation(b));
1140                         }
1141                 } else {
1142                         p->slots[level] = slot;
1143                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1144                             sizeof(struct btrfs_item) + ins_len) {
1145                                 int sret = split_leaf(trans, root, key,
1146                                                       p, ins_len, ret == 0);
1147                                 BUG_ON(sret > 0);
1148                                 if (sret)
1149                                         return sret;
1150                         }
1151                         return ret;
1152                 }
1153         }
1154         return 1;
1155 }
1156
1157 /*
1158  * adjust the pointers going up the tree, starting at level
1159  * making sure the right key of each node is points to 'key'.
1160  * This is used after shifting pointers to the left, so it stops
1161  * fixing up pointers when a given leaf/node is not in slot 0 of the
1162  * higher levels
1163  *
1164  * If this fails to write a tree block, it returns -1, but continues
1165  * fixing up the blocks in ram so the tree is consistent.
1166  */
1167 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1168                           struct btrfs_root *root, struct btrfs_path *path,
1169                           struct btrfs_disk_key *key, int level)
1170 {
1171         int i;
1172         int ret = 0;
1173         struct extent_buffer *t;
1174
1175         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1176                 int tslot = path->slots[i];
1177                 if (!path->nodes[i])
1178                         break;
1179                 t = path->nodes[i];
1180                 btrfs_set_node_key(t, key, tslot);
1181                 btrfs_mark_buffer_dirty(path->nodes[i]);
1182                 if (tslot != 0)
1183                         break;
1184         }
1185         return ret;
1186 }
1187
1188 /*
1189  * try to push data from one node into the next node left in the
1190  * tree.
1191  *
1192  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1193  * error, and > 0 if there was no room in the left hand block.
1194  */
1195 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1196                           *root, struct extent_buffer *dst,
1197                           struct extent_buffer *src)
1198 {
1199         int push_items = 0;
1200         int src_nritems;
1201         int dst_nritems;
1202         int ret = 0;
1203
1204         src_nritems = btrfs_header_nritems(src);
1205         dst_nritems = btrfs_header_nritems(dst);
1206         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1207         WARN_ON(btrfs_header_generation(src) != trans->transid);
1208         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1209
1210         if (push_items <= 0) {
1211                 return 1;
1212         }
1213
1214         if (src_nritems < push_items)
1215                 push_items = src_nritems;
1216
1217         copy_extent_buffer(dst, src,
1218                            btrfs_node_key_ptr_offset(dst_nritems),
1219                            btrfs_node_key_ptr_offset(0),
1220                            push_items * sizeof(struct btrfs_key_ptr));
1221
1222         if (push_items < src_nritems) {
1223                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1224                                       btrfs_node_key_ptr_offset(push_items),
1225                                       (src_nritems - push_items) *
1226                                       sizeof(struct btrfs_key_ptr));
1227         }
1228         btrfs_set_header_nritems(src, src_nritems - push_items);
1229         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1230         btrfs_mark_buffer_dirty(src);
1231         btrfs_mark_buffer_dirty(dst);
1232         return ret;
1233 }
1234
1235 /*
1236  * try to push data from one node into the next node right in the
1237  * tree.
1238  *
1239  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1240  * error, and > 0 if there was no room in the right hand block.
1241  *
1242  * this will  only push up to 1/2 the contents of the left node over
1243  */
1244 static int balance_node_right(struct btrfs_trans_handle *trans,
1245                               struct btrfs_root *root,
1246                               struct extent_buffer *dst,
1247                               struct extent_buffer *src)
1248 {
1249         int push_items = 0;
1250         int max_push;
1251         int src_nritems;
1252         int dst_nritems;
1253         int ret = 0;
1254
1255         WARN_ON(btrfs_header_generation(src) != trans->transid);
1256         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1257
1258         src_nritems = btrfs_header_nritems(src);
1259         dst_nritems = btrfs_header_nritems(dst);
1260         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1261         if (push_items <= 0)
1262                 return 1;
1263
1264         max_push = src_nritems / 2 + 1;
1265         /* don't try to empty the node */
1266         if (max_push >= src_nritems)
1267                 return 1;
1268
1269         if (max_push < push_items)
1270                 push_items = max_push;
1271
1272         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1273                                       btrfs_node_key_ptr_offset(0),
1274                                       (dst_nritems) *
1275                                       sizeof(struct btrfs_key_ptr));
1276
1277         copy_extent_buffer(dst, src,
1278                            btrfs_node_key_ptr_offset(0),
1279                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1280                            push_items * sizeof(struct btrfs_key_ptr));
1281
1282         btrfs_set_header_nritems(src, src_nritems - push_items);
1283         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1284
1285         btrfs_mark_buffer_dirty(src);
1286         btrfs_mark_buffer_dirty(dst);
1287         return ret;
1288 }
1289
1290 /*
1291  * helper function to insert a new root level in the tree.
1292  * A new node is allocated, and a single item is inserted to
1293  * point to the existing root
1294  *
1295  * returns zero on success or < 0 on failure.
1296  */
1297 static int insert_new_root(struct btrfs_trans_handle *trans,
1298                            struct btrfs_root *root,
1299                            struct btrfs_path *path, int level)
1300 {
1301         u64 root_gen;
1302         u64 lower_gen;
1303         struct extent_buffer *lower;
1304         struct extent_buffer *c;
1305         struct btrfs_disk_key lower_key;
1306
1307         BUG_ON(path->nodes[level]);
1308         BUG_ON(path->nodes[level-1] != root->node);
1309
1310         if (root->ref_cows)
1311                 root_gen = trans->transid;
1312         else
1313                 root_gen = 0;
1314
1315         lower = path->nodes[level-1];
1316         if (level == 1)
1317                 btrfs_item_key(lower, &lower_key, 0);
1318         else
1319                 btrfs_node_key(lower, &lower_key, 0);
1320
1321         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1322                                    root->root_key.objectid,
1323                                    root_gen, lower_key.objectid, level,
1324                                    root->node->start, 0);
1325         if (IS_ERR(c))
1326                 return PTR_ERR(c);
1327         memset_extent_buffer(c, 0, 0, root->nodesize);
1328         btrfs_set_header_nritems(c, 1);
1329         btrfs_set_header_level(c, level);
1330         btrfs_set_header_bytenr(c, c->start);
1331         btrfs_set_header_generation(c, trans->transid);
1332         btrfs_set_header_owner(c, root->root_key.objectid);
1333
1334         write_extent_buffer(c, root->fs_info->fsid,
1335                             (unsigned long)btrfs_header_fsid(c),
1336                             BTRFS_FSID_SIZE);
1337         btrfs_set_node_key(c, &lower_key, 0);
1338         btrfs_set_node_blockptr(c, 0, lower->start);
1339         lower_gen = btrfs_header_generation(lower);
1340         WARN_ON(lower_gen == 0);
1341
1342         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1343
1344         btrfs_mark_buffer_dirty(c);
1345
1346         /* the super has an extra ref to root->node */
1347         free_extent_buffer(root->node);
1348         root->node = c;
1349         extent_buffer_get(c);
1350         path->nodes[level] = c;
1351         path->slots[level] = 0;
1352
1353         if (root->ref_cows && lower_gen != trans->transid) {
1354                 struct btrfs_path *back_path = btrfs_alloc_path();
1355                 int ret;
1356                 ret = btrfs_insert_extent_backref(trans,
1357                                                   root->fs_info->extent_root,
1358                                                   path, lower->start,
1359                                                   root->root_key.objectid,
1360                                                   trans->transid, 0, 0);
1361                 BUG_ON(ret);
1362                 btrfs_free_path(back_path);
1363         }
1364         return 0;
1365 }
1366
1367 /*
1368  * worker function to insert a single pointer in a node.
1369  * the node should have enough room for the pointer already
1370  *
1371  * slot and level indicate where you want the key to go, and
1372  * blocknr is the block the key points to.
1373  *
1374  * returns zero on success and < 0 on any error
1375  */
1376 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1377                       *root, struct btrfs_path *path, struct btrfs_disk_key
1378                       *key, u64 bytenr, int slot, int level)
1379 {
1380         struct extent_buffer *lower;
1381         int nritems;
1382
1383         BUG_ON(!path->nodes[level]);
1384         lower = path->nodes[level];
1385         nritems = btrfs_header_nritems(lower);
1386         if (slot > nritems)
1387                 BUG();
1388         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1389                 BUG();
1390         if (slot != nritems) {
1391                 memmove_extent_buffer(lower,
1392                               btrfs_node_key_ptr_offset(slot + 1),
1393                               btrfs_node_key_ptr_offset(slot),
1394                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1395         }
1396         btrfs_set_node_key(lower, key, slot);
1397         btrfs_set_node_blockptr(lower, slot, bytenr);
1398         WARN_ON(trans->transid == 0);
1399         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1400         btrfs_set_header_nritems(lower, nritems + 1);
1401         btrfs_mark_buffer_dirty(lower);
1402         return 0;
1403 }
1404
1405 /*
1406  * split the node at the specified level in path in two.
1407  * The path is corrected to point to the appropriate node after the split
1408  *
1409  * Before splitting this tries to make some room in the node by pushing
1410  * left and right, if either one works, it returns right away.
1411  *
1412  * returns 0 on success and < 0 on failure
1413  */
1414 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1415                       *root, struct btrfs_path *path, int level)
1416 {
1417         u64 root_gen;
1418         struct extent_buffer *c;
1419         struct extent_buffer *split;
1420         struct btrfs_disk_key disk_key;
1421         int mid;
1422         int ret;
1423         int wret;
1424         u32 c_nritems;
1425
1426         c = path->nodes[level];
1427         WARN_ON(btrfs_header_generation(c) != trans->transid);
1428         if (c == root->node) {
1429                 /* trying to split the root, lets make a new one */
1430                 ret = insert_new_root(trans, root, path, level + 1);
1431                 if (ret)
1432                         return ret;
1433         } else {
1434                 ret = push_nodes_for_insert(trans, root, path, level);
1435                 c = path->nodes[level];
1436                 if (!ret && btrfs_header_nritems(c) <
1437                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1438                         return 0;
1439                 if (ret < 0)
1440                         return ret;
1441         }
1442
1443         c_nritems = btrfs_header_nritems(c);
1444         if (root->ref_cows)
1445                 root_gen = trans->transid;
1446         else
1447                 root_gen = 0;
1448
1449         btrfs_node_key(c, &disk_key, 0);
1450         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1451                                          root->root_key.objectid,
1452                                          root_gen,
1453                                          btrfs_disk_key_objectid(&disk_key),
1454                                          level, c->start, 0);
1455         if (IS_ERR(split))
1456                 return PTR_ERR(split);
1457
1458         btrfs_set_header_flags(split, btrfs_header_flags(c));
1459         btrfs_set_header_level(split, btrfs_header_level(c));
1460         btrfs_set_header_bytenr(split, split->start);
1461         btrfs_set_header_generation(split, trans->transid);
1462         btrfs_set_header_owner(split, root->root_key.objectid);
1463         write_extent_buffer(split, root->fs_info->fsid,
1464                             (unsigned long)btrfs_header_fsid(split),
1465                             BTRFS_FSID_SIZE);
1466
1467         mid = (c_nritems + 1) / 2;
1468
1469         copy_extent_buffer(split, c,
1470                            btrfs_node_key_ptr_offset(0),
1471                            btrfs_node_key_ptr_offset(mid),
1472                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1473         btrfs_set_header_nritems(split, c_nritems - mid);
1474         btrfs_set_header_nritems(c, mid);
1475         ret = 0;
1476
1477         btrfs_mark_buffer_dirty(c);
1478         btrfs_mark_buffer_dirty(split);
1479
1480         btrfs_node_key(split, &disk_key, 0);
1481         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1482                           path->slots[level + 1] + 1,
1483                           level + 1);
1484         if (wret)
1485                 ret = wret;
1486
1487         if (path->slots[level] >= mid) {
1488                 path->slots[level] -= mid;
1489                 free_extent_buffer(c);
1490                 path->nodes[level] = split;
1491                 path->slots[level + 1] += 1;
1492         } else {
1493                 free_extent_buffer(split);
1494         }
1495         return ret;
1496 }
1497
1498 /*
1499  * how many bytes are required to store the items in a leaf.  start
1500  * and nr indicate which items in the leaf to check.  This totals up the
1501  * space used both by the item structs and the item data
1502  */
1503 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1504 {
1505         int data_len;
1506         int nritems = btrfs_header_nritems(l);
1507         int end = min(nritems, start + nr) - 1;
1508
1509         if (!nr)
1510                 return 0;
1511         data_len = btrfs_item_end_nr(l, start);
1512         data_len = data_len - btrfs_item_offset_nr(l, end);
1513         data_len += sizeof(struct btrfs_item) * nr;
1514         WARN_ON(data_len < 0);
1515         return data_len;
1516 }
1517
/*
 * Room left in a leaf for both item headers and item data: the leaf data
 * size minus the space used by all of its items.
 *
 * A negative result is reported with printk and still returned to the
 * caller.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
        int item_count = btrfs_header_nritems(leaf);
        int avail;

        avail = BTRFS_LEAF_DATA_SIZE(root) -
                leaf_space_used(leaf, 0, item_count);
        if (avail >= 0)
                return avail;

        /* the items claim more room than the leaf has */
        printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
               avail, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
               leaf_space_used(leaf, 0, item_count), item_count);
        return avail;
}
1535
1536 /*
1537  * push some data in the path leaf to the right, trying to free up at
1538  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1539  *
1540  * returns 1 if the push failed because the other node didn't have enough
1541  * room, 0 if everything worked out and < 0 if there were major errors.
1542  */
1543 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1544                            *root, struct btrfs_path *path, int data_size,
1545                            int empty)
1546 {
1547         struct extent_buffer *left = path->nodes[0];
1548         struct extent_buffer *right;
1549         struct extent_buffer *upper;
1550         struct btrfs_disk_key disk_key;
1551         int slot;
1552         u32 i;
1553         int free_space;
1554         int push_space = 0;
1555         int push_items = 0;
1556         struct btrfs_item *item;
1557         u32 left_nritems;
1558         u32 nr;
1559         u32 right_nritems;
1560         u32 data_end;
1561         u32 this_item_size;
1562         int ret;
1563
1564         slot = path->slots[1];
1565         if (!path->nodes[1]) {
1566                 return 1;
1567         }
1568         upper = path->nodes[1];
1569         if (slot >= btrfs_header_nritems(upper) - 1)
1570                 return 1;
1571
1572         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1573                                 root->leafsize);
1574         free_space = btrfs_leaf_free_space(root, right);
1575         if (free_space < data_size + sizeof(struct btrfs_item)) {
1576                 free_extent_buffer(right);
1577                 return 1;
1578         }
1579
1580         /* cow and double check */
1581         ret = btrfs_cow_block(trans, root, right, upper,
1582                               slot + 1, &right);
1583         if (ret) {
1584                 free_extent_buffer(right);
1585                 return 1;
1586         }
1587         free_space = btrfs_leaf_free_space(root, right);
1588         if (free_space < data_size + sizeof(struct btrfs_item)) {
1589                 free_extent_buffer(right);
1590                 return 1;
1591         }
1592
1593         left_nritems = btrfs_header_nritems(left);
1594         if (left_nritems == 0) {
1595                 free_extent_buffer(right);
1596                 return 1;
1597         }
1598
1599         if (empty)
1600                 nr = 0;
1601         else
1602                 nr = 1;
1603
1604         i = left_nritems - 1;
1605         while (i >= nr) {
1606                 item = btrfs_item_nr(left, i);
1607
1608                 if (path->slots[0] == i)
1609                         push_space += data_size + sizeof(*item);
1610
1611                 if (!left->map_token) {
1612                         map_extent_buffer(left, (unsigned long)item,
1613                                         sizeof(struct btrfs_item),
1614                                         &left->map_token, &left->kaddr,
1615                                         &left->map_start, &left->map_len,
1616                                         KM_USER1);
1617                 }
1618
1619                 this_item_size = btrfs_item_size(left, item);
1620                 if (this_item_size + sizeof(*item) + push_space > free_space)
1621                         break;
1622                 push_items++;
1623                 push_space += this_item_size + sizeof(*item);
1624                 if (i == 0)
1625                         break;
1626                 i--;
1627         }
1628         if (left->map_token) {
1629                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1630                 left->map_token = NULL;
1631         }
1632
1633         if (push_items == 0) {
1634                 free_extent_buffer(right);
1635                 return 1;
1636         }
1637
1638         if (!empty && push_items == left_nritems)
1639                 WARN_ON(1);
1640
1641         /* push left to right */
1642         right_nritems = btrfs_header_nritems(right);
1643
1644         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1645         push_space -= leaf_data_end(root, left);
1646
1647         /* make room in the right data area */
1648         data_end = leaf_data_end(root, right);
1649         memmove_extent_buffer(right,
1650                               btrfs_leaf_data(right) + data_end - push_space,
1651                               btrfs_leaf_data(right) + data_end,
1652                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1653
1654         /* copy from the left data area */
1655         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1656                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1657                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1658                      push_space);
1659
1660         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1661                               btrfs_item_nr_offset(0),
1662                               right_nritems * sizeof(struct btrfs_item));
1663
1664         /* copy the items from left to right */
1665         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1666                    btrfs_item_nr_offset(left_nritems - push_items),
1667                    push_items * sizeof(struct btrfs_item));
1668
1669         /* update the item pointers */
1670         right_nritems += push_items;
1671         btrfs_set_header_nritems(right, right_nritems);
1672         push_space = BTRFS_LEAF_DATA_SIZE(root);
1673         for (i = 0; i < right_nritems; i++) {
1674                 item = btrfs_item_nr(right, i);
1675                 if (!right->map_token) {
1676                         map_extent_buffer(right, (unsigned long)item,
1677                                         sizeof(struct btrfs_item),
1678                                         &right->map_token, &right->kaddr,
1679                                         &right->map_start, &right->map_len,
1680                                         KM_USER1);
1681                 }
1682                 push_space -= btrfs_item_size(right, item);
1683                 btrfs_set_item_offset(right, item, push_space);
1684         }
1685
1686         if (right->map_token) {
1687                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1688                 right->map_token = NULL;
1689         }
1690         left_nritems -= push_items;
1691         btrfs_set_header_nritems(left, left_nritems);
1692
1693         if (left_nritems)
1694                 btrfs_mark_buffer_dirty(left);
1695         btrfs_mark_buffer_dirty(right);
1696
1697         btrfs_item_key(right, &disk_key, 0);
1698         btrfs_set_node_key(upper, &disk_key, slot + 1);
1699         btrfs_mark_buffer_dirty(upper);
1700
1701         /* then fixup the leaf pointer in the path */
1702         if (path->slots[0] >= left_nritems) {
1703                 path->slots[0] -= left_nritems;
1704                 free_extent_buffer(path->nodes[0]);
1705                 path->nodes[0] = right;
1706                 path->slots[1] += 1;
1707         } else {
1708                 free_extent_buffer(right);
1709         }
1710         return 0;
1711 }
1712 /*
1713  * push some data in the path leaf to the left, trying to free up at
1714  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1715  */
1716 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1717                           *root, struct btrfs_path *path, int data_size,
1718                           int empty)
1719 {
1720         struct btrfs_disk_key disk_key;
1721         struct extent_buffer *right = path->nodes[0];
1722         struct extent_buffer *left;
1723         int slot;
1724         int i;
1725         int free_space;
1726         int push_space = 0;
1727         int push_items = 0;
1728         struct btrfs_item *item;
1729         u32 old_left_nritems;
1730         u32 right_nritems;
1731         u32 nr;
1732         int ret = 0;
1733         int wret;
1734         u32 this_item_size;
1735         u32 old_left_item_size;
1736
1737         slot = path->slots[1];
1738         if (slot == 0)
1739                 return 1;
1740         if (!path->nodes[1])
1741                 return 1;
1742
1743         right_nritems = btrfs_header_nritems(right);
1744         if (right_nritems == 0) {
1745                 return 1;
1746         }
1747
1748         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1749                                slot - 1), root->leafsize);
1750         free_space = btrfs_leaf_free_space(root, left);
1751         if (free_space < data_size + sizeof(struct btrfs_item)) {
1752                 free_extent_buffer(left);
1753                 return 1;
1754         }
1755
1756         /* cow and double check */
1757         ret = btrfs_cow_block(trans, root, left,
1758                               path->nodes[1], slot - 1, &left);
1759         if (ret) {
1760                 /* we hit -ENOSPC, but it isn't fatal here */
1761                 free_extent_buffer(left);
1762                 return 1;
1763         }
1764
1765         free_space = btrfs_leaf_free_space(root, left);
1766         if (free_space < data_size + sizeof(struct btrfs_item)) {
1767                 free_extent_buffer(left);
1768                 return 1;
1769         }
1770
1771         if (empty)
1772                 nr = right_nritems;
1773         else
1774                 nr = right_nritems - 1;
1775
1776         for (i = 0; i < nr; i++) {
1777                 item = btrfs_item_nr(right, i);
1778                 if (!right->map_token) {
1779                         map_extent_buffer(right, (unsigned long)item,
1780                                         sizeof(struct btrfs_item),
1781                                         &right->map_token, &right->kaddr,
1782                                         &right->map_start, &right->map_len,
1783                                         KM_USER1);
1784                 }
1785
1786                 if (path->slots[0] == i)
1787                         push_space += data_size + sizeof(*item);
1788
1789                 this_item_size = btrfs_item_size(right, item);
1790                 if (this_item_size + sizeof(*item) + push_space > free_space)
1791                         break;
1792
1793                 push_items++;
1794                 push_space += this_item_size + sizeof(*item);
1795         }
1796
1797         if (right->map_token) {
1798                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1799                 right->map_token = NULL;
1800         }
1801
1802         if (push_items == 0) {
1803                 free_extent_buffer(left);
1804                 return 1;
1805         }
1806         if (!empty && push_items == btrfs_header_nritems(right))
1807                 WARN_ON(1);
1808
1809         /* push data from right to left */
1810         copy_extent_buffer(left, right,
1811                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1812                            btrfs_item_nr_offset(0),
1813                            push_items * sizeof(struct btrfs_item));
1814
1815         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1816                      btrfs_item_offset_nr(right, push_items -1);
1817
1818         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1819                      leaf_data_end(root, left) - push_space,
1820                      btrfs_leaf_data(right) +
1821                      btrfs_item_offset_nr(right, push_items - 1),
1822                      push_space);
1823         old_left_nritems = btrfs_header_nritems(left);
1824         BUG_ON(old_left_nritems < 0);
1825
1826         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1827         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1828                 u32 ioff;
1829
1830                 item = btrfs_item_nr(left, i);
1831                 if (!left->map_token) {
1832                         map_extent_buffer(left, (unsigned long)item,
1833                                         sizeof(struct btrfs_item),
1834                                         &left->map_token, &left->kaddr,
1835                                         &left->map_start, &left->map_len,
1836                                         KM_USER1);
1837                 }
1838
1839                 ioff = btrfs_item_offset(left, item);
1840                 btrfs_set_item_offset(left, item,
1841                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1842         }
1843         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1844         if (left->map_token) {
1845                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1846                 left->map_token = NULL;
1847         }
1848
1849         /* fixup right node */
1850         if (push_items > right_nritems) {
1851                 printk("push items %d nr %u\n", push_items, right_nritems);
1852                 WARN_ON(1);
1853         }
1854
1855         if (push_items < right_nritems) {
1856                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1857                                                   leaf_data_end(root, right);
1858                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1859                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1860                                       btrfs_leaf_data(right) +
1861                                       leaf_data_end(root, right), push_space);
1862
1863                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1864                               btrfs_item_nr_offset(push_items),
1865                              (btrfs_header_nritems(right) - push_items) *
1866                              sizeof(struct btrfs_item));
1867         }
1868         right_nritems -= push_items;
1869         btrfs_set_header_nritems(right, right_nritems);
1870         push_space = BTRFS_LEAF_DATA_SIZE(root);
1871         for (i = 0; i < right_nritems; i++) {
1872                 item = btrfs_item_nr(right, i);
1873
1874                 if (!right->map_token) {
1875                         map_extent_buffer(right, (unsigned long)item,
1876                                         sizeof(struct btrfs_item),
1877                                         &right->map_token, &right->kaddr,
1878                                         &right->map_start, &right->map_len,
1879                                         KM_USER1);
1880                 }
1881
1882                 push_space = push_space - btrfs_item_size(right, item);
1883                 btrfs_set_item_offset(right, item, push_space);
1884         }
1885         if (right->map_token) {
1886                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1887                 right->map_token = NULL;
1888         }
1889
1890         btrfs_mark_buffer_dirty(left);
1891         if (right_nritems)
1892                 btrfs_mark_buffer_dirty(right);
1893
1894         btrfs_item_key(right, &disk_key, 0);
1895         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1896         if (wret)
1897                 ret = wret;
1898
1899         /* then fixup the leaf pointer in the path */
1900         if (path->slots[0] < push_items) {
1901                 path->slots[0] += old_left_nritems;
1902                 free_extent_buffer(path->nodes[0]);
1903                 path->nodes[0] = left;
1904                 path->slots[1] -= 1;
1905         } else {
1906                 free_extent_buffer(left);
1907                 path->slots[0] -= push_items;
1908         }
1909         BUG_ON(path->slots[0] < 0);
1910         return ret;
1911 }
1912
1913 /*
1914  * split the path's leaf in two, making sure there is at least data_size
1915  * available for the resulting leaf level of the path.
1916  *
1917  * returns 0 if all went well and < 0 on failure.
1918  */
1919 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1920                       *root, struct btrfs_key *ins_key,
1921                       struct btrfs_path *path, int data_size, int extend)
1922 {
1923         u64 root_gen;
1924         struct extent_buffer *l;
1925         u32 nritems;
1926         int mid;
1927         int slot;
1928         struct extent_buffer *right;
1929         int space_needed = data_size + sizeof(struct btrfs_item);
1930         int data_copy_size;
1931         int rt_data_off;
1932         int i;
1933         int ret = 0;
1934         int wret;
1935         int double_split;
1936         int num_doubles = 0;
1937         struct btrfs_disk_key disk_key;
1938
1939         if (extend)
1940                 space_needed = data_size;
1941
1942         if (root->ref_cows)
1943                 root_gen = trans->transid;
1944         else
1945                 root_gen = 0;
1946
1947         /* first try to make some room by pushing left and right */
1948         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1949                 wret = push_leaf_right(trans, root, path, data_size, 0);
1950                 if (wret < 0) {
1951                         return wret;
1952                 }
1953                 if (wret) {
1954                         wret = push_leaf_left(trans, root, path, data_size, 0);
1955                         if (wret < 0)
1956                                 return wret;
1957                 }
1958                 l = path->nodes[0];
1959
1960                 /* did the pushes work? */
1961                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1962                         return 0;
1963         }
1964
1965         if (!path->nodes[1]) {
1966                 ret = insert_new_root(trans, root, path, 1);
1967                 if (ret)
1968                         return ret;
1969         }
1970 again:
1971         double_split = 0;
1972         l = path->nodes[0];
1973         slot = path->slots[0];
1974         nritems = btrfs_header_nritems(l);
1975         mid = (nritems + 1)/ 2;
1976
1977         btrfs_item_key(l, &disk_key, 0);
1978
1979         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
1980                                          root->root_key.objectid,
1981                                          root_gen, disk_key.objectid, 0,
1982                                          l->start, 0);
1983         if (IS_ERR(right))
1984                 return PTR_ERR(right);
1985
1986         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1987         btrfs_set_header_bytenr(right, right->start);
1988         btrfs_set_header_generation(right, trans->transid);
1989         btrfs_set_header_owner(right, root->root_key.objectid);
1990         btrfs_set_header_level(right, 0);
1991         write_extent_buffer(right, root->fs_info->fsid,
1992                             (unsigned long)btrfs_header_fsid(right),
1993                             BTRFS_FSID_SIZE);
1994         if (mid <= slot) {
1995                 if (nritems == 1 ||
1996                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1997                         BTRFS_LEAF_DATA_SIZE(root)) {
1998                         if (slot >= nritems) {
1999                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2000                                 btrfs_set_header_nritems(right, 0);
2001                                 wret = insert_ptr(trans, root, path,
2002                                                   &disk_key, right->start,
2003                                                   path->slots[1] + 1, 1);
2004                                 if (wret)
2005                                         ret = wret;
2006                                 free_extent_buffer(path->nodes[0]);
2007                                 path->nodes[0] = right;
2008                                 path->slots[0] = 0;
2009                                 path->slots[1] += 1;
2010                                 return ret;
2011                         }
2012                         mid = slot;
2013                         if (mid != nritems &&
2014                             leaf_space_used(l, mid, nritems - mid) +
2015                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2016                                 double_split = 1;
2017                         }
2018                 }
2019         } else {
2020                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2021                         BTRFS_LEAF_DATA_SIZE(root)) {
2022                         if (!extend && slot == 0) {
2023                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2024                                 btrfs_set_header_nritems(right, 0);
2025                                 wret = insert_ptr(trans, root, path,
2026                                                   &disk_key,
2027                                                   right->start,
2028                                                   path->slots[1], 1);
2029                                 if (wret)
2030                                         ret = wret;
2031                                 free_extent_buffer(path->nodes[0]);
2032                                 path->nodes[0] = right;
2033                                 path->slots[0] = 0;
2034                                 if (path->slots[1] == 0) {
2035                                         wret = fixup_low_keys(trans, root,
2036                                                    path, &disk_key, 1);
2037                                         if (wret)
2038                                                 ret = wret;
2039                                 }
2040                                 return ret;
2041                         } else if (extend && slot == 0) {
2042                                 mid = 1;
2043                         } else {
2044                                 mid = slot;
2045                                 if (mid != nritems &&
2046                                     leaf_space_used(l, mid, nritems - mid) +
2047                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2048                                         double_split = 1;
2049                                 }
2050                         }
2051                 }
2052         }
2053         nritems = nritems - mid;
2054         btrfs_set_header_nritems(right, nritems);
2055         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2056
2057         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2058                            btrfs_item_nr_offset(mid),
2059                            nritems * sizeof(struct btrfs_item));
2060
2061         copy_extent_buffer(right, l,
2062                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2063                      data_copy_size, btrfs_leaf_data(l) +
2064                      leaf_data_end(root, l), data_copy_size);
2065
2066         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2067                       btrfs_item_end_nr(l, mid);
2068
2069         for (i = 0; i < nritems; i++) {
2070                 struct btrfs_item *item = btrfs_item_nr(right, i);
2071                 u32 ioff;
2072
2073                 if (!right->map_token) {
2074                         map_extent_buffer(right, (unsigned long)item,
2075                                         sizeof(struct btrfs_item),
2076                                         &right->map_token, &right->kaddr,
2077                                         &right->map_start, &right->map_len,
2078                                         KM_USER1);
2079                 }
2080
2081                 ioff = btrfs_item_offset(right, item);
2082                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2083         }
2084
2085         if (right->map_token) {
2086                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2087                 right->map_token = NULL;
2088         }
2089
2090         btrfs_set_header_nritems(l, mid);
2091         ret = 0;
2092         btrfs_item_key(right, &disk_key, 0);
2093         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2094                           path->slots[1] + 1, 1);
2095         if (wret)
2096                 ret = wret;
2097
2098         btrfs_mark_buffer_dirty(right);
2099         btrfs_mark_buffer_dirty(l);
2100         BUG_ON(path->slots[0] != slot);
2101
2102         if (mid <= slot) {
2103                 free_extent_buffer(path->nodes[0]);
2104                 path->nodes[0] = right;
2105                 path->slots[0] -= mid;
2106                 path->slots[1] += 1;
2107         } else
2108                 free_extent_buffer(right);
2109
2110         BUG_ON(path->slots[0] < 0);
2111
2112         if (double_split) {
2113                 BUG_ON(num_doubles != 0);
2114                 num_doubles++;
2115                 goto again;
2116         }
2117         return ret;
2118 }
2119
2120 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2121                         struct btrfs_root *root,
2122                         struct btrfs_path *path,
2123                         u32 new_size, int from_end)
2124 {
2125         int ret = 0;
2126         int slot;
2127         int slot_orig;
2128         struct extent_buffer *leaf;
2129         struct btrfs_item *item;
2130         u32 nritems;
2131         unsigned int data_end;
2132         unsigned int old_data_start;
2133         unsigned int old_size;
2134         unsigned int size_diff;
2135         int i;
2136
2137         slot_orig = path->slots[0];
2138         leaf = path->nodes[0];
2139         slot = path->slots[0];
2140
2141         old_size = btrfs_item_size_nr(leaf, slot);
2142         if (old_size == new_size)
2143                 return 0;
2144
2145         nritems = btrfs_header_nritems(leaf);
2146         data_end = leaf_data_end(root, leaf);
2147
2148         old_data_start = btrfs_item_offset_nr(leaf, slot);
2149
2150         size_diff = old_size - new_size;
2151
2152         BUG_ON(slot < 0);
2153         BUG_ON(slot >= nritems);
2154
2155         /*
2156          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2157          */
2158         /* first correct the data pointers */
2159         for (i = slot; i < nritems; i++) {
2160                 u32 ioff;
2161                 item = btrfs_item_nr(leaf, i);
2162
2163                 if (!leaf->map_token) {
2164                         map_extent_buffer(leaf, (unsigned long)item,
2165                                         sizeof(struct btrfs_item),
2166                                         &leaf->map_token, &leaf->kaddr,
2167                                         &leaf->map_start, &leaf->map_len,
2168                                         KM_USER1);
2169                 }
2170
2171                 ioff = btrfs_item_offset(leaf, item);
2172                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2173         }
2174
2175         if (leaf->map_token) {
2176                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2177                 leaf->map_token = NULL;
2178         }
2179
2180         /* shift the data */
2181         if (from_end) {
2182                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2183                               data_end + size_diff, btrfs_leaf_data(leaf) +
2184                               data_end, old_data_start + new_size - data_end);
2185         } else {
2186                 struct btrfs_disk_key disk_key;
2187                 u64 offset;
2188
2189                 btrfs_item_key(leaf, &disk_key, slot);
2190
2191                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2192                         unsigned long ptr;
2193                         struct btrfs_file_extent_item *fi;
2194
2195                         fi = btrfs_item_ptr(leaf, slot,
2196                                             struct btrfs_file_extent_item);
2197                         fi = (struct btrfs_file_extent_item *)(
2198                              (unsigned long)fi - size_diff);
2199
2200                         if (btrfs_file_extent_type(leaf, fi) ==
2201                             BTRFS_FILE_EXTENT_INLINE) {
2202                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2203                                 memmove_extent_buffer(leaf, ptr,
2204                                         (unsigned long)fi,
2205                                         offsetof(struct btrfs_file_extent_item,
2206                                                  disk_bytenr));
2207                         }
2208                 }
2209
2210                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2211                               data_end + size_diff, btrfs_leaf_data(leaf) +
2212                               data_end, old_data_start - data_end);
2213
2214                 offset = btrfs_disk_key_offset(&disk_key);
2215                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2216                 btrfs_set_item_key(leaf, &disk_key, slot);
2217                 if (slot == 0)
2218                         fixup_low_keys(trans, root, path, &disk_key, 1);
2219         }
2220
2221         item = btrfs_item_nr(leaf, slot);
2222         btrfs_set_item_size(leaf, item, new_size);
2223         btrfs_mark_buffer_dirty(leaf);
2224
2225         ret = 0;
2226         if (btrfs_leaf_free_space(root, leaf) < 0) {
2227                 btrfs_print_leaf(root, leaf);
2228                 BUG();
2229         }
2230         return ret;
2231 }
2232
2233 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2234                       struct btrfs_root *root, struct btrfs_path *path,
2235                       u32 data_size)
2236 {
2237         int ret = 0;
2238         int slot;
2239         int slot_orig;
2240         struct extent_buffer *leaf;
2241         struct btrfs_item *item;
2242         u32 nritems;
2243         unsigned int data_end;
2244         unsigned int old_data;
2245         unsigned int old_size;
2246         int i;
2247
2248         slot_orig = path->slots[0];
2249         leaf = path->nodes[0];
2250
2251         nritems = btrfs_header_nritems(leaf);
2252         data_end = leaf_data_end(root, leaf);
2253
2254         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2255                 btrfs_print_leaf(root, leaf);
2256                 BUG();
2257         }
2258         slot = path->slots[0];
2259         old_data = btrfs_item_end_nr(leaf, slot);
2260
2261         BUG_ON(slot < 0);
2262         if (slot >= nritems) {
2263                 btrfs_print_leaf(root, leaf);
2264                 printk("slot %d too large, nritems %d\n", slot, nritems);
2265                 BUG_ON(1);
2266         }
2267
2268         /*
2269          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2270          */
2271         /* first correct the data pointers */
2272         for (i = slot; i < nritems; i++) {
2273                 u32 ioff;
2274                 item = btrfs_item_nr(leaf, i);
2275
2276                 if (!leaf->map_token) {
2277                         map_extent_buffer(leaf, (unsigned long)item,
2278                                         sizeof(struct btrfs_item),
2279                                         &leaf->map_token, &leaf->kaddr,
2280                                         &leaf->map_start, &leaf->map_len,
2281                                         KM_USER1);
2282                 }
2283                 ioff = btrfs_item_offset(leaf, item);
2284                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2285         }
2286
2287         if (leaf->map_token) {
2288                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2289                 leaf->map_token = NULL;
2290         }
2291
2292         /* shift the data */
2293         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2294                       data_end - data_size, btrfs_leaf_data(leaf) +
2295                       data_end, old_data - data_end);
2296
2297         data_end = old_data;
2298         old_size = btrfs_item_size_nr(leaf, slot);
2299         item = btrfs_item_nr(leaf, slot);
2300         btrfs_set_item_size(leaf, item, old_size + data_size);
2301         btrfs_mark_buffer_dirty(leaf);
2302
2303         ret = 0;
2304         if (btrfs_leaf_free_space(root, leaf) < 0) {
2305                 btrfs_print_leaf(root, leaf);
2306                 BUG();
2307         }
2308         return ret;
2309 }
2310
2311 /*
2312  * Given a key and some data, insert an item into the tree.
2313  * This does all the path init required, making room in the tree if needed.
2314  */
2315 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2316                             struct btrfs_root *root,
2317                             struct btrfs_path *path,
2318                             struct btrfs_key *cpu_key, u32 data_size)
2319 {
2320         struct extent_buffer *leaf;
2321         struct btrfs_item *item;
2322         int ret = 0;
2323         int slot;
2324         int slot_orig;
2325         u32 nritems;
2326         unsigned int data_end;
2327         struct btrfs_disk_key disk_key;
2328
2329         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2330
2331         /* create a root if there isn't one */
2332         if (!root->node)
2333                 BUG();
2334
2335         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2336         if (ret == 0) {
2337                 return -EEXIST;
2338         }
2339         if (ret < 0)
2340                 goto out;
2341
2342         slot_orig = path->slots[0];
2343         leaf = path->nodes[0];
2344
2345         nritems = btrfs_header_nritems(leaf);
2346         data_end = leaf_data_end(root, leaf);
2347
2348         if (btrfs_leaf_free_space(root, leaf) <
2349             sizeof(struct btrfs_item) + data_size) {
2350                 btrfs_print_leaf(root, leaf);
2351                 printk("not enough freespace need %u have %d\n",
2352                        data_size, btrfs_leaf_free_space(root, leaf));
2353                 BUG();
2354         }
2355
2356         slot = path->slots[0];
2357         BUG_ON(slot < 0);
2358
2359         if (slot != nritems) {
2360                 int i;
2361                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2362
2363                 if (old_data < data_end) {
2364                         btrfs_print_leaf(root, leaf);
2365                         printk("slot %d old_data %d data_end %d\n",
2366                                slot, old_data, data_end);
2367                         BUG_ON(1);
2368                 }
2369                 /*
2370                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2371                  */
2372                 /* first correct the data pointers */
2373                 WARN_ON(leaf->map_token);
2374                 for (i = slot; i < nritems; i++) {
2375                         u32 ioff;
2376
2377                         item = btrfs_item_nr(leaf, i);
2378                         if (!leaf->map_token) {
2379                                 map_extent_buffer(leaf, (unsigned long)item,
2380                                         sizeof(struct btrfs_item),
2381                                         &leaf->map_token, &leaf->kaddr,
2382                                         &leaf->map_start, &leaf->map_len,
2383                                         KM_USER1);
2384                         }
2385
2386                         ioff = btrfs_item_offset(leaf, item);
2387                         btrfs_set_item_offset(leaf, item, ioff - data_size);
2388                 }
2389                 if (leaf->map_token) {
2390                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2391                         leaf->map_token = NULL;
2392                 }
2393
2394                 /* shift the items */
2395                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2396                               btrfs_item_nr_offset(slot),
2397                               (nritems - slot) * sizeof(struct btrfs_item));
2398
2399                 /* shift the data */
2400                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2401                               data_end - data_size, btrfs_leaf_data(leaf) +
2402                               data_end, old_data - data_end);
2403                 data_end = old_data;
2404         }
2405
2406         /* setup the item for the new data */
2407         btrfs_set_item_key(leaf, &disk_key, slot);
2408         item = btrfs_item_nr(leaf, slot);
2409         btrfs_set_item_offset(leaf, item, data_end - data_size);
2410         btrfs_set_item_size(leaf, item, data_size);
2411         btrfs_set_header_nritems(leaf, nritems + 1);
2412         btrfs_mark_buffer_dirty(leaf);
2413
2414         ret = 0;
2415         if (slot == 0)
2416                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2417
2418         if (btrfs_leaf_free_space(root, leaf) < 0) {
2419                 btrfs_print_leaf(root, leaf);
2420                 BUG();
2421         }
2422 out:
2423         return ret;
2424 }
2425
2426 /*
2427  * Given a key and some data, insert an item into the tree.
2428  * This does all the path init required, making room in the tree if needed.
2429  */
2430 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2431                       *root, struct btrfs_key *cpu_key, void *data, u32
2432                       data_size)
2433 {
2434         int ret = 0;
2435         struct btrfs_path *path;
2436         struct extent_buffer *leaf;
2437         unsigned long ptr;
2438
2439         path = btrfs_alloc_path();
2440         BUG_ON(!path);
2441         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2442         if (!ret) {
2443                 leaf = path->nodes[0];
2444                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2445                 write_extent_buffer(leaf, data, ptr, data_size);
2446                 btrfs_mark_buffer_dirty(leaf);
2447         }
2448         btrfs_free_path(path);
2449         return ret;
2450 }
2451
2452 /*
2453  * delete the pointer from a given node.
2454  *
2455  * If the delete empties a node, the node is removed from the tree,
2456  * continuing all the way the root if required.  The root is converted into
2457  * a leaf if all the nodes are emptied.
2458  */
2459 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2460                    struct btrfs_path *path, int level, int slot)
2461 {
2462         struct extent_buffer *parent = path->nodes[level];
2463         u32 nritems;
2464         int ret = 0;
2465         int wret;
2466
2467         nritems = btrfs_header_nritems(parent);
2468         if (slot != nritems -1) {
2469                 memmove_extent_buffer(parent,
2470                               btrfs_node_key_ptr_offset(slot),
2471                               btrfs_node_key_ptr_offset(slot + 1),
2472                               sizeof(struct btrfs_key_ptr) *
2473                               (nritems - slot - 1));
2474         }
2475         nritems--;
2476         btrfs_set_header_nritems(parent, nritems);
2477         if (nritems == 0 && parent == root->node) {
2478                 BUG_ON(btrfs_header_level(root->node) != 1);
2479                 /* just turn the root into a leaf and break */
2480                 btrfs_set_header_level(root->node, 0);
2481         } else if (slot == 0) {
2482                 struct btrfs_disk_key disk_key;
2483
2484                 btrfs_node_key(parent, &disk_key, 0);
2485                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2486                 if (wret)
2487                         ret = wret;
2488         }
2489         btrfs_mark_buffer_dirty(parent);
2490         return ret;
2491 }
2492
2493 /*
2494  * delete the item at the leaf level in path.  If that empties
2495  * the leaf, remove it from the tree
2496  */
2497 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2498                    struct btrfs_path *path)
2499 {
2500         int slot;
2501         struct extent_buffer *leaf;
2502         struct btrfs_item *item;
2503         int doff;
2504         int dsize;
2505         int ret = 0;
2506         int wret;
2507         u32 nritems;
2508
2509         leaf = path->nodes[0];
2510         slot = path->slots[0];
2511         doff = btrfs_item_offset_nr(leaf, slot);
2512         dsize = btrfs_item_size_nr(leaf, slot);
2513         nritems = btrfs_header_nritems(leaf);
2514
2515         if (slot != nritems - 1) {
2516                 int i;
2517                 int data_end = leaf_data_end(root, leaf);
2518
2519                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2520                               data_end + dsize,
2521                               btrfs_leaf_data(leaf) + data_end,
2522                               doff - data_end);
2523
2524                 for (i = slot + 1; i < nritems; i++) {
2525                         u32 ioff;
2526
2527                         item = btrfs_item_nr(leaf, i);
2528                         if (!leaf->map_token) {
2529                                 map_extent_buffer(leaf, (unsigned long)item,
2530                                         sizeof(struct btrfs_item),
2531                                         &leaf->map_token, &leaf->kaddr,
2532                                         &leaf->map_start, &leaf->map_len,
2533                                         KM_USER1);
2534                         }
2535                         ioff = btrfs_item_offset(leaf, item);
2536                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2537                 }
2538
2539                 if (leaf->map_token) {
2540                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2541                         leaf->map_token = NULL;
2542                 }
2543
2544                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2545                               btrfs_item_nr_offset(slot + 1),
2546                               sizeof(struct btrfs_item) *
2547                               (nritems - slot - 1));
2548         }
2549         btrfs_set_header_nritems(leaf, nritems - 1);
2550         nritems--;
2551
2552         /* delete the leaf if we've emptied it */
2553         if (nritems == 0) {
2554                 if (leaf == root->node) {
2555                         btrfs_set_header_level(leaf, 0);
2556                 } else {
2557                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2558                         clean_tree_block(trans, root, leaf);
2559                         wait_on_tree_block_writeback(root, leaf);
2560                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2561                         if (wret)
2562                                 ret = wret;
2563                         wret = btrfs_free_extent(trans, root,
2564                                          leaf->start, leaf->len,
2565                                          btrfs_header_owner(path->nodes[1]),
2566                                          root_gen, 0, 0, 1);
2567                         if (wret)
2568                                 ret = wret;
2569                 }
2570         } else {
2571                 int used = leaf_space_used(leaf, 0, nritems);
2572                 if (slot == 0) {
2573                         struct btrfs_disk_key disk_key;
2574
2575                         btrfs_item_key(leaf, &disk_key, 0);
2576                         wret = fixup_low_keys(trans, root, path,
2577                                               &disk_key, 1);
2578                         if (wret)
2579                                 ret = wret;
2580                 }
2581
2582                 /* delete the leaf if it is mostly empty */
2583                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2584                         /* push_leaf_left fixes the path.
2585                          * make sure the path still points to our leaf
2586                          * for possible call to del_ptr below
2587                          */
2588                         slot = path->slots[1];
2589                         extent_buffer_get(leaf);
2590
2591                         wret = push_leaf_right(trans, root, path, 1, 1);
2592                         if (wret < 0 && wret != -ENOSPC)
2593                                 ret = wret;
2594
2595                         if (path->nodes[0] == leaf &&
2596                             btrfs_header_nritems(leaf)) {
2597                                 wret = push_leaf_left(trans, root, path, 1, 1);
2598                                 if (wret < 0 && wret != -ENOSPC)
2599                                         ret = wret;
2600                         }
2601
2602                         if (btrfs_header_nritems(leaf) == 0) {
2603                                 u64 root_gen;
2604                                 u64 bytenr = leaf->start;
2605                                 u32 blocksize = leaf->len;
2606
2607                                 root_gen = btrfs_header_generation(
2608                                                            path->nodes[1]);
2609
2610                                 clean_tree_block(trans, root, leaf);
2611                                 wait_on_tree_block_writeback(root, leaf);
2612
2613                                 wret = del_ptr(trans, root, path, 1, slot);
2614                                 if (wret)
2615                                         ret = wret;
2616
2617                                 free_extent_buffer(leaf);
2618                                 wret = btrfs_free_extent(trans, root, bytenr,
2619                                              blocksize,
2620                                              btrfs_header_owner(path->nodes[1]),
2621                                              root_gen, 0, 0, 1);
2622                                 if (wret)
2623                                         ret = wret;
2624                         } else {
2625                                 btrfs_mark_buffer_dirty(leaf);
2626                                 free_extent_buffer(leaf);
2627                         }
2628                 } else {
2629                         btrfs_mark_buffer_dirty(leaf);
2630                 }
2631         }
2632         return ret;
2633 }
2634
2635 /*
2636  * walk up the tree as far as required to find the previous leaf.
2637  * returns 0 if it found something or 1 if there are no lesser leaves.
2638  * returns < 0 on io errors.
2639  */
2640 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2641 {
2642         int slot;
2643         int level = 1;
2644         u64 bytenr;
2645         struct extent_buffer *c;
2646         struct extent_buffer *next = NULL;
2647
2648         while(level < BTRFS_MAX_LEVEL) {
2649                 if (!path->nodes[level])
2650                         return 1;
2651
2652                 slot = path->slots[level];
2653                 c = path->nodes[level];
2654                 if (slot == 0) {
2655                         level++;
2656                         if (level == BTRFS_MAX_LEVEL)
2657                                 return 1;
2658                         continue;
2659                 }
2660                 slot--;
2661
2662                 bytenr = btrfs_node_blockptr(c, slot);
2663                 if (next)
2664                         free_extent_buffer(next);
2665
2666                 if (path->reada < 0)
2667                         reada_for_search(root, path, level, slot);
2668
2669                 next = read_tree_block(root, bytenr,
2670                                        btrfs_level_size(root, level - 1));
2671                 break;
2672         }
2673         path->slots[level] = slot;
2674         while(1) {
2675                 level--;
2676                 c = path->nodes[level];
2677                 free_extent_buffer(c);
2678                 path->nodes[level] = next;
2679                 path->slots[level] = 0;
2680                 if (!level)
2681                         break;
2682                 if (path->reada)
2683                         reada_for_search(root, path, level, 0);
2684                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2685                                        btrfs_level_size(root, level - 1));
2686         }
2687         return 0;
2688 }
2689
2690 /*
2691  * walk up the tree as far as required to find the next leaf.
2692  * returns 0 if it found something or 1 if there are no greater leaves.
2693  * returns < 0 on io errors.
2694  */
2695 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2696 {
2697         int slot;
2698         int level = 1;
2699         u64 bytenr;
2700         struct extent_buffer *c;
2701         struct extent_buffer *next = NULL;
2702
2703         while(level < BTRFS_MAX_LEVEL) {
2704                 if (!path->nodes[level])
2705                         return 1;
2706
2707                 slot = path->slots[level] + 1;
2708                 c = path->nodes[level];
2709                 if (slot >= btrfs_header_nritems(c)) {
2710                         level++;
2711                         if (level == BTRFS_MAX_LEVEL)
2712                                 return 1;
2713                         continue;
2714                 }
2715
2716                 bytenr = btrfs_node_blockptr(c, slot);
2717                 if (next)
2718                         free_extent_buffer(next);
2719
2720                 if (path->reada)
2721                         reada_for_search(root, path, level, slot);
2722
2723                 next = read_tree_block(root, bytenr,
2724                                        btrfs_level_size(root, level -1));
2725                 break;
2726         }
2727         path->slots[level] = slot;
2728         while(1) {
2729                 level--;
2730                 c = path->nodes[level];
2731                 free_extent_buffer(c);
2732                 path->nodes[level] = next;
2733                 path->slots[level] = 0;
2734                 if (!level)
2735                         break;
2736                 if (path->reada)
2737                         reada_for_search(root, path, level, 0);
2738                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2739                                        btrfs_level_size(root, level - 1));
2740         }
2741         return 0;
2742 }