/*
 * fs/btrfs/ctree.c
 * (gitweb extraction header; commit subject: "Btrfs: stop using highmem
 *  for extent_buffers")
 */
1 /*
2  * Copyright (C) 2007,2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/slab.h>
21 #include "ctree.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24 #include "print-tree.h"
25 #include "locking.h"
26
27 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_path *path, int level);
29 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
30                       *root, struct btrfs_key *ins_key,
31                       struct btrfs_path *path, int data_size, int extend);
32 static int push_node_left(struct btrfs_trans_handle *trans,
33                           struct btrfs_root *root, struct extent_buffer *dst,
34                           struct extent_buffer *src, int empty);
35 static int balance_node_right(struct btrfs_trans_handle *trans,
36                               struct btrfs_root *root,
37                               struct extent_buffer *dst_buf,
38                               struct extent_buffer *src_buf);
39 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
40                    struct btrfs_path *path, int level, int slot);
41
42 struct btrfs_path *btrfs_alloc_path(void)
43 {
44         struct btrfs_path *path;
45         path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
46         return path;
47 }
48
49 /*
50  * set all locked nodes in the path to blocking locks.  This should
51  * be done before scheduling
52  */
53 noinline void btrfs_set_path_blocking(struct btrfs_path *p)
54 {
55         int i;
56         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
57                 if (p->nodes[i] && p->locks[i])
58                         btrfs_set_lock_blocking(p->nodes[i]);
59         }
60 }
61
/*
 * reset all the locked nodes in the path to spinning locks.
 *
 * held is used to keep lockdep happy, when lockdep is enabled
 * we set held to a blocking lock before we go around and
 * retake all the spinlocks in the path.  You can safely use NULL
 * for held
 */
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
					struct extent_buffer *held)
{
	int i;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* lockdep really cares that we take all of these spinlocks
	 * in the right order.  If any of the locks in the path are not
	 * currently blocking, it is going to complain.  So, make really
	 * really sure by forcing the path to blocking before we clear
	 * the path blocking.
	 */
	if (held)
		btrfs_set_lock_blocking(held);
	btrfs_set_path_blocking(p);
#endif

	/* retake the spinlocks from the highest level down so they are
	 * reacquired in a consistent order (see the lockdep note above)
	 */
	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
		if (p->nodes[i] && p->locks[i])
			btrfs_clear_lock_blocking(p->nodes[i]);
	}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (held)
		btrfs_clear_lock_blocking(held);
#endif
}
97
98 /* this also releases the path */
99 void btrfs_free_path(struct btrfs_path *p)
100 {
101         if (!p)
102                 return;
103         btrfs_release_path(p);
104         kmem_cache_free(btrfs_path_cachep, p);
105 }
106
107 /*
108  * path release drops references on the extent buffers in the path
109  * and it drops any locks held by this path
110  *
111  * It is safe to call this on paths that no locks or extent buffers held.
112  */
113 noinline void btrfs_release_path(struct btrfs_path *p)
114 {
115         int i;
116
117         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
118                 p->slots[i] = 0;
119                 if (!p->nodes[i])
120                         continue;
121                 if (p->locks[i]) {
122                         btrfs_tree_unlock(p->nodes[i]);
123                         p->locks[i] = 0;
124                 }
125                 free_extent_buffer(p->nodes[i]);
126                 p->nodes[i] = NULL;
127         }
128 }
129
/*
 * safely gets a reference on the root node of a tree.  A lock
 * is not taken, so a concurrent writer may put a different node
 * at the root of the tree.  See btrfs_lock_root_node for the
 * looping required.
 *
 * The extent buffer returned by this has a reference taken, so
 * it won't disappear.  It may stop being the root of the tree
 * at any time because there are no locks held.
 */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	rcu_read_lock();
	eb = rcu_dereference(root->node);
	/* take the reference while still inside the rcu read-side
	 * section, before a concurrent root swap can retire this buffer
	 */
	extent_buffer_get(eb);
	rcu_read_unlock();
	return eb;
}
150
151 /* loop around taking references on and locking the root node of the
152  * tree until you end up with a lock on the root.  A locked buffer
153  * is returned, with a reference held.
154  */
155 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
156 {
157         struct extent_buffer *eb;
158
159         while (1) {
160                 eb = btrfs_root_node(root);
161                 btrfs_tree_lock(eb);
162                 if (eb == root->node)
163                         break;
164                 btrfs_tree_unlock(eb);
165                 free_extent_buffer(eb);
166         }
167         return eb;
168 }
169
170 /* cowonly root (everything not a reference counted cow subvolume), just get
171  * put onto a simple dirty list.  transaction.c walks this to make sure they
172  * get properly updated on disk.
173  */
174 static void add_root_to_dirty_list(struct btrfs_root *root)
175 {
176         if (root->track_dirty && list_empty(&root->dirty_list)) {
177                 list_add(&root->dirty_list,
178                          &root->fs_info->dirty_cowonly_roots);
179         }
180 }
181
/*
 * used by snapshot creation to make a copy of a root for a tree with
 * a given objectid.  The buffer with the new root node is returned in
 * cow_ret, and this func returns zero on success or a negative error code.
 */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	int ret = 0;
	int level;
	struct btrfs_disk_key disk_key;

	/* reference counted roots may only be copied from inside the
	 * currently running transaction
	 */
	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	/* grab the first key of the block; it is handed to the
	 * allocator below
	 */
	level = btrfs_header_level(buf);
	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
				     new_root_objectid, &disk_key, level,
				     buf->start, 0);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* clone the old root and fix up the new copy's header fields */
	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, new_root_objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	/* reloc roots pass 1 here, matching the full-backref rules
	 * described in update_ref_for_cow()
	 */
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		ret = btrfs_inc_ref(trans, root, cow, 1);
	else
		ret = btrfs_inc_ref(trans, root, cow, 0);

	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}
241
/*
 * check if the tree block can be shared by multiple trees.
 * Returns 1 if it may be shared, 0 if it is certainly private.
 */
int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf)
{
	/*
	 * Tree blocks not in reference counted trees and tree roots
	 * are never shared. If a block was allocated after the last
	 * snapshot and the block was not allocated by tree relocation,
	 * we know the block is not shared.
	 */
	if (root->ref_cows &&
	    buf != root->node && buf != root->commit_root &&
	    (btrfs_header_generation(buf) <=
	     btrfs_root_last_snapshot(&root->root_item) ||
	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* old-format (pre mixed backref) blocks may always be shared */
	if (root->ref_cows &&
	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
		return 1;
#endif
	return 0;
}
267
/*
 * update extent backrefs when buf is about to be replaced by its cow
 * copy.
 *
 * *last_ref is set to 1 when this cow drops the final reference on
 * buf, telling the caller the old block can be freed.  Always
 * returns 0; internal failures are handled with BUG_ON.
 */
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *buf,
				       struct extent_buffer *cow,
				       int *last_ref)
{
	u64 refs;
	u64 owner;
	u64 flags;
	u64 new_flags = 0;
	int ret;

	/*
	 * Backrefs update rules:
	 *
	 * Always use full backrefs for extent pointers in tree block
	 * allocated by tree relocation.
	 *
	 * If a shared tree block is no longer referenced by its owner
	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
	 * use full backrefs for extent pointers in tree block.
	 *
	 * If a tree block is being relocated
	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
	 * use full backrefs for extent pointers in tree block.
	 * The reason for this is some operations (such as drop tree)
	 * are only allowed for blocks use full backrefs.
	 */

	if (btrfs_block_can_be_shared(root, buf)) {
		ret = btrfs_lookup_extent_info(trans, root, buf->start,
					       buf->len, &refs, &flags);
		BUG_ON(ret);
		BUG_ON(refs == 0);
	} else {
		/* definitely not shared: one ref, flags depend on
		 * whether the block belongs to the reloc tree or uses
		 * the old backref format
		 */
		refs = 1;
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
		else
			flags = 0;
	}

	owner = btrfs_header_owner(buf);
	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));

	if (refs > 1) {
		/* the old block stays shared after this cow */
		if ((owner == root->root_key.objectid ||
		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			ret = btrfs_inc_ref(trans, root, buf, 1);
			BUG_ON(ret);

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID) {
				ret = btrfs_dec_ref(trans, root, buf, 0);
				BUG_ON(ret);
				ret = btrfs_inc_ref(trans, root, cow, 1);
				BUG_ON(ret);
			}
			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		} else {

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
		}
		if (new_flags != 0) {
			ret = btrfs_set_disk_extent_flags(trans, root,
							  buf->start,
							  buf->len,
							  new_flags, 0);
			BUG_ON(ret);
		}
	} else {
		/* this cow drops the last reference on the old block */
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
			ret = btrfs_dec_ref(trans, root, buf, 1);
			BUG_ON(ret);
		}
		clean_tree_block(trans, root, buf);
		*last_ref = 1;
	}
	return 0;
}
362
/*
 * does the dirty work in cow of a single block.  The parent block (if
 * supplied) is updated to point to the new cow copy.  The new buffer is marked
 * dirty and returned locked.  If you modify the block it needs to be marked
 * dirty again.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you plan on doing more cow.  This is the size in
 * bytes the allocator should try to find free next to the block it returns.
 * This is just a hint and may be ignored by the allocator.
 */
static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *cow;
	int level;
	int last_ref = 0;
	int unlock_orig = 0;
	u64 parent_start;

	/* if the caller handed us the original buffer as the result
	 * slot, it expects us to unlock the original when done
	 */
	if (*cow_ret == buf)
		unlock_orig = 1;

	btrfs_assert_tree_locked(buf);

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);

	/* first key of the block, handed to the allocator below */
	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	/* only the reloc tree passes the parent's bytenr as a hint */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent)
			parent_start = parent->start;
		else
			parent_start = 0;
	} else
		parent_start = 0;

	cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
				     root->root_key.objectid, &disk_key,
				     level, search_start, empty_size);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* cow is set to blocking by btrfs_init_new_buffer */

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, root->root_key.objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	/* fix up backrefs; last_ref tells us whether buf can be freed */
	update_ref_for_cow(trans, root, buf, cow, &last_ref);

	if (root->ref_cows)
		btrfs_reloc_cow_block(trans, root, buf, cow);

	if (buf == root->node) {
		/* cow'ing the root: swap the root pointer over to cow */
		WARN_ON(parent && parent != buf);
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			parent_start = buf->start;
		else
			parent_start = 0;

		extent_buffer_get(cow);
		rcu_assign_pointer(root->node, cow);

		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
		/* point the parent's slot at the new copy */
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			parent_start = parent->start;
		else
			parent_start = 0;

		WARN_ON(trans->transid != btrfs_header_generation(parent));
		btrfs_set_node_blockptr(parent, parent_slot,
					cow->start);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
	free_extent_buffer(buf);
	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}
478
/*
 * decide whether a block must be cow'd before it can be modified in
 * this transaction.  It can be modified in place (return 0) only when
 * it was already cow'd in the current transaction (generation matches),
 * has not been written out since, and is not a RELOC-flagged block in a
 * non-reloc tree.
 */
static inline int should_cow_block(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct extent_buffer *buf)
{
	if (btrfs_header_generation(buf) == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 0;
	return 1;
}
490
/*
 * cows a single block, see __btrfs_cow_block for the real work.
 * This version of it has extra checks so that a block isn't cow'd more than
 * once per transaction, as long as it hasn't been written yet
 */
noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
{
	u64 search_start;
	int ret;

	/* complain loudly if we are called outside the currently
	 * running transaction
	 */
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)root->fs_info->generation);
		WARN_ON(1);
	}

	/* already cow'd this transaction and still unwritten: the
	 * block may be modified in place
	 */
	if (!should_cow_block(trans, root, buf)) {
		*cow_ret = buf;
		return 0;
	}

	/* hint the allocator at the 1GB-aligned region the old block
	 * lives in
	 */
	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);

	if (parent)
		btrfs_set_lock_blocking(parent);
	btrfs_set_lock_blocking(buf);

	ret = __btrfs_cow_block(trans, root, buf, parent,
				 parent_slot, cow_ret, search_start, 0);

	trace_btrfs_cow_block(root, buf, *cow_ret);

	return ret;
}
536
537 /*
538  * helper function for defrag to decide if two blocks pointed to by a
539  * node are actually close by
540  */
541 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
542 {
543         if (blocknr < other && other - (blocknr + blocksize) < 32768)
544                 return 1;
545         if (blocknr > other && blocknr - (other + blocksize) < 32768)
546                 return 1;
547         return 0;
548 }
549
550 /*
551  * compare two keys in a memcmp fashion
552  */
553 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
554 {
555         struct btrfs_key k1;
556
557         btrfs_disk_key_to_cpu(&k1, disk);
558
559         return btrfs_comp_cpu_keys(&k1, k2);
560 }
561
562 /*
563  * same as comp_keys only with two btrfs_key's
564  */
565 int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
566 {
567         if (k1->objectid > k2->objectid)
568                 return 1;
569         if (k1->objectid < k2->objectid)
570                 return -1;
571         if (k1->type > k2->type)
572                 return 1;
573         if (k1->type < k2->type)
574                 return -1;
575         if (k1->offset > k2->offset)
576                 return 1;
577         if (k1->offset < k2->offset)
578                 return -1;
579         return 0;
580 }
581
/*
 * this is used by the defrag code to go through all the
 * leaves pointed to by a node and reallocate them so that
 * disk order is close to key order
 *
 * @start_slot:  first slot of @parent to consider
 * @cache_only:  skip child blocks that are not already in memory
 * @last_ret:    in/out allocation hint; updated to the last block cow'd
 * @progress:    slots whose key compares below this are skipped
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
{
	struct extent_buffer *cur;
	u64 blocknr;
	u64 gen;
	u64 search_start = *last_ret;
	u64 last_block = 0;
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
	int parent_level;
	int uptodate;
	u32 blocksize;
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;

	/* in cache_only mode we only defrag leaves, so only level-1
	 * nodes are interesting
	 */
	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

	if (trans->transaction != root->fs_info->running_transaction)
		WARN_ON(1);
	if (trans->transid != root->fs_info->generation)
		WARN_ON(1);

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	btrfs_set_lock_blocking(parent);

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;

		/* keys below the progress cursor were handled earlier */
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
		blocknr = btrfs_node_blockptr(parent, i);
		gen = btrfs_node_ptr_generation(parent, i);
		if (last_block == 0)
			last_block = blocknr;

		/* check the on-disk distance to both neighbours; blocks
		 * already close to one of them don't need to move
		 */
		if (i > 0) {
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (!close && i < end_slot - 2) {
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (close) {
			last_block = blocknr;
			continue;
		}

		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur, gen);
		else
			uptodate = 0;
		if (!cur || !uptodate) {
			/* cache_only mode never reads blocks from disk */
			if (cache_only) {
				free_extent_buffer(cur);
				continue;
			}
			if (!cur) {
				cur = read_tree_block(root, blocknr,
							 blocksize, gen);
				if (!cur)
					return -EIO;
			} else if (!uptodate) {
				btrfs_read_buffer(cur, gen);
			}
		}
		if (search_start == 0)
			search_start = last_block;

		/* cow the block near search_start; the empty_size hint
		 * reserves room for the remaining slots
		 */
		btrfs_tree_lock(cur);
		btrfs_set_lock_blocking(cur);
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&cur, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
		if (err) {
			btrfs_tree_unlock(cur);
			free_extent_buffer(cur);
			break;
		}
		search_start = cur->start;
		last_block = cur->start;
		*last_ret = search_start;
		btrfs_tree_unlock(cur);
		free_extent_buffer(cur);
	}
	return err;
}
693
694 /*
695  * The leaf data grows from end-to-front in the node.
696  * this returns the address of the start of the last item,
697  * which is the stop of the leaf data stack
698  */
699 static inline unsigned int leaf_data_end(struct btrfs_root *root,
700                                          struct extent_buffer *leaf)
701 {
702         u32 nr = btrfs_header_nritems(leaf);
703         if (nr == 0)
704                 return BTRFS_LEAF_DATA_SIZE(root);
705         return btrfs_item_offset_nr(leaf, nr - 1);
706 }
707
708
/*
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 *
 * returns 0 when the key is found at *slot, 1 when it is not (with
 * *slot set to the insertion point)
 */
static noinline int generic_bin_search(struct extent_buffer *eb,
				       unsigned long p,
				       int item_size, struct btrfs_key *key,
				       int max, int *slot)
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
	struct btrfs_disk_key *tmp = NULL;
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *kaddr = NULL;		/* currently mapped chunk, if any */
	unsigned long map_start = 0;	/* eb offset the mapping starts at */
	unsigned long map_len = 0;	/* length of the current mapping */
	int err;

	while (low < high) {
		mid = (low + high) / 2;
		offset = p + mid * item_size;

		/* remap only when the key falls outside the cached
		 * mapping from a previous iteration
		 */
		if (!kaddr || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {

			err = map_private_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&kaddr, &map_start, &map_len);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
			} else {
				/* mapping failed (e.g. the key is not
				 * contiguous); fall back to copying it out
				 */
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
			return 0;
		}
	}
	*slot = low;
	return 1;
}
775
776 /*
777  * simple bin_search frontend that does the right thing for
778  * leaves vs nodes
779  */
780 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
781                       int level, int *slot)
782 {
783         if (level == 0) {
784                 return generic_bin_search(eb,
785                                           offsetof(struct btrfs_leaf, items),
786                                           sizeof(struct btrfs_item),
787                                           key, btrfs_header_nritems(eb),
788                                           slot);
789         } else {
790                 return generic_bin_search(eb,
791                                           offsetof(struct btrfs_node, ptrs),
792                                           sizeof(struct btrfs_key_ptr),
793                                           key, btrfs_header_nritems(eb),
794                                           slot);
795         }
796         return -1;
797 }
798
/*
 * exported wrapper around bin_search() so code outside this file can
 * look a key up in either a leaf (level 0) or a node without knowing
 * the on-disk layout differences.  Returns 0 with *slot set to the
 * key's position when found, or 1 with *slot set to the insertion
 * point.
 */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		     int level, int *slot)
{
	return bin_search(eb, key, level, slot);
}
804
805 static void root_add_used(struct btrfs_root *root, u32 size)
806 {
807         spin_lock(&root->accounting_lock);
808         btrfs_set_root_used(&root->root_item,
809                             btrfs_root_used(&root->root_item) + size);
810         spin_unlock(&root->accounting_lock);
811 }
812
813 static void root_sub_used(struct btrfs_root *root, u32 size)
814 {
815         spin_lock(&root->accounting_lock);
816         btrfs_set_root_used(&root->root_item,
817                             btrfs_root_used(&root->root_item) - size);
818         spin_unlock(&root->accounting_lock);
819 }
820
821 /* given a node and slot number, this reads the blocks it points to.  The
822  * extent buffer is returned with a reference taken (but unlocked).
823  * NULL is returned on error.
824  */
825 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
826                                    struct extent_buffer *parent, int slot)
827 {
828         int level = btrfs_header_level(parent);
829         if (slot < 0)
830                 return NULL;
831         if (slot >= btrfs_header_nritems(parent))
832                 return NULL;
833
834         BUG_ON(level == 0);
835
836         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
837                        btrfs_level_size(root, level - 1),
838                        btrfs_node_ptr_generation(parent, slot));
839 }
840
841 /*
842  * node level balancing, used to make sure nodes are in proper order for
843  * item deletion.  We balance from the top down, so we have to make sure
844  * that a deletion won't leave an node completely empty later on.
845  */
846 static noinline int balance_level(struct btrfs_trans_handle *trans,
847                          struct btrfs_root *root,
848                          struct btrfs_path *path, int level)
849 {
850         struct extent_buffer *right = NULL;
851         struct extent_buffer *mid;
852         struct extent_buffer *left = NULL;
853         struct extent_buffer *parent = NULL;
854         int ret = 0;
855         int wret;
856         int pslot;
857         int orig_slot = path->slots[level];
858         u64 orig_ptr;
859
860         if (level == 0)
861                 return 0;
862
863         mid = path->nodes[level];
864
865         WARN_ON(!path->locks[level]);
866         WARN_ON(btrfs_header_generation(mid) != trans->transid);
867
868         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
869
870         if (level < BTRFS_MAX_LEVEL - 1)
871                 parent = path->nodes[level + 1];
872         pslot = path->slots[level + 1];
873
874         /*
875          * deal with the case where there is only one pointer in the root
876          * by promoting the node below to a root
877          */
878         if (!parent) {
879                 struct extent_buffer *child;
880
881                 if (btrfs_header_nritems(mid) != 1)
882                         return 0;
883
884                 /* promote the child to a root */
885                 child = read_node_slot(root, mid, 0);
886                 BUG_ON(!child);
887                 btrfs_tree_lock(child);
888                 btrfs_set_lock_blocking(child);
889                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
890                 if (ret) {
891                         btrfs_tree_unlock(child);
892                         free_extent_buffer(child);
893                         goto enospc;
894                 }
895
896                 rcu_assign_pointer(root->node, child);
897
898                 add_root_to_dirty_list(root);
899                 btrfs_tree_unlock(child);
900
901                 path->locks[level] = 0;
902                 path->nodes[level] = NULL;
903                 clean_tree_block(trans, root, mid);
904                 btrfs_tree_unlock(mid);
905                 /* once for the path */
906                 free_extent_buffer(mid);
907
908                 root_sub_used(root, mid->len);
909                 btrfs_free_tree_block(trans, root, mid, 0, 1);
910                 /* once for the root ptr */
911                 free_extent_buffer(mid);
912                 return 0;
913         }
914         if (btrfs_header_nritems(mid) >
915             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
916                 return 0;
917
918         btrfs_header_nritems(mid);
919
920         left = read_node_slot(root, parent, pslot - 1);
921         if (left) {
922                 btrfs_tree_lock(left);
923                 btrfs_set_lock_blocking(left);
924                 wret = btrfs_cow_block(trans, root, left,
925                                        parent, pslot - 1, &left);
926                 if (wret) {
927                         ret = wret;
928                         goto enospc;
929                 }
930         }
931         right = read_node_slot(root, parent, pslot + 1);
932         if (right) {
933                 btrfs_tree_lock(right);
934                 btrfs_set_lock_blocking(right);
935                 wret = btrfs_cow_block(trans, root, right,
936                                        parent, pslot + 1, &right);
937                 if (wret) {
938                         ret = wret;
939                         goto enospc;
940                 }
941         }
942
943         /* first, try to make some room in the middle buffer */
944         if (left) {
945                 orig_slot += btrfs_header_nritems(left);
946                 wret = push_node_left(trans, root, left, mid, 1);
947                 if (wret < 0)
948                         ret = wret;
949                 btrfs_header_nritems(mid);
950         }
951
952         /*
953          * then try to empty the right most buffer into the middle
954          */
955         if (right) {
956                 wret = push_node_left(trans, root, mid, right, 1);
957                 if (wret < 0 && wret != -ENOSPC)
958                         ret = wret;
959                 if (btrfs_header_nritems(right) == 0) {
960                         clean_tree_block(trans, root, right);
961                         btrfs_tree_unlock(right);
962                         wret = del_ptr(trans, root, path, level + 1, pslot +
963                                        1);
964                         if (wret)
965                                 ret = wret;
966                         root_sub_used(root, right->len);
967                         btrfs_free_tree_block(trans, root, right, 0, 1);
968                         free_extent_buffer(right);
969                         right = NULL;
970                 } else {
971                         struct btrfs_disk_key right_key;
972                         btrfs_node_key(right, &right_key, 0);
973                         btrfs_set_node_key(parent, &right_key, pslot + 1);
974                         btrfs_mark_buffer_dirty(parent);
975                 }
976         }
977         if (btrfs_header_nritems(mid) == 1) {
978                 /*
979                  * we're not allowed to leave a node with one item in the
980                  * tree during a delete.  A deletion from lower in the tree
981                  * could try to delete the only pointer in this node.
982                  * So, pull some keys from the left.
983                  * There has to be a left pointer at this point because
984                  * otherwise we would have pulled some pointers from the
985                  * right
986                  */
987                 BUG_ON(!left);
988                 wret = balance_node_right(trans, root, mid, left);
989                 if (wret < 0) {
990                         ret = wret;
991                         goto enospc;
992                 }
993                 if (wret == 1) {
994                         wret = push_node_left(trans, root, left, mid, 1);
995                         if (wret < 0)
996                                 ret = wret;
997                 }
998                 BUG_ON(wret == 1);
999         }
1000         if (btrfs_header_nritems(mid) == 0) {
1001                 clean_tree_block(trans, root, mid);
1002                 btrfs_tree_unlock(mid);
1003                 wret = del_ptr(trans, root, path, level + 1, pslot);
1004                 if (wret)
1005                         ret = wret;
1006                 root_sub_used(root, mid->len);
1007                 btrfs_free_tree_block(trans, root, mid, 0, 1);
1008                 free_extent_buffer(mid);
1009                 mid = NULL;
1010         } else {
1011                 /* update the parent key to reflect our changes */
1012                 struct btrfs_disk_key mid_key;
1013                 btrfs_node_key(mid, &mid_key, 0);
1014                 btrfs_set_node_key(parent, &mid_key, pslot);
1015                 btrfs_mark_buffer_dirty(parent);
1016         }
1017
1018         /* update the path */
1019         if (left) {
1020                 if (btrfs_header_nritems(left) > orig_slot) {
1021                         extent_buffer_get(left);
1022                         /* left was locked after cow */
1023                         path->nodes[level] = left;
1024                         path->slots[level + 1] -= 1;
1025                         path->slots[level] = orig_slot;
1026                         if (mid) {
1027                                 btrfs_tree_unlock(mid);
1028                                 free_extent_buffer(mid);
1029                         }
1030                 } else {
1031                         orig_slot -= btrfs_header_nritems(left);
1032                         path->slots[level] = orig_slot;
1033                 }
1034         }
1035         /* double check we haven't messed things up */
1036         if (orig_ptr !=
1037             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1038                 BUG();
1039 enospc:
1040         if (right) {
1041                 btrfs_tree_unlock(right);
1042                 free_extent_buffer(right);
1043         }
1044         if (left) {
1045                 if (path->nodes[level] != left)
1046                         btrfs_tree_unlock(left);
1047                 free_extent_buffer(left);
1048         }
1049         return ret;
1050 }
1051
/* Node balancing for insertion.  Here we only split or push nodes around
 * when they are completely full.  This is also done top down, so we
 * have to be pessimistic.
 *
 * Returns 0 if space was made (the path now points at the correct slot
 * in whichever node holds it), or 1 if nothing could be pushed and the
 * caller must split instead.
 */
static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];

	if (level == 0)
		return 1;

	mid = path->nodes[level];
	/* mid must already have been COWed in this transaction */
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	/* the root has no siblings to push into */
	if (!parent)
		return 1;

	left = read_node_slot(root, parent, pslot - 1);

	/* first, try to make some room in the middle buffer */
	if (left) {
		u32 left_nr;

		btrfs_tree_lock(left);
		btrfs_set_lock_blocking(left);

		left_nr = btrfs_header_nritems(left);
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			/* left is essentially full too, don't bother */
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
						      left, mid, 0);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;
			/* mid lost its lowest items; fix the parent key */
			orig_slot += left_nr;
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				/* the target slot migrated into left */
				path->nodes[level] = left;
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				orig_slot -=
					btrfs_header_nritems(left);
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(left);
				free_extent_buffer(left);
			}
			return 0;
		}
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	right = read_node_slot(root, parent, pslot + 1);

	/*
	 * then try to push items from mid into the right sibling
	 */
	if (right) {
		u32 right_nr;

		btrfs_tree_lock(right);
		btrfs_set_lock_blocking(right);

		right_nr = btrfs_header_nritems(right);
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			/* right is essentially full too, don't bother */
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
							  right, mid);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;

			/* right gained new lowest items; fix the parent key */
			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				/* the target slot migrated into right */
				path->nodes[level] = right;
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
					btrfs_header_nritems(mid);
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				btrfs_tree_unlock(right);
				free_extent_buffer(right);
			}
			return 0;
		}
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 1;
}
1182
1183 /*
1184  * readahead one full node of leaves, finding things that are close
1185  * to the block in 'slot', and triggering ra on them.
1186  */
1187 static void reada_for_search(struct btrfs_root *root,
1188                              struct btrfs_path *path,
1189                              int level, int slot, u64 objectid)
1190 {
1191         struct extent_buffer *node;
1192         struct btrfs_disk_key disk_key;
1193         u32 nritems;
1194         u64 search;
1195         u64 target;
1196         u64 nread = 0;
1197         u64 gen;
1198         int direction = path->reada;
1199         struct extent_buffer *eb;
1200         u32 nr;
1201         u32 blocksize;
1202         u32 nscan = 0;
1203
1204         if (level != 1)
1205                 return;
1206
1207         if (!path->nodes[level])
1208                 return;
1209
1210         node = path->nodes[level];
1211
1212         search = btrfs_node_blockptr(node, slot);
1213         blocksize = btrfs_level_size(root, level - 1);
1214         eb = btrfs_find_tree_block(root, search, blocksize);
1215         if (eb) {
1216                 free_extent_buffer(eb);
1217                 return;
1218         }
1219
1220         target = search;
1221
1222         nritems = btrfs_header_nritems(node);
1223         nr = slot;
1224
1225         while (1) {
1226                 if (direction < 0) {
1227                         if (nr == 0)
1228                                 break;
1229                         nr--;
1230                 } else if (direction > 0) {
1231                         nr++;
1232                         if (nr >= nritems)
1233                                 break;
1234                 }
1235                 if (path->reada < 0 && objectid) {
1236                         btrfs_node_key(node, &disk_key, nr);
1237                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1238                                 break;
1239                 }
1240                 search = btrfs_node_blockptr(node, nr);
1241                 if ((search <= target && target - search <= 65536) ||
1242                     (search > target && search - target <= 65536)) {
1243                         gen = btrfs_node_ptr_generation(node, nr);
1244                         readahead_tree_block(root, search, blocksize, gen);
1245                         nread += blocksize;
1246                 }
1247                 nscan++;
1248                 if ((nread > 65536 || nscan > 32))
1249                         break;
1250         }
1251 }
1252
1253 /*
1254  * returns -EAGAIN if it had to drop the path, or zero if everything was in
1255  * cache
1256  */
1257 static noinline int reada_for_balance(struct btrfs_root *root,
1258                                       struct btrfs_path *path, int level)
1259 {
1260         int slot;
1261         int nritems;
1262         struct extent_buffer *parent;
1263         struct extent_buffer *eb;
1264         u64 gen;
1265         u64 block1 = 0;
1266         u64 block2 = 0;
1267         int ret = 0;
1268         int blocksize;
1269
1270         parent = path->nodes[level + 1];
1271         if (!parent)
1272                 return 0;
1273
1274         nritems = btrfs_header_nritems(parent);
1275         slot = path->slots[level + 1];
1276         blocksize = btrfs_level_size(root, level);
1277
1278         if (slot > 0) {
1279                 block1 = btrfs_node_blockptr(parent, slot - 1);
1280                 gen = btrfs_node_ptr_generation(parent, slot - 1);
1281                 eb = btrfs_find_tree_block(root, block1, blocksize);
1282                 if (eb && btrfs_buffer_uptodate(eb, gen))
1283                         block1 = 0;
1284                 free_extent_buffer(eb);
1285         }
1286         if (slot + 1 < nritems) {
1287                 block2 = btrfs_node_blockptr(parent, slot + 1);
1288                 gen = btrfs_node_ptr_generation(parent, slot + 1);
1289                 eb = btrfs_find_tree_block(root, block2, blocksize);
1290                 if (eb && btrfs_buffer_uptodate(eb, gen))
1291                         block2 = 0;
1292                 free_extent_buffer(eb);
1293         }
1294         if (block1 || block2) {
1295                 ret = -EAGAIN;
1296
1297                 /* release the whole path */
1298                 btrfs_release_path(path);
1299
1300                 /* read the blocks */
1301                 if (block1)
1302                         readahead_tree_block(root, block1, blocksize, 0);
1303                 if (block2)
1304                         readahead_tree_block(root, block2, blocksize, 0);
1305
1306                 if (block1) {
1307                         eb = read_tree_block(root, block1, blocksize, 0);
1308                         free_extent_buffer(eb);
1309                 }
1310                 if (block2) {
1311                         eb = read_tree_block(root, block2, blocksize, 0);
1312                         free_extent_buffer(eb);
1313                 }
1314         }
1315         return ret;
1316 }
1317
1318
/*
 * when we walk down the tree, it is usually safe to unlock the higher layers
 * in the tree.  The exceptions are when our path goes through slot 0, because
 * operations on the tree might require changing key pointers higher up in the
 * tree.
 *
 * callers might also have set path->keep_locks, which tells this code to keep
 * the lock if the path points to the last slot in the block.  This is part of
 * walking through the tree, and selecting the next slot in the higher block.
 *
 * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
 * if lowest_unlock is 1, level 0 won't be unlocked
 */
static noinline void unlock_up(struct btrfs_path *path, int level,
			       int lowest_unlock)
{
	int i;
	int skip_level = level;	/* levels at or below this stay locked */
	int no_skips = 0;	/* once set, stop raising skip_level */
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			break;
		if (!path->locks[i])
			break;
		/* slot 0: a key change here may ripple into this level */
		if (!no_skips && path->slots[i] == 0) {
			skip_level = i + 1;
			continue;
		}
		/* keep_locks: also hold on at the last slot of a block */
		if (!no_skips && path->keep_locks) {
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
			if (nritems < 1 || path->slots[i] >= nritems - 1) {
				skip_level = i + 1;
				continue;
			}
		}
		/* past the first unlockable level, freeze skip_level */
		if (skip_level < i && i >= lowest_unlock)
			no_skips = 1;

		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
			btrfs_tree_unlock(t);
			path->locks[i] = 0;
		}
	}
}
1368
1369 /*
1370  * This releases any locks held in the path starting at level and
1371  * going all the way up to the root.
1372  *
1373  * btrfs_search_slot will keep the lock held on higher nodes in a few
1374  * corner cases, such as COW of the block at slot zero in the node.  This
1375  * ignores those rules, and it should only be called when there are no
1376  * more updates to be done higher up in the tree.
1377  */
1378 noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1379 {
1380         int i;
1381
1382         if (path->keep_locks)
1383                 return;
1384
1385         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1386                 if (!path->nodes[i])
1387                         continue;
1388                 if (!path->locks[i])
1389                         continue;
1390                 btrfs_tree_unlock(path->nodes[i]);
1391                 path->locks[i] = 0;
1392         }
1393 }
1394
/*
 * helper function for btrfs_search_slot.  The goal is to find a block
 * in cache without setting the path to blocking.  If we find the block
 * we return zero and the path is unchanged.
 *
 * If we can't find the block, we set the path blocking and do some
 * reada.  -EAGAIN is returned and the search must be repeated.
 *
 * Returns 0 with *eb_ret set to a referenced, up to date buffer on the
 * fast path, -EAGAIN after dropping the path, or -EIO on read failure.
 */
static int
read_block_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer **eb_ret, int level, int slot,
		       struct btrfs_key *key)
{
	u64 blocknr;
	u64 gen;
	u32 blocksize;
	struct extent_buffer *b = *eb_ret;
	struct extent_buffer *tmp;
	int ret;

	blocknr = btrfs_node_blockptr(b, slot);
	gen = btrfs_node_ptr_generation(b, slot);
	blocksize = btrfs_level_size(root, level - 1);

	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
	if (tmp) {
		if (btrfs_buffer_uptodate(tmp, 0)) {
			if (btrfs_buffer_uptodate(tmp, gen)) {
				/*
				 * we found an up to date block without
				 * sleeping, return
				 * right away
				 */
				*eb_ret = tmp;
				return 0;
			}
			/* the pages were up to date, but we failed
			 * the generation number check.  Do a full
			 * read for the generation number that is correct.
			 * We must do this without dropping locks so
			 * we can trust our generation number
			 */
			free_extent_buffer(tmp);
			tmp = read_tree_block(root, blocknr, blocksize, gen);
			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				*eb_ret = tmp;
				return 0;
			}
			free_extent_buffer(tmp);
			btrfs_release_path(p);
			return -EIO;
		}
	}

	/*
	 * reduce lock contention at high levels
	 * of the btree by dropping locks before
	 * we read.  Don't release the lock on the current
	 * level because we need to walk this node to figure
	 * out which blocks to read.
	 */
	btrfs_unlock_up_safe(p, level + 1);
	btrfs_set_path_blocking(p);

	/* drop any not-uptodate buffer still held from above (tmp may be
	 * NULL here, which the unconditional call suggests is tolerated)
	 */
	free_extent_buffer(tmp);
	if (p->reada)
		reada_for_search(root, p, level, slot, key->objectid);

	/* the whole path is dropped; the caller must retry the search */
	btrfs_release_path(p);

	ret = -EAGAIN;
	tmp = read_tree_block(root, blocknr, blocksize, 0);
	if (tmp) {
		/*
		 * If the read above didn't mark this buffer up to date,
		 * it will never end up being up to date.  Set ret to EIO now
		 * and give up so that our caller doesn't loop forever
		 * on our EAGAINs.
		 */
		if (!btrfs_buffer_uptodate(tmp, 0))
			ret = -EIO;
		free_extent_buffer(tmp);
	}
	return ret;
}
1481
/*
 * helper function for btrfs_search_slot.  This does all of the checks
 * for node-level blocks and does any balancing required based on
 * the ins_len.
 *
 * If no extra work was required, zero is returned.  If we had to
 * drop the path, -EAGAIN is returned and btrfs_search_slot must
 * start over
 */
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer *b, int level, int ins_len)
{
	int ret;
	/* inserting (or splitting): split nodes that are nearly full */
	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
		int sret;

		/* pull the siblings into cache before we block */
		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = split_node(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		BUG_ON(sret > 0);
		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
	} else if (ins_len < 0 && btrfs_header_nritems(b) <
		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
		/* deleting: rebalance nodes that are under half full */
		int sret;

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = balance_level(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
		if (!b) {
			/* balancing removed this level; restart the search */
			btrfs_release_path(p);
			goto again;
		}
		BUG_ON(btrfs_header_nritems(b) == 1);
	}
	return 0;

again:
	ret = -EAGAIN;
done:
	return ret;
}
1545
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
 *
 * The walk keeps at most the bottom 'lowest_unlock' levels of the path
 * locked on exit (see unlock_up calls below); everything above is dropped
 * as soon as it is safe.
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
{
	struct extent_buffer *b;
	int slot;
	int ret;
	int err;
	int level;
	int lowest_unlock = 1;
	u8 lowest_level = 0;

	lowest_level = p->lowest_level;
	/* splitting on the way down makes no sense when stopping early */
	WARN_ON(lowest_level && ins_len > 0);
	WARN_ON(p->nodes[0] != NULL);

	/*
	 * deletions keep one extra level locked; NOTE(review): presumably so
	 * the caller can fix up the parent after removing slot 0 — confirm
	 */
	if (ins_len < 0)
		lowest_unlock = 2;

again:
	/* pick the starting root: committed snapshot or the live root */
	if (p->search_commit_root) {
		b = root->commit_root;
		extent_buffer_get(b);
		if (!p->skip_locking)
			btrfs_tree_lock(b);
	} else {
		if (p->skip_locking)
			b = btrfs_root_node(root);
		else
			b = btrfs_lock_root_node(root);
	}

	while (b) {
		level = btrfs_header_level(b);

		/*
		 * setup the path here so we can release it under lock
		 * contention with the cow code
		 */
		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;

		if (cow) {
			/*
			 * if we don't really need to cow this block
			 * then we don't want to set the path blocking,
			 * so we test it here
			 */
			if (!should_cow_block(trans, root, b))
				goto cow_done;

			btrfs_set_path_blocking(p);

			/* b is replaced with the COW'd copy on success */
			err = btrfs_cow_block(trans, root, b,
					      p->nodes[level + 1],
					      p->slots[level + 1], &b);
			if (err) {
				ret = err;
				goto done;
			}
		}
cow_done:
		/* insertions must COW; a read-only insert is a caller bug */
		BUG_ON(!cow && ins_len);

		/* re-record b: the COW above may have swapped the buffer */
		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;

		btrfs_clear_path_blocking(p, NULL);

		/*
		 * we have a lock on b and as long as we aren't changing
		 * the tree, there is no way to for the items in b to change.
		 * It is safe to drop the lock on our parent before we
		 * go through the expensive btree search on b.
		 *
		 * If cow is true, then we might be changing slot zero,
		 * which may require changing the parent.  So, we can't
		 * drop the lock until after we know which slot we're
		 * operating on.
		 */
		if (!cow)
			btrfs_unlock_up_safe(p, level + 1);

		ret = bin_search(b, key, level, &slot);

		if (level != 0) {
			/*
			 * interior node: an inexact match means the key we
			 * want lives under the previous pointer, so step back
			 * one slot (remembered in 'dec' for lowest_level).
			 */
			int dec = 0;
			if (ret && slot > 0) {
				dec = 1;
				slot -= 1;
			}
			p->slots[level] = slot;
			/* split/merge this node as required by ins_len */
			err = setup_nodes_for_search(trans, root, p, b, level,
						     ins_len);
			if (err == -EAGAIN)
				goto again;
			if (err) {
				ret = err;
				goto done;
			}
			/* setup_nodes_for_search may have moved us */
			b = p->nodes[level];
			slot = p->slots[level];

			unlock_up(p, level, lowest_unlock);

			/* caller asked us to stop at this level */
			if (level == lowest_level) {
				if (dec)
					p->slots[level]++;
				goto done;
			}

			err = read_block_for_search(trans, root, p,
						    &b, level, slot, key);
			if (err == -EAGAIN)
				goto again;
			if (err) {
				ret = err;
				goto done;
			}

			if (!p->skip_locking) {
				/* try the cheap spinning lock first, fall
				 * back to a blocking lock on contention */
				btrfs_clear_path_blocking(p, NULL);
				err = btrfs_try_spin_lock(b);

				if (!err) {
					btrfs_set_path_blocking(p);
					btrfs_tree_lock(b);
					btrfs_clear_path_blocking(p, b);
				}
			}
		} else {
			/* leaf level: make room if an insert is coming */
			p->slots[level] = slot;
			if (ins_len > 0 &&
			    btrfs_leaf_free_space(root, b) < ins_len) {
				btrfs_set_path_blocking(p);
				err = split_leaf(trans, root, key,
						 p, ins_len, ret == 0);
				btrfs_clear_path_blocking(p, NULL);

				BUG_ON(err > 0);
				if (err) {
					ret = err;
					goto done;
				}
			}
			if (!p->search_for_split)
				unlock_up(p, level, lowest_unlock);
			goto done;
		}
	}
	ret = 1;
done:
	/*
	 * we don't really know what they plan on doing with the path
	 * from here on, so for now just mark it as blocking
	 */
	if (!p->leave_spinning)
		btrfs_set_path_blocking(p);
	if (ret < 0)
		btrfs_release_path(p);
	return ret;
}
1723
1724 /*
1725  * adjust the pointers going up the tree, starting at level
1726  * making sure the right key of each node is points to 'key'.
1727  * This is used after shifting pointers to the left, so it stops
1728  * fixing up pointers when a given leaf/node is not in slot 0 of the
1729  * higher levels
1730  *
1731  * If this fails to write a tree block, it returns -1, but continues
1732  * fixing up the blocks in ram so the tree is consistent.
1733  */
1734 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1735                           struct btrfs_root *root, struct btrfs_path *path,
1736                           struct btrfs_disk_key *key, int level)
1737 {
1738         int i;
1739         int ret = 0;
1740         struct extent_buffer *t;
1741
1742         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1743                 int tslot = path->slots[i];
1744                 if (!path->nodes[i])
1745                         break;
1746                 t = path->nodes[i];
1747                 btrfs_set_node_key(t, key, tslot);
1748                 btrfs_mark_buffer_dirty(path->nodes[i]);
1749                 if (tslot != 0)
1750                         break;
1751         }
1752         return ret;
1753 }
1754
1755 /*
1756  * update item key.
1757  *
1758  * This function isn't completely safe. It's the caller's responsibility
1759  * that the new key won't break the order
1760  */
1761 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1762                             struct btrfs_root *root, struct btrfs_path *path,
1763                             struct btrfs_key *new_key)
1764 {
1765         struct btrfs_disk_key disk_key;
1766         struct extent_buffer *eb;
1767         int slot;
1768
1769         eb = path->nodes[0];
1770         slot = path->slots[0];
1771         if (slot > 0) {
1772                 btrfs_item_key(eb, &disk_key, slot - 1);
1773                 if (comp_keys(&disk_key, new_key) >= 0)
1774                         return -1;
1775         }
1776         if (slot < btrfs_header_nritems(eb) - 1) {
1777                 btrfs_item_key(eb, &disk_key, slot + 1);
1778                 if (comp_keys(&disk_key, new_key) <= 0)
1779                         return -1;
1780         }
1781
1782         btrfs_cpu_key_to_disk(&disk_key, new_key);
1783         btrfs_set_item_key(eb, &disk_key, slot);
1784         btrfs_mark_buffer_dirty(eb);
1785         if (slot == 0)
1786                 fixup_low_keys(trans, root, path, &disk_key, 1);
1787         return 0;
1788 }
1789
1790 /*
1791  * try to push data from one node into the next node left in the
1792  * tree.
1793  *
1794  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1795  * error, and > 0 if there was no room in the left hand block.
1796  */
1797 static int push_node_left(struct btrfs_trans_handle *trans,
1798                           struct btrfs_root *root, struct extent_buffer *dst,
1799                           struct extent_buffer *src, int empty)
1800 {
1801         int push_items = 0;
1802         int src_nritems;
1803         int dst_nritems;
1804         int ret = 0;
1805
1806         src_nritems = btrfs_header_nritems(src);
1807         dst_nritems = btrfs_header_nritems(dst);
1808         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1809         WARN_ON(btrfs_header_generation(src) != trans->transid);
1810         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1811
1812         if (!empty && src_nritems <= 8)
1813                 return 1;
1814
1815         if (push_items <= 0)
1816                 return 1;
1817
1818         if (empty) {
1819                 push_items = min(src_nritems, push_items);
1820                 if (push_items < src_nritems) {
1821                         /* leave at least 8 pointers in the node if
1822                          * we aren't going to empty it
1823                          */
1824                         if (src_nritems - push_items < 8) {
1825                                 if (push_items <= 8)
1826                                         return 1;
1827                                 push_items -= 8;
1828                         }
1829                 }
1830         } else
1831                 push_items = min(src_nritems - 8, push_items);
1832
1833         copy_extent_buffer(dst, src,
1834                            btrfs_node_key_ptr_offset(dst_nritems),
1835                            btrfs_node_key_ptr_offset(0),
1836                            push_items * sizeof(struct btrfs_key_ptr));
1837
1838         if (push_items < src_nritems) {
1839                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1840                                       btrfs_node_key_ptr_offset(push_items),
1841                                       (src_nritems - push_items) *
1842                                       sizeof(struct btrfs_key_ptr));
1843         }
1844         btrfs_set_header_nritems(src, src_nritems - push_items);
1845         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1846         btrfs_mark_buffer_dirty(src);
1847         btrfs_mark_buffer_dirty(dst);
1848
1849         return ret;
1850 }
1851
1852 /*
1853  * try to push data from one node into the next node right in the
1854  * tree.
1855  *
1856  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1857  * error, and > 0 if there was no room in the right hand block.
1858  *
1859  * this will  only push up to 1/2 the contents of the left node over
1860  */
1861 static int balance_node_right(struct btrfs_trans_handle *trans,
1862                               struct btrfs_root *root,
1863                               struct extent_buffer *dst,
1864                               struct extent_buffer *src)
1865 {
1866         int push_items = 0;
1867         int max_push;
1868         int src_nritems;
1869         int dst_nritems;
1870         int ret = 0;
1871
1872         WARN_ON(btrfs_header_generation(src) != trans->transid);
1873         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1874
1875         src_nritems = btrfs_header_nritems(src);
1876         dst_nritems = btrfs_header_nritems(dst);
1877         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1878         if (push_items <= 0)
1879                 return 1;
1880
1881         if (src_nritems < 4)
1882                 return 1;
1883
1884         max_push = src_nritems / 2 + 1;
1885         /* don't try to empty the node */
1886         if (max_push >= src_nritems)
1887                 return 1;
1888
1889         if (max_push < push_items)
1890                 push_items = max_push;
1891
1892         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1893                                       btrfs_node_key_ptr_offset(0),
1894                                       (dst_nritems) *
1895                                       sizeof(struct btrfs_key_ptr));
1896
1897         copy_extent_buffer(dst, src,
1898                            btrfs_node_key_ptr_offset(0),
1899                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1900                            push_items * sizeof(struct btrfs_key_ptr));
1901
1902         btrfs_set_header_nritems(src, src_nritems - push_items);
1903         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1904
1905         btrfs_mark_buffer_dirty(src);
1906         btrfs_mark_buffer_dirty(dst);
1907
1908         return ret;
1909 }
1910
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
 *
 * returns zero on success or < 0 on failure.
 *
 * On success the path gains a new top level: path->nodes[level] is the
 * freshly allocated root node, locked and at slot 0.
 */
static noinline int insert_new_root(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path, int level)
{
	u64 lower_gen;
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct extent_buffer *old;
	struct btrfs_disk_key lower_key;

	/* the caller's path must top out exactly at the current root */
	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	/* the new root's single key mirrors the old root's first key */
	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

	c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
				   root->root_key.objectid, &lower_key,
				   level, root->node->start, 0);
	if (IS_ERR(c))
		return PTR_ERR(c);

	root_add_used(root, root->nodesize);

	/* initialize the header of the new root block */
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

	/* the one and only pointer in the new root: the old root block */
	btrfs_set_node_key(c, &lower_key, 0);
	btrfs_set_node_blockptr(c, 0, lower->start);
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen != trans->transid);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);

	btrfs_mark_buffer_dirty(c);

	/* publish the new root; readers may still hold references to old */
	old = root->node;
	rcu_assign_pointer(root->node, c);

	/* the super has an extra ref to root->node */
	free_extent_buffer(old);

	add_root_to_dirty_list(root);
	/* path->nodes[level] holds its own reference to c */
	extent_buffer_get(c);
	path->nodes[level] = c;
	path->locks[level] = 1;
	path->slots[level] = 0;
	return 0;
}
1983
1984 /*
1985  * worker function to insert a single pointer in a node.
1986  * the node should have enough room for the pointer already
1987  *
1988  * slot and level indicate where you want the key to go, and
1989  * blocknr is the block the key points to.
1990  *
1991  * returns zero on success and < 0 on any error
1992  */
1993 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1994                       *root, struct btrfs_path *path, struct btrfs_disk_key
1995                       *key, u64 bytenr, int slot, int level)
1996 {
1997         struct extent_buffer *lower;
1998         int nritems;
1999
2000         BUG_ON(!path->nodes[level]);
2001         btrfs_assert_tree_locked(path->nodes[level]);
2002         lower = path->nodes[level];
2003         nritems = btrfs_header_nritems(lower);
2004         BUG_ON(slot > nritems);
2005         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
2006                 BUG();
2007         if (slot != nritems) {
2008                 memmove_extent_buffer(lower,
2009                               btrfs_node_key_ptr_offset(slot + 1),
2010                               btrfs_node_key_ptr_offset(slot),
2011                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
2012         }
2013         btrfs_set_node_key(lower, key, slot);
2014         btrfs_set_node_blockptr(lower, slot, bytenr);
2015         WARN_ON(trans->transid == 0);
2016         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2017         btrfs_set_header_nritems(lower, nritems + 1);
2018         btrfs_mark_buffer_dirty(lower);
2019         return 0;
2020 }
2021
/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
 *
 * returns 0 on success and < 0 on failure
 */
static noinline int split_node(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path, int level)
{
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
	int mid;
	int ret;
	int wret;
	u32 c_nritems;

	c = path->nodes[level];
	WARN_ON(btrfs_header_generation(c) != trans->transid);
	if (c == root->node) {
		/* trying to split the root, lets make a new one */
		ret = insert_new_root(trans, root, path, level + 1);
		if (ret)
			return ret;
	} else {
		/* try pushing into siblings first; splitting may be avoided */
		ret = push_nodes_for_insert(trans, root, path, level);
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
			return 0;
		if (ret < 0)
			return ret;
	}

	/* split roughly in half; the upper half moves to the new block */
	c_nritems = btrfs_header_nritems(c);
	mid = (c_nritems + 1) / 2;
	btrfs_node_key(c, &disk_key, mid);

	split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
					root->root_key.objectid,
					&disk_key, level, c->start, 0);
	if (IS_ERR(split))
		return PTR_ERR(split);

	root_add_used(root, root->nodesize);

	/* initialize the new node's header */
	memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(split, btrfs_header_level(c));
	btrfs_set_header_bytenr(split, split->start);
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(split, root->root_key.objectid);
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
			    BTRFS_UUID_SIZE);


	/* move pointers [mid, c_nritems) into the new node */
	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
	ret = 0;

	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	/* link the new node into the parent, right after c */
	wret = insert_ptr(trans, root, path, &disk_key, split->start,
			  path->slots[level + 1] + 1,
			  level + 1);
	if (wret)
		ret = wret;

	/* point the path at whichever half now holds our slot,
	 * dropping the lock and ref on the half we no longer need */
	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
		btrfs_tree_unlock(c);
		free_extent_buffer(c);
		path->nodes[level] = split;
		path->slots[level + 1] += 1;
	} else {
		btrfs_tree_unlock(split);
		free_extent_buffer(split);
	}
	return ret;
}
2115
2116 /*
2117  * how many bytes are required to store the items in a leaf.  start
2118  * and nr indicate which items in the leaf to check.  This totals up the
2119  * space used both by the item structs and the item data
2120  */
2121 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2122 {
2123         int data_len;
2124         int nritems = btrfs_header_nritems(l);
2125         int end = min(nritems, start + nr) - 1;
2126
2127         if (!nr)
2128                 return 0;
2129         data_len = btrfs_item_end_nr(l, start);
2130         data_len = data_len - btrfs_item_offset_nr(l, end);
2131         data_len += sizeof(struct btrfs_item) * nr;
2132         WARN_ON(data_len < 0);
2133         return data_len;
2134 }
2135
2136 /*
2137  * The space between the end of the leaf items and
2138  * the start of the leaf data.  IOW, how much room
2139  * the leaf has left for both items and data
2140  */
2141 noinline int btrfs_leaf_free_space(struct btrfs_root *root,
2142                                    struct extent_buffer *leaf)
2143 {
2144         int nritems = btrfs_header_nritems(leaf);
2145         int ret;
2146         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2147         if (ret < 0) {
2148                 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2149                        "used %d nritems %d\n",
2150                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
2151                        leaf_space_used(leaf, 0, nritems), nritems);
2152         }
2153         return ret;
2154 }
2155
/*
 * min slot controls the lowest index we're willing to push to the
 * right.  We'll push up to and including min_slot, but no lower
 *
 * 'right' is handed in locked with an extra reference; on every return
 * path it is either installed into the path or unlocked and released.
 * Returns 0 if items were pushed, 1 if nothing fit.
 */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      int data_size, int empty,
				      struct extent_buffer *right,
				      int free_space, u32 left_nritems,
				      u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *upper = path->nodes[1];
	struct btrfs_disk_key disk_key;
	int slot;
	u32 i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 nr;
	u32 right_nritems;
	u32 data_end;
	u32 this_item_size;

	if (empty)
		nr = 0;
	else
		nr = max_t(u32, 1, min_slot);

	/* reserve room in the right leaf for the pending insert if the
	 * path's slot would move over with the pushed items */
	if (path->slots[0] >= left_nritems)
		push_space += data_size;

	slot = path->slots[1];
	/*
	 * walk backwards from the last item, counting how many items
	 * (and how many data bytes) will fit into the right leaf
	 */
	i = left_nritems - 1;
	while (i >= nr) {
		item = btrfs_item_nr(left, i);

		if (!empty && push_items > 0) {
			/* never push the slot the caller is working on */
			if (path->slots[0] > i)
				break;
			if (path->slots[0] == i) {
				/* stop early if pushing more would leave the
				 * left leaf too full for the insert */
				int space = btrfs_leaf_free_space(root, left);
				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
		if (i == 0)
			break;
		i--;
	}

	if (push_items == 0)
		goto out_unlock;

	if (!empty && push_items == left_nritems)
		WARN_ON(1);

	/* push left to right */
	right_nritems = btrfs_header_nritems(right);

	/* total data bytes being moved */
	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
	push_space -= leaf_data_end(root, left);

	/* make room in the right data area */
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

	/* copy from the left data area */
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
		     btrfs_leaf_data(left) + leaf_data_end(root, left),
		     push_space);

	/* slide the right leaf's item headers over to make room at slot 0 */
	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

	/* copy the items from left to right */
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));

	/* update the item pointers */
	right_nritems += push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	left_nritems -= push_items;
	btrfs_set_header_nritems(left, left_nritems);

	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
	else
		/* we emptied it; release the block back to the allocator */
		clean_tree_block(trans, root, left);

	btrfs_mark_buffer_dirty(right);

	/* the right leaf's first key changed; update the parent */
	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
	btrfs_mark_buffer_dirty(upper);

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 0;

out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}
2296
2297 /*
2298  * push some data in the path leaf to the right, trying to free up at
2299  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2300  *
2301  * returns 1 if the push failed because the other node didn't have enough
2302  * room, 0 if everything worked out and < 0 if there were major errors.
2303  *
2304  * this will push starting from min_slot to the end of the leaf.  It won't
2305  * push any slot lower than min_slot
2306  */
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
			   *root, struct btrfs_path *path,
			   int min_data_size, int data_size,
			   int empty, u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	int slot;
	int free_space;
	u32 left_nritems;
	int ret;

	/* no parent means this leaf is the root: there is no right neighbor */
	if (!path->nodes[1])
		return 1;

	slot = path->slots[1];
	upper = path->nodes[1];
	/* already the rightmost child of the parent: nothing to push into */
	if (slot >= btrfs_header_nritems(upper) - 1)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	right = read_node_slot(root, upper, slot + 1);
	if (right == NULL)
		return 1;

	btrfs_tree_lock(right);
	btrfs_set_lock_blocking(right);

	/* quick check before paying for the COW */
	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size)
		goto out_unlock;

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right);
	/* COW failure (e.g. -ENOSPC) just means we can't push; not fatal */
	if (ret)
		goto out_unlock;

	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size)
		goto out_unlock;

	left_nritems = btrfs_header_nritems(left);
	if (left_nritems == 0)
		goto out_unlock;

	/* __push_leaf_right takes over the lock and ref on 'right' */
	return __push_leaf_right(trans, root, path, min_data_size, empty,
				right, free_space, left_nritems, min_slot);
out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}
2362
2363 /*
2364  * push some data in the path leaf to the left, trying to free up at
2365  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2366  *
 * max_slot can put a limit on how far into the leaf we'll push items.  The
 * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
 * items
2370  */
/*
 * worker for push_leaf_left: move items from the front of the right leaf
 * (path->nodes[0]) into the end of 'left'.  The caller already holds locks
 * on both leaves and has verified 'free_space' bytes are free in 'left'.
 * Ownership of 'left' (lock + ref) is consumed here: whichever of the two
 * leaves the path ends up not pointing at is unlocked and released.
 */
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct btrfs_path *path, int data_size,
				     int empty, struct extent_buffer *left,
				     int free_space, u32 right_nritems,
				     u32 max_slot)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	int i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 old_left_nritems;
	u32 nr;
	int ret = 0;
	int wret;
	u32 this_item_size;
	u32 old_left_item_size;

	/* unless we're emptying the leaf, always leave one item behind */
	if (empty)
		nr = min(right_nritems, max_slot);
	else
		nr = min(right_nritems - 1, max_slot);

	/* figure out how many leading items fit in left's free space */
	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(right, i);

		if (!empty && push_items > 0) {
			/* don't push past the slot the caller is working in */
			if (path->slots[0] < i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(root, right);
				if (space + push_space * 2 > free_space)
					break;
			}
		}

		/* reserve room for the caller's pending insertion here */
		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
	}

	if (push_items == 0) {
		ret = 1;
		goto out;
	}
	if (!empty && push_items == btrfs_header_nritems(right))
		WARN_ON(1);

	/* push data from right to left */
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

	/* total bytes of item data being moved */
	push_space = BTRFS_LEAF_DATA_SIZE(root) -
		     btrfs_item_offset_nr(right, push_items - 1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
		     leaf_data_end(root, left) - push_space,
		     btrfs_leaf_data(right) +
		     btrfs_item_offset_nr(right, push_items - 1),
		     push_space);
	old_left_nritems = btrfs_header_nritems(left);
	BUG_ON(old_left_nritems <= 0);

	/* rewrite the data offsets of the items that just landed in left */
	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		u32 ioff;

		item = btrfs_item_nr(left, i);

		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
	}
	btrfs_set_header_nritems(left, old_left_nritems + push_items);

	/* fixup right node */
	if (push_items > right_nritems) {
		printk(KERN_CRIT "push items %d nr %u\n", push_items,
		       right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		/* slide right's surviving data and item headers to the front */
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
			      btrfs_item_nr_offset(push_items),
			     (btrfs_header_nritems(right) - push_items) *
			     sizeof(struct btrfs_item));
	}
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
	/* recompute the data offsets for everything left in right */
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	btrfs_mark_buffer_dirty(left);
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
	else
		clean_tree_block(trans, root, right);

	/* right's first key changed; propagate the new low key upward */
	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
	if (wret)
		ret = wret;

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		/* our slot moved into left: repoint the path at it */
		path->slots[0] += old_left_nritems;
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
		path->slots[1] -= 1;
	} else {
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->slots[0] -= push_items;
	}
	BUG_ON(path->slots[0] < 0);
	return ret;
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}
2516
2517 /*
2518  * push some data in the path leaf to the left, trying to free up at
2519  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2520  *
2521  * max_slot can put a limit on how far into the leaf we'll push items.  The
2522  * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
2523  * items
2524  */
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int min_data_size,
			  int data_size, int empty, u32 max_slot)
{
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
	int slot;
	int free_space;
	u32 right_nritems;
	int ret = 0;

	/* already the leftmost child of the parent: no left neighbor */
	slot = path->slots[1];
	if (slot == 0)
		return 1;
	/* no parent means this leaf is the root */
	if (!path->nodes[1])
		return 1;

	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	left = read_node_slot(root, path->nodes[1], slot - 1);
	if (left == NULL)
		return 1;

	btrfs_tree_lock(left);
	btrfs_set_lock_blocking(left);

	/* quick check before paying for the COW */
	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left);
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
		ret = 1;
		goto out;
	}

	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	/* __push_leaf_left takes over the lock and ref on 'left' */
	return __push_leaf_left(trans, root, path, min_data_size,
			       empty, left, free_space, right_nritems,
			       max_slot);
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}
2584
/*
 * helper for split_leaf: copy everything from slot 'mid' onward out of
 * leaf 'l' into the new (already allocated and locked) leaf 'right', and
 * insert a pointer to 'right' into the parent.  The path is adjusted to
 * keep pointing at the caller's slot, which may now live in 'right'.
 *
 * returns 0 if all went well and < 0 on failure.
 */
static noinline int copy_for_split(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct extent_buffer *l,
			       struct extent_buffer *right,
			       int slot, int mid, int nritems)
{
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret = 0;
	int wret;
	struct btrfs_disk_key disk_key;

	/* 'right' receives everything from slot 'mid' to the end of 'l' */
	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	/* move the item headers ... */
	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	/* ... and the item data, packed against the end of the new leaf */
	copy_extent_buffer(right, l,
		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
		     data_copy_size, btrfs_leaf_data(l) +
		     leaf_data_end(root, l), data_copy_size);

	/* the data shifted toward the end of the block by this much */
	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
		      btrfs_item_end_nr(l, mid);

	/* rewrite the data offsets of the items now living in 'right' */
	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
		u32 ioff;

		ioff = btrfs_item_offset(right, item);
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
	}

	btrfs_set_header_nritems(l, mid);
	ret = 0;
	/* link the new leaf into the parent, right after 'l' */
	btrfs_item_key(right, &disk_key, 0);
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
	if (wret)
		ret = wret;

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
	BUG_ON(path->slots[0] != slot);

	/* if our slot moved into the new leaf, switch the path over to it */
	if (mid <= slot) {
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[0] -= mid;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}

	BUG_ON(path->slots[0] < 0);

	return ret;
}
2656
2657 /*
2658  * double splits happen when we need to insert a big item in the middle
2659  * of a leaf.  A double split can leave us with 3 mostly empty leaves:
2660  * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
2661  *          A                 B                 C
2662  *
2663  * We avoid this by trying to push the items on either side of our target
2664  * into the adjacent leaves.  If all goes well we can avoid the double split
2665  * completely.
2666  */
2667 static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
2668                                           struct btrfs_root *root,
2669                                           struct btrfs_path *path,
2670                                           int data_size)
2671 {
2672         int ret;
2673         int progress = 0;
2674         int slot;
2675         u32 nritems;
2676
2677         slot = path->slots[0];
2678
2679         /*
2680          * try to push all the items after our slot into the
2681          * right leaf
2682          */
2683         ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
2684         if (ret < 0)
2685                 return ret;
2686
2687         if (ret == 0)
2688                 progress++;
2689
2690         nritems = btrfs_header_nritems(path->nodes[0]);
2691         /*
2692          * our goal is to get our slot at the start or end of a leaf.  If
2693          * we've done so we're done
2694          */
2695         if (path->slots[0] == 0 || path->slots[0] == nritems)
2696                 return 0;
2697
2698         if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
2699                 return 0;
2700
2701         /* try to push all the items before our slot into the next leaf */
2702         slot = path->slots[0];
2703         ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
2704         if (ret < 0)
2705                 return ret;
2706
2707         if (ret == 0)
2708                 progress++;
2709
2710         if (progress)
2711                 return 0;
2712         return 1;
2713 }
2714
2715 /*
2716  * split the path's leaf in two, making sure there is at least data_size
2717  * available for the resulting leaf level of the path.
2718  *
2719  * returns 0 if all went well and < 0 on failure.
2720  */
static noinline int split_leaf(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_key *ins_key,
			       struct btrfs_path *path, int data_size,
			       int extend)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *l;
	u32 nritems;
	int mid;
	int slot;
	struct extent_buffer *right;
	int ret = 0;
	int wret;
	/* 0: new leaf stays empty; 1: normal split; 2: split needed twice */
	int split;
	int num_doubles = 0;
	int tried_avoid_double = 0;

	l = path->nodes[0];
	slot = path->slots[0];
	/* an extended item must still fit in a leaf all by itself */
	if (extend && data_size + btrfs_item_size_nr(l, slot) +
	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
		return -EOVERFLOW;

	/* first try to make some room by pushing left and right */
	if (data_size) {
		wret = push_leaf_right(trans, root, path, data_size,
				       data_size, 0, 0);
		if (wret < 0)
			return wret;
		if (wret) {
			wret = push_leaf_left(trans, root, path, data_size,
					      data_size, 0, (u32)-1);
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];

		/* did the pushes work? */
		if (btrfs_leaf_free_space(root, l) >= data_size)
			return 0;
	}

	/* splitting the root leaf: grow the tree by one level first */
	if (!path->nodes[1]) {
		ret = insert_new_root(trans, root, path, 1);
		if (ret)
			return ret;
	}
again:
	split = 1;
	l = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(l);
	mid = (nritems + 1) / 2;

	/*
	 * pick the split point.  The preference is to cut at the middle,
	 * but if the half containing our slot would still be too full we
	 * cut at the slot itself, and if even that doesn't fit we fall
	 * back to an empty new leaf (split = 0) or a double split.
	 */
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				/* appending: give the item a leaf of its own */
				split = 0;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	} else {
		if (leaf_space_used(l, 0, mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (!extend && data_size && slot == 0) {
				/* prepending: give the item a leaf of its own */
				split = 0;
			} else if ((extend || !data_size) && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2 ;
				}
			}
		}
	}

	/* the new leaf's first key: the insert key or the key at 'mid' */
	if (split == 0)
		btrfs_cpu_key_to_disk(&disk_key, ins_key);
	else
		btrfs_item_key(l, &disk_key, mid);

	right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
					root->root_key.objectid,
					&disk_key, 0, l->start, 0);
	if (IS_ERR(right))
		return PTR_ERR(right);

	root_add_used(root, root->leafsize);

	/* initialize the new leaf's header */
	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(right, right->start);
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
			    BTRFS_UUID_SIZE);

	if (split == 0) {
		/*
		 * no items move: just link the empty leaf into the parent
		 * (after or before 'l' depending on which side the insert
		 * lands) and point the path at it
		 */
		if (mid <= slot) {
			btrfs_set_header_nritems(right, 0);
			wret = insert_ptr(trans, root, path,
					  &disk_key, right->start,
					  path->slots[1] + 1, 1);
			if (wret)
				ret = wret;

			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			path->slots[1] += 1;
		} else {
			btrfs_set_header_nritems(right, 0);
			wret = insert_ptr(trans, root, path,
					  &disk_key,
					  right->start,
					  path->slots[1], 1);
			if (wret)
				ret = wret;
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			/* new leaf became the leftmost child: fix parent keys */
			if (path->slots[1] == 0) {
				wret = fixup_low_keys(trans, root,
						path, &disk_key, 1);
				if (wret)
					ret = wret;
			}
		}
		btrfs_mark_buffer_dirty(right);
		return ret;
	}

	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
	BUG_ON(ret);

	/* a double split cuts the resulting leaf once more */
	if (split == 2) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
	}

	return ret;

push_for_double:
	/* try to avoid the double split by pushing into the neighbors */
	push_for_double_split(trans, root, path, data_size);
	tried_avoid_double = 1;
	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
		return 0;
	goto again;
}
2895
2896 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
2897                                          struct btrfs_root *root,
2898                                          struct btrfs_path *path, int ins_len)
2899 {
2900         struct btrfs_key key;
2901         struct extent_buffer *leaf;
2902         struct btrfs_file_extent_item *fi;
2903         u64 extent_len = 0;
2904         u32 item_size;
2905         int ret;
2906
2907         leaf = path->nodes[0];
2908         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2909
2910         BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
2911                key.type != BTRFS_EXTENT_CSUM_KEY);
2912
2913         if (btrfs_leaf_free_space(root, leaf) >= ins_len)
2914                 return 0;
2915
2916         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2917         if (key.type == BTRFS_EXTENT_DATA_KEY) {
2918                 fi = btrfs_item_ptr(leaf, path->slots[0],
2919                                     struct btrfs_file_extent_item);
2920                 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2921         }
2922         btrfs_release_path(path);
2923
2924         path->keep_locks = 1;
2925         path->search_for_split = 1;
2926         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2927         path->search_for_split = 0;
2928         if (ret < 0)
2929                 goto err;
2930
2931         ret = -EAGAIN;
2932         leaf = path->nodes[0];
2933         /* if our item isn't there or got smaller, return now */
2934         if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
2935                 goto err;
2936
2937         /* the leaf has  changed, it now has room.  return now */
2938         if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
2939                 goto err;
2940
2941         if (key.type == BTRFS_EXTENT_DATA_KEY) {
2942                 fi = btrfs_item_ptr(leaf, path->slots[0],
2943                                     struct btrfs_file_extent_item);
2944                 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
2945                         goto err;
2946         }
2947
2948         btrfs_set_path_blocking(path);
2949         ret = split_leaf(trans, root, &key, path, ins_len, 1);
2950         if (ret)
2951                 goto err;
2952
2953         path->keep_locks = 0;
2954         btrfs_unlock_up_safe(path, 1);
2955         return 0;
2956 err:
2957         path->keep_locks = 0;
2958         return ret;
2959 }
2960
/*
 * worker for btrfs_split_item: splits the item at path->slots[0] in place.
 * The first 'split_offset' bytes keep the original key; the remainder
 * becomes a new item with 'new_key', inserted in the following slot.
 * The caller guarantees the leaf has room for one extra item header.
 */
static noinline int split_item(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct btrfs_key *new_key,
			       unsigned long split_offset)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	struct btrfs_item *new_item;
	int slot;
	char *buf;
	u32 nritems;
	u32 item_size;
	u32 orig_offset;
	struct btrfs_disk_key disk_key;

	leaf = path->nodes[0];
	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));

	/* kmalloc below may sleep, so make the path blocking first */
	btrfs_set_path_blocking(path);

	item = btrfs_item_nr(leaf, path->slots[0]);
	orig_offset = btrfs_item_offset(leaf, item);
	item_size = btrfs_item_size(leaf, item);

	/* stage a copy of the item data so we can rewrite it in two pieces */
	buf = kmalloc(item_size, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
			    path->slots[0]), item_size);

	/* the new item goes in the slot right after the original */
	slot = path->slots[0] + 1;
	nritems = btrfs_header_nritems(leaf);
	if (slot != nritems) {
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
				btrfs_item_nr_offset(slot),
				(nritems - slot) * sizeof(struct btrfs_item));
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(leaf, &disk_key, slot);

	new_item = btrfs_item_nr(leaf, slot);

	/*
	 * the two items share the original data area: the new item covers
	 * the tail, the original shrinks to the first split_offset bytes
	 */
	btrfs_set_item_offset(leaf, new_item, orig_offset);
	btrfs_set_item_size(leaf, new_item, item_size - split_offset);

	btrfs_set_item_offset(leaf, item,
			      orig_offset + item_size - split_offset);
	btrfs_set_item_size(leaf, item, split_offset);

	btrfs_set_header_nritems(leaf, nritems + 1);

	/* write the data for the start of the original item */
	write_extent_buffer(leaf, buf,
			    btrfs_item_ptr_offset(leaf, path->slots[0]),
			    split_offset);

	/* write the data for the new item */
	write_extent_buffer(leaf, buf + split_offset,
			    btrfs_item_ptr_offset(leaf, slot),
			    item_size - split_offset);
	btrfs_mark_buffer_dirty(leaf);

	BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
	kfree(buf);
	return 0;
}
3031
3032 /*
3033  * This function splits a single item into two items,
3034  * giving 'new_key' to the new item and splitting the
3035  * old one at split_offset (from the start of the item).
3036  *
3037  * The path may be released by this operation.  After
3038  * the split, the path is pointing to the old item.  The
3039  * new item is going to be in the same node as the old one.
3040  *
3041  * Note, the item being split must be smaller enough to live alone on
3042  * a tree block with room for one extra struct btrfs_item
3043  *
3044  * This allows us to split the item in place, keeping a lock on the
3045  * leaf the entire time.
3046  */
3047 int btrfs_split_item(struct btrfs_trans_handle *trans,
3048                      struct btrfs_root *root,
3049                      struct btrfs_path *path,
3050                      struct btrfs_key *new_key,
3051                      unsigned long split_offset)
3052 {
3053         int ret;
3054         ret = setup_leaf_for_split(trans, root, path,
3055                                    sizeof(struct btrfs_item));
3056         if (ret)
3057                 return ret;
3058
3059         ret = split_item(trans, root, path, new_key, split_offset);
3060         return ret;
3061 }
3062
3063 /*
3064  * This function duplicate a item, giving 'new_key' to the new item.
3065  * It guarantees both items live in the same tree leaf and the new item
3066  * is contiguous with the original item.
3067  *
3068  * This allows us to split file extent in place, keeping a lock on the
3069  * leaf the entire time.
3070  */
3071 int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3072                          struct btrfs_root *root,
3073                          struct btrfs_path *path,
3074                          struct btrfs_key *new_key)
3075 {
3076         struct extent_buffer *leaf;
3077         int ret;
3078         u32 item_size;
3079
3080         leaf = path->nodes[0];
3081         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3082         ret = setup_leaf_for_split(trans, root, path,
3083                                    item_size + sizeof(struct btrfs_item));
3084         if (ret)
3085                 return ret;
3086
3087         path->slots[0]++;
3088         ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3089                                      item_size, item_size +
3090                                      sizeof(struct btrfs_item), 1);
3091         BUG_ON(ret);
3092
3093         leaf = path->nodes[0];
3094         memcpy_extent_buffer(leaf,
3095                              btrfs_item_ptr_offset(leaf, path->slots[0]),
3096                              btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3097                              item_size);
3098         return 0;
3099 }
3100
3101 /*
3102  * make the item pointed to by the path smaller.  new_size indicates
3103  * how small to make it, and from_end tells us if we just chop bytes
3104  * off the end of the item or if we shift the item to chop bytes off
3105  * the front.
3106  */
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
			u32 new_size, int from_end)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	leaf = path->nodes[0];
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	/* already the requested size, nothing to do */
	if (old_size == new_size)
		return 0;

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	old_data_start = btrfs_item_offset_nr(leaf, slot);

	/* number of bytes being chopped off the item */
	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/*
	 * first correct the data pointers: item data grows toward the
	 * front of the leaf, so freeing size_diff bytes moves every
	 * offset at or after this slot toward the end
	 */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
	}

	/* shift the data */
	if (from_end) {
		/*
		 * chop from the end: slide everything below the kept
		 * portion (this item's head plus all later items' data)
		 * up by size_diff
		 */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			/*
			 * inline file extents keep a fixed header in front
			 * of the data; when chopping from the front, move
			 * that header to the item's new start so it is not
			 * lost by the data shift below
			 */
			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				      (unsigned long)fi,
				      offsetof(struct btrfs_file_extent_item,
						 disk_bytenr));
			}
		}

		/* slide the data of this and all later items up */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		/*
		 * chopping from the front moves the item's logical start,
		 * so the key offset advances by the same amount
		 */
		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);

	/* shrinking can never overflow; if it did, the leaf is corrupt */
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return 0;
}
3202
3203 /*
3204  * make the item pointed to by the path bigger, data_size is the new size.
3205  */
int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	/* the caller must already have reserved data_size bytes of room */
	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	slot = path->slots[0];
	/* end offset of this item's data before it grows */
	old_data = btrfs_item_end_nr(leaf, slot);

	BUG_ON(slot < 0);
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk(KERN_CRIT "slot %d too large, nritems %d\n",
		       slot, nritems);
		BUG_ON(1);
	}

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/*
	 * first correct the data pointers: item data grows toward the
	 * front of the leaf, so growing shifts every offset at or after
	 * this slot toward the front by data_size
	 */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
	}

	/* shift the data of this and all later items toward the front */
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);

	data_end = old_data;
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);

	/* free space was verified above, so going negative means corruption */
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return 0;
}
3268
3269 /*
3270  * Given a key and some data, insert items into the tree.
3271  * This does all the path init required, making room in the tree if needed.
3272  * Returns the number of keys that were inserted.
3273  */
3274 int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3275                             struct btrfs_root *root,
3276                             struct btrfs_path *path,
3277                             struct btrfs_key *cpu_key, u32 *data_size,
3278                             int nr)
3279 {
3280         struct extent_buffer *leaf;
3281         struct btrfs_item *item;
3282         int ret = 0;
3283         int slot;
3284         int i;
3285         u32 nritems;
3286         u32 total_data = 0;
3287         u32 total_size = 0;
3288         unsigned int data_end;
3289         struct btrfs_disk_key disk_key;
3290         struct btrfs_key found_key;
3291
3292         for (i = 0; i < nr; i++) {
3293                 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3294                     BTRFS_LEAF_DATA_SIZE(root)) {
3295                         break;
3296                         nr = i;
3297                 }
3298                 total_data += data_size[i];
3299                 total_size += data_size[i] + sizeof(struct btrfs_item);
3300         }
3301         BUG_ON(nr == 0);
3302
3303         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3304         if (ret == 0)
3305                 return -EEXIST;
3306         if (ret < 0)
3307                 goto out;
3308
3309         leaf = path->nodes[0];
3310
3311         nritems = btrfs_header_nritems(leaf);
3312         data_end = leaf_data_end(root, leaf);
3313
3314         if (btrfs_leaf_free_space(root, leaf) < total_size) {
3315                 for (i = nr; i >= 0; i--) {
3316                         total_data -= data_size[i];
3317                         total_size -= data_size[i] + sizeof(struct btrfs_item);
3318                         if (total_size < btrfs_leaf_free_space(root, leaf))
3319                                 break;
3320                 }
3321                 nr = i;
3322         }
3323
3324         slot = path->slots[0];
3325         BUG_ON(slot < 0);
3326
3327         if (slot != nritems) {
3328                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3329
3330                 item = btrfs_item_nr(leaf, slot);
3331                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3332
3333                 /* figure out how many keys we can insert in here */
3334                 total_data = data_size[0];
3335                 for (i = 1; i < nr; i++) {
3336                         if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3337                                 break;
3338                         total_data += data_size[i];
3339                 }
3340                 nr = i;
3341
3342                 if (old_data < data_end) {
3343                         btrfs_print_leaf(root, leaf);
3344                         printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3345                                slot, old_data, data_end);
3346                         BUG_ON(1);
3347                 }
3348                 /*
3349                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
3350                  */
3351                 /* first correct the data pointers */
3352                 for (i = slot; i < nritems; i++) {
3353                         u32 ioff;
3354
3355                         item = btrfs_item_nr(leaf, i);
3356                         ioff = btrfs_item_offset(leaf, item);
3357                         btrfs_set_item_offset(leaf, item, ioff - total_data);
3358                 }
3359                 /* shift the items */
3360                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3361                               btrfs_item_nr_offset(slot),
3362                               (nritems - slot) * sizeof(struct btrfs_item));
3363
3364                 /* shift the data */
3365                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3366                               data_end - total_data, btrfs_leaf_data(leaf) +
3367                               data_end, old_data - data_end);
3368                 data_end = old_data;
3369         } else {
3370                 /*
3371                  * this sucks but it has to be done, if we are inserting at
3372                  * the end of the leaf only insert 1 of the items, since we
3373                  * have no way of knowing whats on the next leaf and we'd have
3374                  * to drop our current locks to figure it out
3375                  */
3376                 nr = 1;
3377         }
3378
3379         /* setup the item for the new data */
3380         for (i = 0; i < nr; i++) {
3381                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3382                 btrfs_set_item_key(leaf, &disk_key, slot + i);
3383                 item = btrfs_item_nr(leaf, slot + i);
3384                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3385                 data_end -= data_size[i];
3386                 btrfs_set_item_size(leaf, item, data_size[i]);
3387         }
3388         btrfs_set_header_nritems(leaf, nritems + nr);
3389         btrfs_mark_buffer_dirty(leaf);
3390
3391         ret = 0;
3392         if (slot == 0) {
3393                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3394                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3395         }
3396
3397         if (btrfs_leaf_free_space(root, leaf) < 0) {
3398                 btrfs_print_leaf(root, leaf);
3399                 BUG();
3400         }
3401 out:
3402         if (!ret)
3403                 ret = nr;
3404         return ret;
3405 }
3406
3407 /*
3408  * this is a helper for btrfs_insert_empty_items, the main goal here is
3409  * to save stack depth by doing the bulk of the work in a function
3410  * that doesn't call btrfs_search_slot
3411  */
int setup_items_for_insert(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, struct btrfs_path *path,
			   struct btrfs_key *cpu_key, u32 *data_size,
			   u32 total_data, u32 total_size, int nr)
{
	struct btrfs_item *item;
	int i;
	u32 nritems;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
	int ret;
	struct extent_buffer *leaf;
	int slot;

	leaf = path->nodes[0];
	slot = path->slots[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	/* the caller (btrfs_search_slot) must have reserved this room */
	if (btrfs_leaf_free_space(root, leaf) < total_size) {
		btrfs_print_leaf(root, leaf);
		printk(KERN_CRIT "not enough freespace need %u have %d\n",
		       total_size, btrfs_leaf_free_space(root, leaf));
		BUG();
	}

	/* inserting in the middle: make a gap for nr items + their data */
	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		/* sanity: an item's data must never sit below data_end */
		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - total_data);
		}
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end - total_data, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
		data_end = old_data;
	}

	/* setup the item for the new data */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(leaf, slot + i);
		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_item_size(leaf, item, data_size[i]);
	}

	btrfs_set_header_nritems(leaf, nritems + nr);

	ret = 0;
	if (slot == 0) {
		/* new smallest key in the leaf, fix the keys above us */
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
	}
	btrfs_unlock_up_safe(path, 1);
	btrfs_mark_buffer_dirty(leaf);

	/* free space was verified on entry, so this must not go negative */
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return ret;
}
3497
3498 /*
3499  * Given a key and some data, insert items into the tree.
3500  * This does all the path init required, making room in the tree if needed.
3501  */
3502 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3503                             struct btrfs_root *root,
3504                             struct btrfs_path *path,
3505                             struct btrfs_key *cpu_key, u32 *data_size,
3506                             int nr)
3507 {
3508         int ret = 0;
3509         int slot;
3510         int i;
3511         u32 total_size = 0;
3512         u32 total_data = 0;
3513
3514         for (i = 0; i < nr; i++)
3515                 total_data += data_size[i];
3516
3517         total_size = total_data + (nr * sizeof(struct btrfs_item));
3518         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3519         if (ret == 0)
3520                 return -EEXIST;
3521         if (ret < 0)
3522                 goto out;
3523
3524         slot = path->slots[0];
3525         BUG_ON(slot < 0);
3526
3527         ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
3528                                total_data, total_size, nr);
3529
3530 out:
3531         return ret;
3532 }
3533
3534 /*
3535  * Given a key and some data, insert an item into the tree.
3536  * This does all the path init required, making room in the tree if needed.
3537  */
3538 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3539                       *root, struct btrfs_key *cpu_key, void *data, u32
3540                       data_size)
3541 {
3542         int ret = 0;
3543         struct btrfs_path *path;
3544         struct extent_buffer *leaf;
3545         unsigned long ptr;
3546
3547         path = btrfs_alloc_path();
3548         if (!path)
3549                 return -ENOMEM;
3550         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3551         if (!ret) {
3552                 leaf = path->nodes[0];
3553                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3554                 write_extent_buffer(leaf, data, ptr, data_size);
3555                 btrfs_mark_buffer_dirty(leaf);
3556         }
3557         btrfs_free_path(path);
3558         return ret;
3559 }
3560
3561 /*
3562  * delete the pointer from a given node.
3563  *
3564  * the tree should have been previously balanced so the deletion does not
3565  * empty a node.
3566  */
3567 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3568                    struct btrfs_path *path, int level, int slot)
3569 {
3570         struct extent_buffer *parent = path->nodes[level];
3571         u32 nritems;
3572         int ret = 0;
3573         int wret;
3574
3575         nritems = btrfs_header_nritems(parent);
3576         if (slot != nritems - 1) {
3577                 memmove_extent_buffer(parent,
3578                               btrfs_node_key_ptr_offset(slot),
3579                               btrfs_node_key_ptr_offset(slot + 1),
3580                               sizeof(struct btrfs_key_ptr) *
3581                               (nritems - slot - 1));
3582         }
3583         nritems--;
3584         btrfs_set_header_nritems(parent, nritems);
3585         if (nritems == 0 && parent == root->node) {
3586                 BUG_ON(btrfs_header_level(root->node) != 1);
3587                 /* just turn the root into a leaf and break */
3588                 btrfs_set_header_level(root->node, 0);
3589         } else if (slot == 0) {
3590                 struct btrfs_disk_key disk_key;
3591
3592                 btrfs_node_key(parent, &disk_key, 0);
3593                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
3594                 if (wret)
3595                         ret = wret;
3596         }
3597         btrfs_mark_buffer_dirty(parent);
3598         return ret;
3599 }
3600
3601 /*
3602  * a helper function to delete the leaf pointed to by path->slots[1] and
3603  * path->nodes[1].
3604  *
3605  * This deletes the pointer in path->nodes[1] and frees the leaf
3606  * block extent.  zero is returned if it all worked out, < 0 otherwise.
3607  *
3608  * The path must have already been setup for deleting the leaf, including
3609  * all the proper balancing.  path->nodes[1] must be locked.
3610  */
3611 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3612                                    struct btrfs_root *root,
3613                                    struct btrfs_path *path,
3614                                    struct extent_buffer *leaf)
3615 {
3616         int ret;
3617
3618         WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3619         ret = del_ptr(trans, root, path, 1, path->slots[1]);
3620         if (ret)
3621                 return ret;
3622
3623         /*
3624          * btrfs_free_extent is expensive, we want to make sure we
3625          * aren't holding any locks when we call it
3626          */
3627         btrfs_unlock_up_safe(path, 0);
3628
3629         root_sub_used(root, leaf->len);
3630
3631         btrfs_free_tree_block(trans, root, leaf, 0, 1);
3632         return 0;
3633 }
3634 /*
3635  * delete the item at the leaf level in path.  If that empties
3636  * the leaf, remove it from the tree
3637  */
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	int last_off;
	int dsize = 0;
	int ret = 0;
	int wret;
	int i;
	u32 nritems;

	leaf = path->nodes[0];
	/* lowest data offset among the items being removed */
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	/* total data bytes occupied by the deleted items */
	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

	nritems = btrfs_header_nritems(leaf);

	/* nothing to shift when deleting the tail items of the leaf */
	if (slot + nr != nritems) {
		int data_end = leaf_data_end(root, leaf);

		/* slide the surviving data over the freed region */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
			      last_off - data_end);

		/* data moved toward the end: bump surviving offsets */
		for (i = slot + nr; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
		}

		/* close the gap in the item header array */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - nr));
	}
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;

	/* delete the leaf if we've emptied it */
	if (nritems == 0) {
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
		} else {
			btrfs_set_path_blocking(path);
			clean_tree_block(trans, root, leaf);
			ret = btrfs_del_leaf(trans, root, path, leaf);
			BUG_ON(ret);
		}
	} else {
		int used = leaf_space_used(leaf, 0, nritems);
		if (slot == 0) {
			struct btrfs_disk_key disk_key;

			/* first key changed, propagate it up the tree */
			btrfs_item_key(leaf, &disk_key, 0);
			wret = fixup_low_keys(trans, root, path,
					      &disk_key, 1);
			if (wret)
				ret = wret;
		}

		/* delete the leaf if it is mostly empty */
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
			slot = path->slots[1];
			/* hold a ref: the pushes may drop path->nodes[0] */
			extent_buffer_get(leaf);

			btrfs_set_path_blocking(path);
			wret = push_leaf_left(trans, root, path, 1, 1,
					      1, (u32)-1);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;

			/* anything left? try emptying to the right too */
			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
				wret = push_leaf_right(trans, root, path, 1,
						       1, 1, 0);
				if (wret < 0 && wret != -ENOSPC)
					ret = wret;
			}

			if (btrfs_header_nritems(leaf) == 0) {
				path->slots[1] = slot;
				ret = btrfs_del_leaf(trans, root, path, leaf);
				BUG_ON(ret);
				free_extent_buffer(leaf);
			} else {
				/* if we're still in the path, make sure
				 * we're dirty.  Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
			}
		} else {
			btrfs_mark_buffer_dirty(leaf);
		}
	}
	return ret;
}
3748
3749 /*
3750  * search the tree again to find a leaf with lesser keys
3751  * returns 0 if it found something or 1 if there are no lesser leaves.
3752  * returns < 0 on io errors.
3753  *
3754  * This may release the path, and so you may lose any locks held at the
3755  * time you call it.
3756  */
3757 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3758 {
3759         struct btrfs_key key;
3760         struct btrfs_disk_key found_key;
3761         int ret;
3762
3763         btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3764
3765         if (key.offset > 0)
3766                 key.offset--;
3767         else if (key.type > 0)
3768                 key.type--;
3769         else if (key.objectid > 0)
3770                 key.objectid--;
3771         else
3772                 return 1;
3773
3774         btrfs_release_path(path);
3775         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3776         if (ret < 0)
3777                 return ret;
3778         btrfs_item_key(path->nodes[0], &found_key, 0);
3779         ret = comp_keys(&found_key, &key);
3780         if (ret < 0)
3781                 return 0;
3782         return 1;
3783 }
3784
3785 /*
3786  * A helper function to walk down the tree starting at min_key, and looking
3787  * for nodes or leaves that are either in cache or have a minimum
3788  * transaction id.  This is used by the btree defrag code, and tree logging
3789  *
3790  * This does not cow, but it does stuff the starting key it finds back
3791  * into min_key, so you can call btrfs_search_slot with cow=1 on the
3792  * key and get a writable path.
3793  *
3794  * This does lock as it descends, and path->keep_locks should be set
3795  * to 1 by the caller.
3796  *
3797  * This honors path->lowest_level to prevent descent past a given level
3798  * of the tree.
3799  *
3800  * min_trans indicates the oldest transaction that you are interested
3801  * in walking through.  Any nodes or leaves older than min_trans are
3802  * skipped over (without reading them).
3803  *
3804  * returns zero if something useful was found, < 0 on error and 1 if there
3805  * was nothing in the tree that matched the search criteria.
3806  */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_key *max_key,
			 struct btrfs_path *path, int cache_only,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;

	WARN_ON(!path->keep_locks);
again:
	/* start from a freshly locked root each pass */
	cur = btrfs_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = 1;

	/* the whole tree is older than min_trans: nothing can match */
	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = bin_search(cur, min_key, level, &slot);

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		/* inexact match: step back to the pointer covering min_key */
		if (sret && slot > 0)
			slot--;
		/*
		 * check this node pointer against the cache_only and
		 * min_trans parameters.  If it isn't in cache or is too
		 * old, skip to the next one.
		 */
		while (slot < nritems) {
			u64 blockptr;
			u64 gen;
			struct extent_buffer *tmp;
			struct btrfs_disk_key disk_key;

			blockptr = btrfs_node_blockptr(cur, slot);
			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			if (!cache_only)
				break;

			/* don't walk past the caller's upper bound */
			if (max_key) {
				btrfs_node_key(cur, &disk_key, slot);
				if (comp_keys(&disk_key, max_key) >= 0) {
					ret = 1;
					goto out;
				}
			}

			/* cache lookup only; this never reads from disk */
			tmp = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));

			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				free_extent_buffer(tmp);
				break;
			}
			if (tmp)
				free_extent_buffer(tmp);
			slot++;
		}
find_next_key:
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			path->slots[level] = slot;
			btrfs_set_path_blocking(path);
			sret = btrfs_find_next_key(root, path, min_key, level,
						  cache_only, min_trans);
			if (sret == 0) {
				btrfs_release_path(path);
				goto again;
			} else {
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			unlock_up(path, level, 1);
			goto out;
		}
		/* descend one level, lock child before releasing parents */
		btrfs_set_path_blocking(path);
		cur = read_node_slot(root, cur, slot);
		BUG_ON(!cur);

		btrfs_tree_lock(cur);

		path->locks[level - 1] = 1;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1);
		btrfs_clear_path_blocking(path, NULL);
	}
out:
	/* hand the key we stopped at back to the caller for resuming */
	if (ret == 0)
		memcpy(min_key, &found_key, sizeof(found_key));
	btrfs_set_path_blocking(path);
	return ret;
}
3929
3930 /*
3931  * this is similar to btrfs_next_leaf, but does not try to preserve
3932  * and fixup the path.  It looks for and returns the next key in the
3933  * tree based on the current path and the cache_only and min_trans
3934  * parameters.
3935  *
3936  * 0 is returned if another key is found, < 0 if there are any errors
3937  * and 1 is returned if there are no higher keys in the tree
3938  *
3939  * path->keep_locks should be set to 1 on the search made before
3940  * calling this function.
3941  */
3942 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3943                         struct btrfs_key *key, int level,
3944                         int cache_only, u64 min_trans)
3945 {
3946         int slot;
3947         struct extent_buffer *c;
3948
3949         WARN_ON(!path->keep_locks);
3950         while (level < BTRFS_MAX_LEVEL) {
3951                 if (!path->nodes[level])
3952                         return 1;
3953
3954                 slot = path->slots[level] + 1;
3955                 c = path->nodes[level];
3956 next:
3957                 if (slot >= btrfs_header_nritems(c)) {
3958                         int ret;
3959                         int orig_lowest;
3960                         struct btrfs_key cur_key;
3961                         if (level + 1 >= BTRFS_MAX_LEVEL ||
3962                             !path->nodes[level + 1])
3963                                 return 1;
3964
3965                         if (path->locks[level + 1]) {
3966                                 level++;
3967                                 continue;
3968                         }
3969
3970                         slot = btrfs_header_nritems(c) - 1;
3971                         if (level == 0)
3972                                 btrfs_item_key_to_cpu(c, &cur_key, slot);
3973                         else
3974                                 btrfs_node_key_to_cpu(c, &cur_key, slot);
3975
3976                         orig_lowest = path->lowest_level;
3977                         btrfs_release_path(path);
3978                         path->lowest_level = level;
3979                         ret = btrfs_search_slot(NULL, root, &cur_key, path,
3980                                                 0, 0);
3981                         path->lowest_level = orig_lowest;
3982                         if (ret < 0)
3983                                 return ret;
3984
3985                         c = path->nodes[level];
3986                         slot = path->slots[level];
3987                         if (ret == 0)
3988                                 slot++;
3989                         goto next;
3990                 }
3991
3992                 if (level == 0)
3993                         btrfs_item_key_to_cpu(c, key, slot);
3994                 else {
3995                         u64 blockptr = btrfs_node_blockptr(c, slot);
3996                         u64 gen = btrfs_node_ptr_generation(c, slot);
3997
3998                         if (cache_only) {
3999                                 struct extent_buffer *cur;
4000                                 cur = btrfs_find_tree_block(root, blockptr,
4001                                             btrfs_level_size(root, level - 1));
4002                                 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
4003                                         slot++;
4004                                         if (cur)
4005                                                 free_extent_buffer(cur);
4006                                         goto next;
4007                                 }
4008                                 free_extent_buffer(cur);
4009                         }
4010                         if (gen < min_trans) {
4011                                 slot++;
4012                                 goto next;
4013                         }
4014                         btrfs_node_key_to_cpu(c, key, slot);
4015                 }
4016                 return 0;
4017         }
4018         return 1;
4019 }
4020
4021 /*
4022  * search the tree again to find a leaf with greater keys
4023  * returns 0 if it found something or 1 if there are no greater leaves.
4024  * returns < 0 on io errors.
4025  */
4026 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4027 {
4028         int slot;
4029         int level;
4030         struct extent_buffer *c;
4031         struct extent_buffer *next;
4032         struct btrfs_key key;
4033         u32 nritems;
4034         int ret;
4035         int old_spinning = path->leave_spinning;
4036         int force_blocking = 0;
4037
4038         nritems = btrfs_header_nritems(path->nodes[0]);
4039         if (nritems == 0)
4040                 return 1;
4041
4042         /*
4043          * we take the blocks in an order that upsets lockdep.  Using
4044          * blocking mode is the only way around it.
4045          */
4046 #ifdef CONFIG_DEBUG_LOCK_ALLOC
4047         force_blocking = 1;
4048 #endif
4049
4050         btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4051 again:
4052         level = 1;
4053         next = NULL;
4054         btrfs_release_path(path);
4055
4056         path->keep_locks = 1;
4057
4058         if (!force_blocking)
4059                 path->leave_spinning = 1;
4060
4061         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4062         path->keep_locks = 0;
4063
4064         if (ret < 0)
4065                 return ret;
4066
4067         nritems = btrfs_header_nritems(path->nodes[0]);
4068         /*
4069          * by releasing the path above we dropped all our locks.  A balance
4070          * could have added more items next to the key that used to be
4071          * at the very end of the block.  So, check again here and
4072          * advance the path if there are now more items available.
4073          */
4074         if (nritems > 0 && path->slots[0] < nritems - 1) {
4075                 if (ret == 0)
4076                         path->slots[0]++;
4077                 ret = 0;
4078                 goto done;
4079         }
4080
4081         while (level < BTRFS_MAX_LEVEL) {
4082                 if (!path->nodes[level]) {
4083                         ret = 1;
4084                         goto done;
4085                 }
4086
4087                 slot = path->slots[level] + 1;
4088                 c = path->nodes[level];
4089                 if (slot >= btrfs_header_nritems(c)) {
4090                         level++;
4091                         if (level == BTRFS_MAX_LEVEL) {
4092                                 ret = 1;
4093                                 goto done;
4094                         }
4095                         continue;
4096                 }
4097
4098                 if (next) {
4099                         btrfs_tree_unlock(next);
4100                         free_extent_buffer(next);
4101                 }
4102
4103                 next = c;
4104                 ret = read_block_for_search(NULL, root, path, &next, level,
4105                                             slot, &key);
4106                 if (ret == -EAGAIN)
4107                         goto again;
4108
4109                 if (ret < 0) {
4110                         btrfs_release_path(path);
4111                         goto done;
4112                 }
4113
4114                 if (!path->skip_locking) {
4115                         ret = btrfs_try_spin_lock(next);
4116                         if (!ret) {
4117                                 btrfs_set_path_blocking(path);
4118                                 btrfs_tree_lock(next);
4119                                 if (!force_blocking)
4120                                         btrfs_clear_path_blocking(path, next);
4121                         }
4122                         if (force_blocking)
4123                                 btrfs_set_lock_blocking(next);
4124                 }
4125                 break;
4126         }
4127         path->slots[level] = slot;
4128         while (1) {
4129                 level--;
4130                 c = path->nodes[level];
4131                 if (path->locks[level])
4132                         btrfs_tree_unlock(c);
4133
4134                 free_extent_buffer(c);
4135                 path->nodes[level] = next;
4136                 path->slots[level] = 0;
4137                 if (!path->skip_locking)
4138                         path->locks[level] = 1;
4139
4140                 if (!level)
4141                         break;
4142
4143                 ret = read_block_for_search(NULL, root, path, &next, level,
4144                                             0, &key);
4145                 if (ret == -EAGAIN)
4146                         goto again;
4147
4148                 if (ret < 0) {
4149                         btrfs_release_path(path);
4150                         goto done;
4151                 }
4152
4153                 if (!path->skip_locking) {
4154                         btrfs_assert_tree_locked(path->nodes[level]);
4155                         ret = btrfs_try_spin_lock(next);
4156                         if (!ret) {
4157                                 btrfs_set_path_blocking(path);
4158                                 btrfs_tree_lock(next);
4159                                 if (!force_blocking)
4160                                         btrfs_clear_path_blocking(path, next);
4161                         }
4162                         if (force_blocking)
4163                                 btrfs_set_lock_blocking(next);
4164                 }
4165         }
4166         ret = 0;
4167 done:
4168         unlock_up(path, 0, 1);
4169         path->leave_spinning = old_spinning;
4170         if (!old_spinning)
4171                 btrfs_set_path_blocking(path);
4172
4173         return ret;
4174 }
4175
4176 /*
4177  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4178  * searching until it gets past min_objectid or finds an item of 'type'
4179  *
4180  * returns 0 if something is found, 1 if nothing was found and < 0 on error
4181  */
4182 int btrfs_previous_item(struct btrfs_root *root,
4183                         struct btrfs_path *path, u64 min_objectid,
4184                         int type)
4185 {
4186         struct btrfs_key found_key;
4187         struct extent_buffer *leaf;
4188         u32 nritems;
4189         int ret;
4190
4191         while (1) {
4192                 if (path->slots[0] == 0) {
4193                         btrfs_set_path_blocking(path);
4194                         ret = btrfs_prev_leaf(root, path);
4195                         if (ret != 0)
4196                                 return ret;
4197                 } else {
4198                         path->slots[0]--;
4199                 }
4200                 leaf = path->nodes[0];
4201                 nritems = btrfs_header_nritems(leaf);
4202                 if (nritems == 0)
4203                         return 1;
4204                 if (path->slots[0] == nritems)
4205                         path->slots[0]--;
4206
4207                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4208                 if (found_key.objectid < min_objectid)
4209                         break;
4210                 if (found_key.type == type)
4211                         return 0;
4212                 if (found_key.objectid == min_objectid &&
4213                     found_key.type < type)
4214                         break;
4215         }
4216         return 1;
4217 }