]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/extent-tree.c
Btrfs: sparse files!
[mv-sheeva.git] / fs / btrfs / extent-tree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "print-tree.h"
5 #include "transaction.h"
6
7 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8                             *orig_root, u64 num_blocks, u64 search_start, u64
9                             search_end, struct btrfs_key *ins, int data);
10 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
11                                  btrfs_root *extent_root);
12 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13                                btrfs_root *extent_root);
14
15 static void reada_extent_leaves(struct btrfs_root *root,
16                                 struct btrfs_path *path, u64 limit)
17 {
18         struct btrfs_node *node;
19         int i;
20         int nritems;
21         u64 item_objectid;
22         u64 blocknr;
23         int slot;
24         int ret;
25
26         if (!path->nodes[1])
27                 return;
28         node = btrfs_buffer_node(path->nodes[1]);
29         slot = path->slots[1] + 1;
30         nritems = btrfs_header_nritems(&node->header);
31         for (i = slot; i < nritems && i < slot + 8; i++) {
32                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
33                 if (item_objectid > limit)
34                         break;
35                 blocknr = btrfs_node_blockptr(node, i);
36                 ret = readahead_tree_block(root, blocknr);
37                 if (ret)
38                         break;
39         }
40 }
41
42 static int cache_block_group(struct btrfs_root *root,
43                              struct btrfs_block_group_cache *block_group)
44 {
45         struct btrfs_path *path;
46         int ret;
47         struct btrfs_key key;
48         struct btrfs_leaf *leaf;
49         struct radix_tree_root *extent_radix;
50         int slot;
51         u64 i;
52         u64 last = 0;
53         u64 hole_size;
54         u64 limit;
55         int found = 0;
56
57         root = root->fs_info->extent_root;
58         extent_radix = &root->fs_info->extent_map_radix;
59
60         if (block_group->cached)
61                 return 0;
62         if (block_group->data)
63                 return 0;
64         path = btrfs_alloc_path();
65         if (!path)
66                 return -ENOMEM;
67 printk("cache block group %Lu\n", block_group->key.objectid);
68         key.objectid = block_group->key.objectid;
69         key.flags = 0;
70         key.offset = 0;
71         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
72         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
73         if (ret < 0)
74                 return ret;
75         if (ret && path->slots[0] > 0)
76                 path->slots[0]--;
77         limit = block_group->key.objectid + block_group->key.offset;
78         reada_extent_leaves(root, path, limit);
79         while(1) {
80                 leaf = btrfs_buffer_leaf(path->nodes[0]);
81                 slot = path->slots[0];
82                 if (slot >= btrfs_header_nritems(&leaf->header)) {
83                         reada_extent_leaves(root, path, limit);
84                         ret = btrfs_next_leaf(root, path);
85                         if (ret == 0) {
86                                 continue;
87                         } else {
88                                 if (found) {
89                                         hole_size = block_group->key.objectid +
90                                                 block_group->key.offset - last;
91                                 } else {
92                                         last = block_group->key.objectid;
93                                         hole_size = block_group->key.offset;
94                                 }
95                                 for (i = 0; i < hole_size; i++) {
96                                         set_radix_bit(extent_radix,
97                                                       last + i);
98                                 }
99                                 break;
100                         }
101                 }
102                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
103                 if (key.objectid >= block_group->key.objectid +
104                     block_group->key.offset) {
105                         if (found) {
106                                 hole_size = block_group->key.objectid +
107                                         block_group->key.offset - last;
108                         } else {
109                                 last = block_group->key.objectid;
110                                 hole_size = block_group->key.offset;
111                         }
112                         for (i = 0; i < hole_size; i++) {
113                                 set_radix_bit(extent_radix, last + i);
114                         }
115                         break;
116                 }
117                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
118                         if (!found) {
119                                 last = key.objectid + key.offset;
120                                 found = 1;
121                         } else {
122                                 hole_size = key.objectid - last;
123                                 for (i = 0; i < hole_size; i++) {
124                                         set_radix_bit(extent_radix, last + i);
125                                 }
126                                 last = key.objectid + key.offset;
127                         }
128                 }
129                 path->slots[0]++;
130         }
131
132         block_group->cached = 1;
133         btrfs_free_path(path);
134         return 0;
135 }
136
137 static struct btrfs_block_group_cache *lookup_block_group(struct
138                                                           btrfs_fs_info *info,
139                                                           u64 blocknr)
140 {
141         struct btrfs_block_group_cache *block_group;
142         int ret;
143
144         ret = radix_tree_gang_lookup(&info->block_group_radix,
145                                      (void **)&block_group,
146                                      blocknr, 1);
147         if (ret) {
148                 if (block_group->key.objectid <= blocknr && blocknr <=
149                     block_group->key.objectid + block_group->key.offset)
150                         return block_group;
151         }
152         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
153                                      (void **)&block_group,
154                                      blocknr, 1);
155         if (ret) {
156                 if (block_group->key.objectid <= blocknr && blocknr <=
157                     block_group->key.objectid + block_group->key.offset)
158                         return block_group;
159         }
160         WARN_ON(1);
161         printk("lookup_block_group fails for blocknr %Lu\n", blocknr);
162         printk("last ret was %d\n", ret);
163         if (ret) {
164                 printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset);
165         }
166         return NULL;
167 }
168
169 static u64 leaf_range(struct btrfs_root *root)
170 {
171         u64 size = BTRFS_LEAF_DATA_SIZE(root);
172         size = size / (sizeof(struct btrfs_extent_item) +
173                        sizeof(struct btrfs_item));
174         return size;
175 }
176
177 static u64 find_search_start(struct btrfs_root *root,
178                              struct btrfs_block_group_cache **cache_ret,
179                              u64 search_start, int num)
180 {
181         unsigned long gang[8];
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         u64 last = max(search_start, cache->key.objectid);
185
186         if (cache->data)
187                 goto out;
188         if (num > 1) {
189                 last = max(last, cache->last_prealloc);
190         }
191 again:
192         cache_block_group(root, cache);
193         while(1) {
194                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
195                                            gang, last, ARRAY_SIZE(gang));
196                 if (!ret)
197                         goto out;
198                 last = gang[ret-1] + 1;
199                 if (num > 1) {
200                         if (ret != ARRAY_SIZE(gang)) {
201                                 goto new_group;
202                         }
203                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
204                                 continue;
205                         }
206                 }
207                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
208                         goto new_group;
209                 }
210                 return gang[0];
211         }
212 out:
213         return max(cache->last_alloc, search_start);
214
215 new_group:
216         cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1);
217         if (!cache) {
218                 return max((*cache_ret)->last_alloc, search_start);
219         }
220         cache = btrfs_find_block_group(root, cache,
221                                        last + cache->key.offset - 1, 0, 0);
222         *cache_ret = cache;
223         goto again;
224 }
225
226 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
227                                                  struct btrfs_block_group_cache
228                                                  *hint, u64 search_start,
229                                                  int data, int owner)
230 {
231         struct btrfs_block_group_cache *cache[8];
232         struct btrfs_block_group_cache *found_group = NULL;
233         struct btrfs_fs_info *info = root->fs_info;
234         struct radix_tree_root *radix;
235         u64 used;
236         u64 last = 0;
237         u64 hint_last;
238         int i;
239         int ret;
240         int full_search = 0;
241         int factor = 8;
242
243         if (!owner)
244                 factor = 5;
245
246         if (data)
247                 radix = &info->block_group_data_radix;
248         else
249                 radix = &info->block_group_radix;
250
251         if (search_start) {
252                 struct btrfs_block_group_cache *shint;
253                 shint = lookup_block_group(info, search_start);
254                 if (shint->data == data) {
255                         used = btrfs_block_group_used(&shint->item);
256                         if (used + shint->pinned <
257                             (shint->key.offset * factor) / 10) {
258                                 return shint;
259                         }
260                 }
261         }
262         if (hint && hint->data == data) {
263                 used = btrfs_block_group_used(&hint->item);
264                 if (used + hint->pinned < (hint->key.offset * factor) / 10) {
265                         return hint;
266                 }
267                 if (used >= (hint->key.offset * 8) / 10) {
268                         radix_tree_tag_clear(radix,
269                                              hint->key.objectid +
270                                              hint->key.offset - 1,
271                                              BTRFS_BLOCK_GROUP_AVAIL);
272                 }
273                 last = hint->key.offset * 3;
274                 if (hint->key.objectid >= last)
275                         last = max(search_start + hint->key.offset - 1,
276                                    hint->key.objectid - last);
277                 else
278                         last = hint->key.objectid + hint->key.offset;
279                 hint_last = last;
280         } else {
281                 if (hint)
282                         hint_last = max(hint->key.objectid, search_start);
283                 else
284                         hint_last = search_start;
285
286                 last = hint_last;
287         }
288         while(1) {
289                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
290                                                  last, ARRAY_SIZE(cache),
291                                                  BTRFS_BLOCK_GROUP_AVAIL);
292                 if (!ret)
293                         break;
294                 for (i = 0; i < ret; i++) {
295                         last = cache[i]->key.objectid +
296                                 cache[i]->key.offset;
297                         used = btrfs_block_group_used(&cache[i]->item);
298                         if (used + cache[i]->pinned <
299                             (cache[i]->key.offset * factor) / 10) {
300                                 found_group = cache[i];
301                                 goto found;
302                         }
303                         if (used >= (cache[i]->key.offset * 8) / 10) {
304                                 radix_tree_tag_clear(radix,
305                                                      cache[i]->key.objectid +
306                                                      cache[i]->key.offset - 1,
307                                                      BTRFS_BLOCK_GROUP_AVAIL);
308                         }
309                 }
310                 cond_resched();
311         }
312         last = hint_last;
313 again:
314         while(1) {
315                 ret = radix_tree_gang_lookup(radix, (void **)cache,
316                                              last, ARRAY_SIZE(cache));
317                 if (!ret)
318                         break;
319                 for (i = 0; i < ret; i++) {
320                         last = cache[i]->key.objectid +
321                                 cache[i]->key.offset;
322                         used = btrfs_block_group_used(&cache[i]->item);
323                         if (used + cache[i]->pinned < cache[i]->key.offset) {
324                                 found_group = cache[i];
325                                 goto found;
326                         }
327                         if (used >= cache[i]->key.offset) {
328                                 radix_tree_tag_clear(radix,
329                                                      cache[i]->key.objectid +
330                                                      cache[i]->key.offset - 1,
331                                                      BTRFS_BLOCK_GROUP_AVAIL);
332                         }
333                 }
334                 cond_resched();
335         }
336         if (!full_search) {
337 printk("find block group doing full search data %d start %Lu\n", data, search_start);
338                 last = search_start;
339                 full_search = 1;
340                 goto again;
341         }
342         if (!found_group) {
343 printk("find block group bailing to zero data %d\n", data);
344                 ret = radix_tree_gang_lookup(radix,
345                                              (void **)&found_group, 0, 1);
346                 BUG_ON(ret != 1);
347         }
348 found:
349         return found_group;
350 }
351
352 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
353                                 struct btrfs_root *root,
354                                 u64 blocknr, u64 num_blocks)
355 {
356         struct btrfs_path *path;
357         int ret;
358         struct btrfs_key key;
359         struct btrfs_leaf *l;
360         struct btrfs_extent_item *item;
361         struct btrfs_key ins;
362         u32 refs;
363
364         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
365                          &ins, 0);
366         path = btrfs_alloc_path();
367         BUG_ON(!path);
368         btrfs_init_path(path);
369         key.objectid = blocknr;
370         key.flags = 0;
371         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
372         key.offset = num_blocks;
373         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
374                                 0, 1);
375         if (ret != 0) {
376 printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
377                 BUG();
378         }
379         BUG_ON(ret != 0);
380         l = btrfs_buffer_leaf(path->nodes[0]);
381         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
382         refs = btrfs_extent_refs(item);
383         btrfs_set_extent_refs(item, refs + 1);
384         btrfs_mark_buffer_dirty(path->nodes[0]);
385
386         btrfs_release_path(root->fs_info->extent_root, path);
387         btrfs_free_path(path);
388         finish_current_insert(trans, root->fs_info->extent_root);
389         del_pending_extents(trans, root->fs_info->extent_root);
390         return 0;
391 }
392
393 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
394                              struct btrfs_root *root, u64 blocknr,
395                              u64 num_blocks, u32 *refs)
396 {
397         struct btrfs_path *path;
398         int ret;
399         struct btrfs_key key;
400         struct btrfs_leaf *l;
401         struct btrfs_extent_item *item;
402
403         path = btrfs_alloc_path();
404         btrfs_init_path(path);
405         key.objectid = blocknr;
406         key.offset = num_blocks;
407         key.flags = 0;
408         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
409         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
410                                 0, 0);
411         if (ret != 0)
412                 BUG();
413         l = btrfs_buffer_leaf(path->nodes[0]);
414         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
415         *refs = btrfs_extent_refs(item);
416         btrfs_release_path(root->fs_info->extent_root, path);
417         btrfs_free_path(path);
418         return 0;
419 }
420
421 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
422                        struct btrfs_root *root)
423 {
424         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
425 }
426
427 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
428                   struct buffer_head *buf)
429 {
430         u64 blocknr;
431         struct btrfs_node *buf_node;
432         struct btrfs_leaf *buf_leaf;
433         struct btrfs_disk_key *key;
434         struct btrfs_file_extent_item *fi;
435         int i;
436         int leaf;
437         int ret;
438
439         if (!root->ref_cows)
440                 return 0;
441         buf_node = btrfs_buffer_node(buf);
442         leaf = btrfs_is_leaf(buf_node);
443         buf_leaf = btrfs_buffer_leaf(buf);
444         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
445                 if (leaf) {
446                         u64 disk_blocknr;
447                         key = &buf_leaf->items[i].key;
448                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
449                                 continue;
450                         fi = btrfs_item_ptr(buf_leaf, i,
451                                             struct btrfs_file_extent_item);
452                         if (btrfs_file_extent_type(fi) ==
453                             BTRFS_FILE_EXTENT_INLINE)
454                                 continue;
455                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
456                         if (disk_blocknr == 0)
457                                 continue;
458                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
459                                     btrfs_file_extent_disk_num_blocks(fi));
460                         BUG_ON(ret);
461                 } else {
462                         blocknr = btrfs_node_blockptr(buf_node, i);
463                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
464                         BUG_ON(ret);
465                 }
466         }
467         return 0;
468 }
469
470 static int write_one_cache_group(struct btrfs_trans_handle *trans,
471                                  struct btrfs_root *root,
472                                  struct btrfs_path *path,
473                                  struct btrfs_block_group_cache *cache)
474 {
475         int ret;
476         int pending_ret;
477         struct btrfs_root *extent_root = root->fs_info->extent_root;
478         struct btrfs_block_group_item *bi;
479         struct btrfs_key ins;
480
481         find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0);
482         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
483         BUG_ON(ret);
484         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
485                             struct btrfs_block_group_item);
486         memcpy(bi, &cache->item, sizeof(*bi));
487         mark_buffer_dirty(path->nodes[0]);
488         btrfs_release_path(extent_root, path);
489
490         finish_current_insert(trans, extent_root);
491         pending_ret = del_pending_extents(trans, extent_root);
492         if (ret)
493                 return ret;
494         if (pending_ret)
495                 return pending_ret;
496         if (cache->data)
497                 cache->last_alloc = cache->first_free;
498         return 0;
499
500 }
501
502 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
503                                    struct btrfs_root *root,
504                                    struct radix_tree_root *radix)
505 {
506         struct btrfs_block_group_cache *cache[8];
507         int ret;
508         int err = 0;
509         int werr = 0;
510         int i;
511         struct btrfs_path *path;
512
513         path = btrfs_alloc_path();
514         if (!path)
515                 return -ENOMEM;
516
517         while(1) {
518                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
519                                                  0, ARRAY_SIZE(cache),
520                                                  BTRFS_BLOCK_GROUP_DIRTY);
521                 if (!ret)
522                         break;
523                 for (i = 0; i < ret; i++) {
524                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
525                                              cache[i]->key.offset - 1,
526                                              BTRFS_BLOCK_GROUP_DIRTY);
527                         err = write_one_cache_group(trans, root,
528                                                     path, cache[i]);
529                         if (err)
530                                 werr = err;
531                 }
532         }
533         btrfs_free_path(path);
534         return werr;
535 }
536
537 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
538                                    struct btrfs_root *root)
539 {
540         int ret;
541         int ret2;
542         ret = write_dirty_block_radix(trans, root,
543                                       &root->fs_info->block_group_radix);
544         ret2 = write_dirty_block_radix(trans, root,
545                                       &root->fs_info->block_group_data_radix);
546         if (ret)
547                 return ret;
548         if (ret2)
549                 return ret2;
550         return 0;
551 }
552
553 static int update_block_group(struct btrfs_trans_handle *trans,
554                               struct btrfs_root *root,
555                               u64 blocknr, u64 num, int alloc, int mark_free)
556 {
557         struct btrfs_block_group_cache *cache;
558         struct btrfs_fs_info *info = root->fs_info;
559         u64 total = num;
560         u64 old_val;
561         u64 block_in_group;
562         u64 i;
563
564         while(total) {
565                 cache = lookup_block_group(info, blocknr);
566                 if (!cache) {
567                         printk(KERN_CRIT "blocknr %Lu lookup failed\n",
568                                blocknr);
569                         return -1;
570                 }
571                 block_in_group = blocknr - cache->key.objectid;
572                 WARN_ON(block_in_group > cache->key.offset);
573                 radix_tree_tag_set(cache->radix, cache->key.objectid +
574                                    cache->key.offset - 1,
575                                    BTRFS_BLOCK_GROUP_DIRTY);
576
577                 old_val = btrfs_block_group_used(&cache->item);
578                 num = min(total, cache->key.offset - block_in_group);
579                 if (alloc) {
580                         old_val += num;
581                         if (blocknr > cache->last_alloc)
582                                 cache->last_alloc = blocknr;
583                         if (!cache->data) {
584                                 for (i = 0; i < num; i++) {
585                                         clear_radix_bit(&info->extent_map_radix,
586                                                         blocknr + i);
587                                 }
588                         }
589                 } else {
590                         old_val -= num;
591                         if (blocknr < cache->first_free)
592                                 cache->first_free = blocknr;
593                         if (!cache->data && mark_free) {
594                                 for (i = 0; i < num; i++) {
595                                         set_radix_bit(&info->extent_map_radix,
596                                                       blocknr + i);
597                                 }
598                         }
599                         if (old_val < (cache->key.offset * 5) / 10 &&
600                             old_val + num >= (cache->key.offset * 5) / 10) {
601 printk("group %Lu now available\n", cache->key.objectid);
602                                 radix_tree_tag_set(cache->radix,
603                                                    cache->key.objectid +
604                                                    cache->key.offset - 1,
605                                                    BTRFS_BLOCK_GROUP_AVAIL);
606                         }
607                 }
608                 btrfs_set_block_group_used(&cache->item, old_val);
609                 total -= num;
610                 blocknr += num;
611         }
612         return 0;
613 }
614
615 static int try_remove_page(struct address_space *mapping, unsigned long index)
616 {
617         int ret;
618         ret = invalidate_mapping_pages(mapping, index, index);
619         return ret;
620 }
621
622 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
623                                btrfs_root *root)
624 {
625         unsigned long gang[8];
626         struct inode *btree_inode = root->fs_info->btree_inode;
627         struct btrfs_block_group_cache *block_group;
628         u64 first = 0;
629         int ret;
630         int i;
631         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
632         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
633
634         while(1) {
635                 ret = find_first_radix_bit(pinned_radix, gang, 0,
636                                            ARRAY_SIZE(gang));
637                 if (!ret)
638                         break;
639                 if (!first)
640                         first = gang[0];
641                 for (i = 0; i < ret; i++) {
642                         clear_radix_bit(pinned_radix, gang[i]);
643                         block_group = lookup_block_group(root->fs_info,
644                                                          gang[i]);
645                         if (block_group) {
646                                 WARN_ON(block_group->pinned == 0);
647                                 block_group->pinned--;
648                                 if (gang[i] < block_group->last_alloc)
649                                         block_group->last_alloc = gang[i];
650                                 if (gang[i] < block_group->last_prealloc)
651                                         block_group->last_prealloc = gang[i];
652                                 if (!block_group->data)
653                                         set_radix_bit(extent_radix, gang[i]);
654                         }
655                         try_remove_page(btree_inode->i_mapping,
656                                         gang[i] << (PAGE_CACHE_SHIFT -
657                                                     btree_inode->i_blkbits));
658                 }
659         }
660         return 0;
661 }
662
663 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
664                                  btrfs_root *extent_root)
665 {
666         struct btrfs_key ins;
667         struct btrfs_extent_item extent_item;
668         int i;
669         int ret;
670         u64 super_blocks_used;
671         struct btrfs_fs_info *info = extent_root->fs_info;
672
673         btrfs_set_extent_refs(&extent_item, 1);
674         ins.offset = 1;
675         ins.flags = 0;
676         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
677         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
678
679         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
680                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
681                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
682                 btrfs_set_super_blocks_used(info->disk_super,
683                                             super_blocks_used + 1);
684                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
685                                         sizeof(extent_item));
686                 BUG_ON(ret);
687         }
688         extent_root->fs_info->extent_tree_insert_nr = 0;
689         extent_root->fs_info->extent_tree_prealloc_nr = 0;
690         return 0;
691 }
692
693 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
694 {
695         int err;
696         struct btrfs_header *header;
697         struct buffer_head *bh;
698
699         if (!pending) {
700                 bh = btrfs_find_tree_block(root, blocknr);
701                 if (bh) {
702                         if (buffer_uptodate(bh)) {
703                                 u64 transid =
704                                     root->fs_info->running_transaction->transid;
705                                 header = btrfs_buffer_header(bh);
706                                 if (btrfs_header_generation(header) ==
707                                     transid) {
708                                         btrfs_block_release(root, bh);
709                                         return 0;
710                                 }
711                         }
712                         btrfs_block_release(root, bh);
713                 }
714                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
715                 if (!err) {
716                         struct btrfs_block_group_cache *cache;
717                         cache = lookup_block_group(root->fs_info, blocknr);
718                         if (cache)
719                                 cache->pinned++;
720                 }
721         } else {
722                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
723         }
724         BUG_ON(err < 0);
725         return 0;
726 }
727
728 /*
729  * remove an extent from the root, returns 0 on success
730  */
731 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
732                          *root, u64 blocknr, u64 num_blocks, int pin,
733                          int mark_free)
734 {
735         struct btrfs_path *path;
736         struct btrfs_key key;
737         struct btrfs_fs_info *info = root->fs_info;
738         struct btrfs_root *extent_root = info->extent_root;
739         int ret;
740         struct btrfs_extent_item *ei;
741         struct btrfs_key ins;
742         u32 refs;
743
744         key.objectid = blocknr;
745         key.flags = 0;
746         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
747         key.offset = num_blocks;
748
749         find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0);
750         path = btrfs_alloc_path();
751         BUG_ON(!path);
752         btrfs_init_path(path);
753
754         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
755         if (ret) {
756                 printk("failed to find %Lu\n", key.objectid);
757                 btrfs_print_tree(extent_root, extent_root->node);
758                 printk("failed to find %Lu\n", key.objectid);
759                 BUG();
760         }
761         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
762                             struct btrfs_extent_item);
763         BUG_ON(ei->refs == 0);
764         refs = btrfs_extent_refs(ei) - 1;
765         btrfs_set_extent_refs(ei, refs);
766         btrfs_mark_buffer_dirty(path->nodes[0]);
767         if (refs == 0) {
768                 u64 super_blocks_used;
769
770                 if (pin) {
771                         ret = pin_down_block(root, blocknr, 0);
772                         BUG_ON(ret);
773                 }
774
775                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
776                 btrfs_set_super_blocks_used(info->disk_super,
777                                             super_blocks_used - num_blocks);
778                 ret = btrfs_del_item(trans, extent_root, path);
779                 if (ret)
780                         BUG();
781                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
782                                          mark_free);
783                 BUG_ON(ret);
784         }
785         btrfs_free_path(path);
786         finish_current_insert(trans, extent_root);
787         return ret;
788 }
789
790 /*
791  * find all the blocks marked as pending in the radix tree and remove
792  * them from the extent map
793  */
794 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
795                                btrfs_root *extent_root)
796 {
797         int ret;
798         int wret;
799         int err = 0;
800         unsigned long gang[4];
801         int i;
802         struct radix_tree_root *pending_radix;
803         struct radix_tree_root *pinned_radix;
804         struct btrfs_block_group_cache *cache;
805
806         pending_radix = &extent_root->fs_info->pending_del_radix;
807         pinned_radix = &extent_root->fs_info->pinned_radix;
808
809         while(1) {
810                 ret = find_first_radix_bit(pending_radix, gang, 0,
811                                            ARRAY_SIZE(gang));
812                 if (!ret)
813                         break;
814                 for (i = 0; i < ret; i++) {
815                         wret = set_radix_bit(pinned_radix, gang[i]);
816                         if (wret == 0) {
817                                 cache = lookup_block_group(extent_root->fs_info,
818                                                            gang[i]);
819                                 if (cache)
820                                         cache->pinned++;
821                         }
822                         if (wret < 0) {
823                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
824                                        wret);
825                                 BUG_ON(wret < 0);
826                         }
827                         wret = clear_radix_bit(pending_radix, gang[i]);
828                         BUG_ON(wret);
829                         wret = __free_extent(trans, extent_root,
830                                              gang[i], 1, 0, 0);
831                         if (wret)
832                                 err = wret;
833                 }
834         }
835         return err;
836 }
837
838 /*
839  * remove an extent from the root, returns 0 on success
840  */
841 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
842                       *root, u64 blocknr, u64 num_blocks, int pin)
843 {
844         struct btrfs_root *extent_root = root->fs_info->extent_root;
845         int pending_ret;
846         int ret;
847
848         if (root == extent_root) {
849                 pin_down_block(root, blocknr, 1);
850                 return 0;
851         }
852         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
853         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
854         return ret ? ret : pending_ret;
855 }
856
857 /*
858  * walks the btree of allocated extents and find a hole of a given size.
859  * The key ins is changed to record the hole:
860  * ins->objectid == block start
861  * ins->flags = BTRFS_EXTENT_ITEM_KEY
862  * ins->offset == number of blocks
863  * Any available blocks before search_start are skipped.
864  */
865 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
866                             *orig_root, u64 num_blocks, u64 search_start, u64
867                             search_end, struct btrfs_key *ins, int data)
868 {
869         struct btrfs_path *path;
870         struct btrfs_key key;
871         int ret;
872         u64 hole_size = 0;
873         int slot = 0;
874         u64 last_block = 0;
875         u64 test_block;
876         u64 orig_search_start = search_start;
877         int start_found;
878         struct btrfs_leaf *l;
879         struct btrfs_root * root = orig_root->fs_info->extent_root;
880         struct btrfs_fs_info *info = root->fs_info;
881         int total_needed = num_blocks;
882         int total_found = 0;
883         int fill_prealloc = 0;
884         int level;
885         struct btrfs_block_group_cache *block_group;
886         int full_scan = 0;
887         u64 limit;
888
889         path = btrfs_alloc_path();
890         ins->flags = 0;
891         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
892
893         level = btrfs_header_level(btrfs_buffer_header(root->node));
894         if (num_blocks == 0) {
895                 fill_prealloc = 1;
896                 num_blocks = 1;
897                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
898         }
899         if (search_end == (u64)-1)
900                 search_end = btrfs_super_total_blocks(info->disk_super);
901         if (search_start) {
902                 block_group = lookup_block_group(info, search_start);
903                 block_group = btrfs_find_block_group(root, block_group,
904                                                      search_start, data, 1);
905         } else {
906                 block_group = btrfs_find_block_group(root,
907                                                      trans->block_group, 0,
908                                                      data, 1);
909         }
910
911 check_failed:
912         if (!full_scan && block_group->data != data)
913                 WARN_ON(1);
914
915         if (!data)
916                 search_start = find_search_start(root, &block_group,
917                                                  search_start, total_needed);
918         else
919                 search_start = max(block_group->last_alloc, search_start);
920
921         btrfs_init_path(path);
922         ins->objectid = search_start;
923         ins->offset = 0;
924         start_found = 0;
925
926         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
927         if (ret < 0)
928                 goto error;
929
930         if (path->slots[0] > 0) {
931                 path->slots[0]--;
932         }
933
934         l = btrfs_buffer_leaf(path->nodes[0]);
935         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
936         /*
937          * a rare case, go back one key if we hit a block group item
938          * instead of an extent item
939          */
940         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
941             key.objectid + key.offset >= search_start) {
942                 ins->objectid = key.objectid;
943                 ins->offset = key.offset - 1;
944                 btrfs_release_path(root, path);
945                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
946                 if (ret < 0)
947                         goto error;
948
949                 if (path->slots[0] > 0) {
950                         path->slots[0]--;
951                 }
952         }
953
954         while (1) {
955                 l = btrfs_buffer_leaf(path->nodes[0]);
956                 slot = path->slots[0];
957                 if (slot >= btrfs_header_nritems(&l->header)) {
958                         if (fill_prealloc) {
959                                 info->extent_tree_prealloc_nr = 0;
960                                 total_found = 0;
961                         }
962                         if (start_found)
963                                 limit = last_block +
964                                         block_group->key.offset / 2;
965                         else
966                                 limit = search_start +
967                                         block_group->key.offset / 2;
968                         ret = btrfs_next_leaf(root, path);
969                         if (ret == 0)
970                                 continue;
971                         if (ret < 0)
972                                 goto error;
973                         if (!start_found) {
974                                 ins->objectid = search_start;
975                                 ins->offset = search_end - search_start;
976                                 start_found = 1;
977                                 goto check_pending;
978                         }
979                         ins->objectid = last_block > search_start ?
980                                         last_block : search_start;
981                         ins->offset = search_end - ins->objectid;
982                         goto check_pending;
983                 }
984
985                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
986                 if (key.objectid >= search_start && key.objectid > last_block &&
987                     start_found) {
988                         if (last_block < search_start)
989                                 last_block = search_start;
990                         hole_size = key.objectid - last_block;
991                         if (hole_size >= num_blocks) {
992                                 ins->objectid = last_block;
993                                 ins->offset = hole_size;
994                                 goto check_pending;
995                         }
996                 }
997
998                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
999                         goto next;
1000
1001                 start_found = 1;
1002                 last_block = key.objectid + key.offset;
1003                 if (last_block >= block_group->key.objectid +
1004                     block_group->key.offset) {
1005                         btrfs_release_path(root, path);
1006                         search_start = block_group->key.objectid +
1007                                 block_group->key.offset * 2;
1008                         goto new_group;
1009                 }
1010 next:
1011                 path->slots[0]++;
1012                 cond_resched();
1013         }
1014         // FIXME -ENOSPC
1015 check_pending:
1016         /* we have to make sure we didn't find an extent that has already
1017          * been allocated by the map tree or the original allocation
1018          */
1019         btrfs_release_path(root, path);
1020         BUG_ON(ins->objectid < search_start);
1021
1022         if (ins->objectid + num_blocks >= search_end) {
1023                 if (full_scan)
1024                         return -ENOSPC;
1025                 search_start = orig_search_start;
1026                 full_scan = 1;
1027                 goto new_group;
1028         }
1029         for (test_block = ins->objectid;
1030              test_block < ins->objectid + num_blocks; test_block++) {
1031                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1032                         search_start = test_block + 1;
1033                         goto new_group;
1034                 }
1035         }
1036         if (!fill_prealloc && info->extent_tree_insert_nr) {
1037                 u64 last =
1038                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1039                 if (ins->objectid + num_blocks >
1040                     info->extent_tree_insert[0] &&
1041                     ins->objectid <= last) {
1042                         search_start = last + 1;
1043                         WARN_ON(!full_scan);
1044                         goto new_group;
1045                 }
1046         }
1047         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1048                 u64 first =
1049                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1050                 if (ins->objectid + num_blocks > first &&
1051                     ins->objectid <= info->extent_tree_prealloc[0]) {
1052                         search_start = info->extent_tree_prealloc[0] + 1;
1053                         WARN_ON(!full_scan);
1054                         goto new_group;
1055                 }
1056         }
1057         if (fill_prealloc) {
1058                 int nr;
1059                 test_block = ins->objectid;
1060                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1061                     leaf_range(root)) {
1062                         total_found = 0;
1063                         info->extent_tree_prealloc_nr = total_found;
1064                 }
1065                 while(test_block < ins->objectid + ins->offset &&
1066                       total_found < total_needed) {
1067                         nr = total_needed - total_found - 1;
1068                         BUG_ON(nr < 0);
1069                         info->extent_tree_prealloc[nr] = test_block;
1070                         total_found++;
1071                         test_block++;
1072                 }
1073                 if (total_found < total_needed) {
1074                         search_start = test_block;
1075                         goto new_group;
1076                 }
1077                 info->extent_tree_prealloc_nr = total_found;
1078         }
1079         if (!data) {
1080                 block_group = lookup_block_group(info, ins->objectid);
1081                 if (block_group) {
1082                         if (fill_prealloc)
1083                                 block_group->last_prealloc =
1084                                      info->extent_tree_prealloc[total_needed-1];
1085                         else
1086                                 trans->block_group = block_group;
1087                 }
1088         }
1089         ins->offset = num_blocks;
1090         btrfs_free_path(path);
1091         return 0;
1092
1093 new_group:
1094         if (search_start + num_blocks >= search_end) {
1095                 search_start = orig_search_start;
1096 printk("doing full scan!\n");
1097                 full_scan = 1;
1098         }
1099         block_group = lookup_block_group(info, search_start);
1100         if (!full_scan)
1101                 block_group = btrfs_find_block_group(root, block_group,
1102                                                      search_start, data, 0);
1103         cond_resched();
1104         goto check_failed;
1105
1106 error:
1107         btrfs_release_path(root, path);
1108         btrfs_free_path(path);
1109         return ret;
1110 }
1111 /*
1112  * finds a free extent and does all the dirty work required for allocation
1113  * returns the key for the extent through ins, and a tree buffer for
1114  * the first block of the extent through buf.
1115  *
1116  * returns 0 if everything worked, non-zero otherwise.
1117  */
1118 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1119                        struct btrfs_root *root, u64 owner,
1120                        u64 num_blocks, u64 search_start,
1121                        u64 search_end, struct btrfs_key *ins, int data)
1122 {
1123         int ret;
1124         int pending_ret;
1125         u64 super_blocks_used;
1126         struct btrfs_fs_info *info = root->fs_info;
1127         struct btrfs_root *extent_root = info->extent_root;
1128         struct btrfs_extent_item extent_item;
1129         struct btrfs_key prealloc_key;
1130
1131         btrfs_set_extent_refs(&extent_item, 1);
1132         btrfs_set_extent_owner(&extent_item, owner);
1133
1134         if (root == extent_root) {
1135                 int nr;
1136                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1137                 BUG_ON(num_blocks != 1);
1138                 ins->offset = 1;
1139                 info->extent_tree_prealloc_nr--;
1140                 nr = info->extent_tree_prealloc_nr;
1141                 ins->objectid = info->extent_tree_prealloc[nr];
1142                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1143                         ins->objectid;
1144                 ret = update_block_group(trans, root,
1145                                          ins->objectid, ins->offset, 1, 0);
1146                 BUG_ON(ret);
1147                 return 0;
1148         }
1149
1150         /*
1151          * if we're doing a data allocation, preallocate room in the
1152          * extent tree first.  This way the extent tree blocks end up
1153          * in the correct block group.
1154          */
1155         if (data) {
1156                 ret = find_free_extent(trans, root, 0, 0,
1157                                        search_end, &prealloc_key, 0);
1158                 if (ret) {
1159                         return ret;
1160                 }
1161                 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1162                         int nr = info->extent_tree_prealloc_nr;
1163                         search_end = info->extent_tree_prealloc[nr - 1] - 1;
1164                 } else {
1165                         search_start = info->extent_tree_prealloc[0] + 1;
1166                 }
1167         }
1168         /* do the real allocation */
1169         ret = find_free_extent(trans, root, num_blocks, search_start,
1170                                search_end, ins, data);
1171         if (ret) {
1172                 return ret;
1173         }
1174
1175         /*
1176          * if we're doing a metadata allocation, preallocate space in the
1177          * extent tree second.  This way, we don't create a tiny hole
1178          * in the allocation map between any unused preallocation blocks
1179          * and the metadata block we're actually allocating.  On disk,
1180          * it'll go:
1181          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1182          * The unused prealloc will get reused the next time around.
1183          */
1184         if (!data) {
1185                 if (ins->objectid + ins->offset >= search_end)
1186                         search_end = ins->objectid - 1;
1187                 else
1188                         search_start = ins->objectid + ins->offset;
1189
1190                 ret = find_free_extent(trans, root, 0, search_start,
1191                                        search_end, &prealloc_key, 0);
1192                 if (ret) {
1193                         return ret;
1194                 }
1195         }
1196
1197         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
1198         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
1199                                     num_blocks);
1200         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1201                                 sizeof(extent_item));
1202
1203         finish_current_insert(trans, extent_root);
1204         pending_ret = del_pending_extents(trans, extent_root);
1205         if (ret) {
1206                 return ret;
1207         }
1208         if (pending_ret) {
1209                 return pending_ret;
1210         }
1211         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1212         return 0;
1213 }
1214
1215 /*
1216  * helper function to allocate a block for a given tree
1217  * returns the tree buffer or NULL.
1218  */
1219 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1220                                            struct btrfs_root *root, u64 hint)
1221 {
1222         struct btrfs_key ins;
1223         int ret;
1224         struct buffer_head *buf;
1225
1226         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1227                                  1, hint, (unsigned long)-1, &ins, 0);
1228         if (ret) {
1229                 BUG();
1230                 return NULL;
1231         }
1232         BUG_ON(ret);
1233         buf = btrfs_find_create_tree_block(root, ins.objectid);
1234         set_buffer_uptodate(buf);
1235         set_buffer_checked(buf);
1236         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1237         return buf;
1238 }
1239
1240 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1241                          struct btrfs_root *root, struct buffer_head *cur)
1242 {
1243         struct btrfs_disk_key *key;
1244         struct btrfs_leaf *leaf;
1245         struct btrfs_file_extent_item *fi;
1246         int i;
1247         int nritems;
1248         int ret;
1249
1250         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1251         leaf = btrfs_buffer_leaf(cur);
1252         nritems = btrfs_header_nritems(&leaf->header);
1253         for (i = 0; i < nritems; i++) {
1254                 u64 disk_blocknr;
1255                 key = &leaf->items[i].key;
1256                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1257                         continue;
1258                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1259                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1260                         continue;
1261                 /*
1262                  * FIXME make sure to insert a trans record that
1263                  * repeats the snapshot del on crash
1264                  */
1265                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1266                 if (disk_blocknr == 0)
1267                         continue;
1268                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1269                                         btrfs_file_extent_disk_num_blocks(fi),
1270                                         0);
1271                 BUG_ON(ret);
1272         }
1273         return 0;
1274 }
1275
1276 /*
1277  * helper function for drop_snapshot, this walks down the tree dropping ref
1278  * counts as it goes.
1279  */
1280 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1281                           *root, struct btrfs_path *path, int *level)
1282 {
1283         struct buffer_head *next;
1284         struct buffer_head *cur;
1285         u64 blocknr;
1286         int ret;
1287         u32 refs;
1288
1289         WARN_ON(*level < 0);
1290         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1291         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1292                                1, &refs);
1293         BUG_ON(ret);
1294         if (refs > 1)
1295                 goto out;
1296         /*
1297          * walk down to the last node level and free all the leaves
1298          */
1299         while(*level >= 0) {
1300                 WARN_ON(*level < 0);
1301                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1302                 cur = path->nodes[*level];
1303                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1304                         WARN_ON(1);
1305                 if (path->slots[*level] >=
1306                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1307                         break;
1308                 if (*level == 0) {
1309                         ret = drop_leaf_ref(trans, root, cur);
1310                         BUG_ON(ret);
1311                         break;
1312                 }
1313                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1314                                               path->slots[*level]);
1315                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1316                 BUG_ON(ret);
1317                 if (refs != 1) {
1318                         path->slots[*level]++;
1319                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1320                         BUG_ON(ret);
1321                         continue;
1322                 }
1323                 next = read_tree_block(root, blocknr);
1324                 WARN_ON(*level <= 0);
1325                 if (path->nodes[*level-1])
1326                         btrfs_block_release(root, path->nodes[*level-1]);
1327                 path->nodes[*level-1] = next;
1328                 *level = btrfs_header_level(btrfs_buffer_header(next));
1329                 path->slots[*level] = 0;
1330         }
1331 out:
1332         WARN_ON(*level < 0);
1333         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1334         ret = btrfs_free_extent(trans, root,
1335                                 bh_blocknr(path->nodes[*level]), 1, 1);
1336         btrfs_block_release(root, path->nodes[*level]);
1337         path->nodes[*level] = NULL;
1338         *level += 1;
1339         BUG_ON(ret);
1340         return 0;
1341 }
1342
1343 /*
1344  * helper for dropping snapshots.  This walks back up the tree in the path
1345  * to find the first node higher up where we haven't yet gone through
1346  * all the slots
1347  */
1348 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1349                         *root, struct btrfs_path *path, int *level)
1350 {
1351         int i;
1352         int slot;
1353         int ret;
1354         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1355                 slot = path->slots[i];
1356                 if (slot < btrfs_header_nritems(
1357                     btrfs_buffer_header(path->nodes[i])) - 1) {
1358                         path->slots[i]++;
1359                         *level = i;
1360                         return 0;
1361                 } else {
1362                         ret = btrfs_free_extent(trans, root,
1363                                                 bh_blocknr(path->nodes[*level]),
1364                                                 1, 1);
1365                         BUG_ON(ret);
1366                         btrfs_block_release(root, path->nodes[*level]);
1367                         path->nodes[*level] = NULL;
1368                         *level = i + 1;
1369                 }
1370         }
1371         return 1;
1372 }
1373
1374 /*
1375  * drop the reference count on the tree rooted at 'snap'.  This traverses
1376  * the tree freeing any blocks that have a ref count of zero after being
1377  * decremented.
1378  */
1379 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1380                         *root, struct buffer_head *snap)
1381 {
1382         int ret = 0;
1383         int wret;
1384         int level;
1385         struct btrfs_path *path;
1386         int i;
1387         int orig_level;
1388
1389         path = btrfs_alloc_path();
1390         BUG_ON(!path);
1391         btrfs_init_path(path);
1392
1393         level = btrfs_header_level(btrfs_buffer_header(snap));
1394         orig_level = level;
1395         path->nodes[level] = snap;
1396         path->slots[level] = 0;
1397         while(1) {
1398                 wret = walk_down_tree(trans, root, path, &level);
1399                 if (wret > 0)
1400                         break;
1401                 if (wret < 0)
1402                         ret = wret;
1403
1404                 wret = walk_up_tree(trans, root, path, &level);
1405                 if (wret > 0)
1406                         break;
1407                 if (wret < 0)
1408                         ret = wret;
1409                 btrfs_btree_balance_dirty(root);
1410         }
1411         for (i = 0; i <= orig_level; i++) {
1412                 if (path->nodes[i]) {
1413                         btrfs_block_release(root, path->nodes[i]);
1414                 }
1415         }
1416         btrfs_free_path(path);
1417         return ret;
1418 }
1419
1420 static int free_block_group_radix(struct radix_tree_root *radix)
1421 {
1422         int ret;
1423         struct btrfs_block_group_cache *cache[8];
1424         int i;
1425
1426         while(1) {
1427                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1428                                              ARRAY_SIZE(cache));
1429                 if (!ret)
1430                         break;
1431                 for (i = 0; i < ret; i++) {
1432                         radix_tree_delete(radix, cache[i]->key.objectid +
1433                                           cache[i]->key.offset - 1);
1434                         kfree(cache[i]);
1435                 }
1436         }
1437         return 0;
1438 }
1439
1440 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1441 {
1442         int ret;
1443         int ret2;
1444         unsigned long gang[16];
1445         int i;
1446
1447         ret = free_block_group_radix(&info->block_group_radix);
1448         ret2 = free_block_group_radix(&info->block_group_data_radix);
1449         if (ret)
1450                 return ret;
1451         if (ret2)
1452                 return ret2;
1453
1454         while(1) {
1455                 ret = find_first_radix_bit(&info->extent_map_radix,
1456                                            gang, 0, ARRAY_SIZE(gang));
1457                 if (!ret)
1458                         break;
1459                 for (i = 0; i < ret; i++) {
1460                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1461                 }
1462         }
1463         return 0;
1464 }
1465
1466 int btrfs_read_block_groups(struct btrfs_root *root)
1467 {
1468         struct btrfs_path *path;
1469         int ret;
1470         int err = 0;
1471         struct btrfs_block_group_item *bi;
1472         struct btrfs_block_group_cache *cache;
1473         struct btrfs_fs_info *info = root->fs_info;
1474         struct radix_tree_root *radix;
1475         struct btrfs_key key;
1476         struct btrfs_key found_key;
1477         struct btrfs_leaf *leaf;
1478         u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
1479         u64 used;
1480         u64 nr = 0;
1481
1482         root = info->extent_root;
1483         key.objectid = 0;
1484         key.offset = group_size_blocks;
1485         key.flags = 0;
1486         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1487
1488         path = btrfs_alloc_path();
1489         if (!path)
1490                 return -ENOMEM;
1491
1492         while(1) {
1493                 ret = btrfs_search_slot(NULL, info->extent_root,
1494                                         &key, path, 0, 0);
1495                 if (ret != 0) {
1496                         err = ret;
1497                         break;
1498                 }
1499                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1500                 btrfs_disk_key_to_cpu(&found_key,
1501                                       &leaf->items[path->slots[0]].key);
1502                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1503                 if (!cache) {
1504                         err = -1;
1505                         break;
1506                 }
1507
1508                 if (nr % 3)
1509                         radix = &info->block_group_data_radix;
1510                 else
1511                         radix = &info->block_group_radix;
1512
1513                 bi = btrfs_item_ptr(leaf, path->slots[0],
1514                                     struct btrfs_block_group_item);
1515                 memcpy(&cache->item, bi, sizeof(*bi));
1516                 memcpy(&cache->key, &found_key, sizeof(found_key));
1517                 cache->last_alloc = cache->key.objectid;
1518                 cache->first_free = cache->key.objectid;
1519                 cache->last_prealloc = cache->key.objectid;
1520                 cache->pinned = 0;
1521                 cache->cached = 0;
1522
1523                 if (nr % 3)
1524                         cache->data = 1;
1525                 else
1526                         cache->data = 0;
1527                 cache->radix = radix;
1528
1529                 key.objectid = found_key.objectid + found_key.offset;
1530                 btrfs_release_path(root, path);
1531                 ret = radix_tree_insert(radix, found_key.objectid +
1532                                         found_key.offset - 1,
1533                                         (void *)cache);
1534                 BUG_ON(ret);
1535                 used = btrfs_block_group_used(bi);
1536                 if (used < (key.offset * 8) / 10) {
1537                         radix_tree_tag_set(radix, found_key.objectid +
1538                                            found_key.offset - 1,
1539                                            BTRFS_BLOCK_GROUP_AVAIL);
1540                 }
1541                 if (key.objectid >=
1542                     btrfs_super_total_blocks(info->disk_super))
1543                         break;
1544                 nr++;
1545         }
1546
1547         btrfs_free_path(path);
1548         return 0;
1549 }