]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/extent-tree.c
Btrfs: Verify checksums on tree blocks found without read_tree_block
[karo-tx-linux.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28
29 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
30 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
31 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
32
33 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
34
35 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
36                                  btrfs_root *extent_root);
37 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
38                                btrfs_root *extent_root);
39 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
40                            struct btrfs_root *root, u64 bytes_used,
41                            u64 type, u64 chunk_tree, u64 chunk_objectid,
42                            u64 size);
43
44
45 static int cache_block_group(struct btrfs_root *root,
46                              struct btrfs_block_group_cache *block_group)
47 {
48         struct btrfs_path *path;
49         int ret;
50         struct btrfs_key key;
51         struct extent_buffer *leaf;
52         struct extent_io_tree *free_space_cache;
53         int slot;
54         u64 last = 0;
55         u64 hole_size;
56         u64 first_free;
57         int found = 0;
58
59         if (!block_group)
60                 return 0;
61
62         root = root->fs_info->extent_root;
63         free_space_cache = &root->fs_info->free_space_cache;
64
65         if (block_group->cached)
66                 return 0;
67
68         path = btrfs_alloc_path();
69         if (!path)
70                 return -ENOMEM;
71
72         path->reada = 2;
73         first_free = block_group->key.objectid;
74         key.objectid = block_group->key.objectid;
75         key.offset = 0;
76         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
77         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
78         if (ret < 0)
79                 return ret;
80         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
81         if (ret < 0)
82                 return ret;
83         if (ret == 0) {
84                 leaf = path->nodes[0];
85                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
86                 if (key.objectid + key.offset > first_free)
87                         first_free = key.objectid + key.offset;
88         }
89         while(1) {
90                 leaf = path->nodes[0];
91                 slot = path->slots[0];
92                 if (slot >= btrfs_header_nritems(leaf)) {
93                         ret = btrfs_next_leaf(root, path);
94                         if (ret < 0)
95                                 goto err;
96                         if (ret == 0) {
97                                 continue;
98                         } else {
99                                 break;
100                         }
101                 }
102                 btrfs_item_key_to_cpu(leaf, &key, slot);
103                 if (key.objectid < block_group->key.objectid) {
104                         goto next;
105                 }
106                 if (key.objectid >= block_group->key.objectid +
107                     block_group->key.offset) {
108                         break;
109                 }
110
111                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
112                         if (!found) {
113                                 last = first_free;
114                                 found = 1;
115                         }
116                         if (key.objectid > last) {
117                                 hole_size = key.objectid - last;
118                                 set_extent_dirty(free_space_cache, last,
119                                                  last + hole_size - 1,
120                                                  GFP_NOFS);
121                         }
122                         last = key.objectid + key.offset;
123                 }
124 next:
125                 path->slots[0]++;
126         }
127
128         if (!found)
129                 last = first_free;
130         if (block_group->key.objectid +
131             block_group->key.offset > last) {
132                 hole_size = block_group->key.objectid +
133                         block_group->key.offset - last;
134                 set_extent_dirty(free_space_cache, last,
135                                  last + hole_size - 1, GFP_NOFS);
136         }
137         block_group->cached = 1;
138 err:
139         btrfs_free_path(path);
140         return 0;
141 }
142
143 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
144                                                          btrfs_fs_info *info,
145                                                          u64 bytenr)
146 {
147         struct extent_io_tree *block_group_cache;
148         struct btrfs_block_group_cache *block_group = NULL;
149         u64 ptr;
150         u64 start;
151         u64 end;
152         int ret;
153
154         block_group_cache = &info->block_group_cache;
155         ret = find_first_extent_bit(block_group_cache,
156                                     bytenr, &start, &end,
157                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
158                                     BLOCK_GROUP_SYSTEM);
159         if (ret) {
160                 return NULL;
161         }
162         ret = get_state_private(block_group_cache, start, &ptr);
163         if (ret)
164                 return NULL;
165
166         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
167         if (block_group->key.objectid <= bytenr && bytenr <
168             block_group->key.objectid + block_group->key.offset)
169                 return block_group;
170         return NULL;
171 }
172
173 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
174 {
175         return (cache->flags & bits) == bits;
176 }
177
178 static int noinline find_search_start(struct btrfs_root *root,
179                               struct btrfs_block_group_cache **cache_ret,
180                               u64 *start_ret, int num, int data)
181 {
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         struct extent_io_tree *free_space_cache;
185         struct extent_state *state;
186         u64 last;
187         u64 start = 0;
188         u64 cache_miss = 0;
189         u64 total_fs_bytes;
190         u64 search_start = *start_ret;
191         int wrapped = 0;
192
193         if (!cache)
194                 goto out;
195         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
196         free_space_cache = &root->fs_info->free_space_cache;
197
198 again:
199         ret = cache_block_group(root, cache);
200         if (ret)
201                 goto out;
202
203         last = max(search_start, cache->key.objectid);
204         if (!block_group_bits(cache, data)) {
205                 goto new_group;
206         }
207
208         spin_lock_irq(&free_space_cache->lock);
209         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
210         while(1) {
211                 if (!state) {
212                         if (!cache_miss)
213                                 cache_miss = last;
214                         spin_unlock_irq(&free_space_cache->lock);
215                         goto new_group;
216                 }
217
218                 start = max(last, state->start);
219                 last = state->end + 1;
220                 if (last - start < num) {
221                         if (last == cache->key.objectid + cache->key.offset)
222                                 cache_miss = start;
223                         do {
224                                 state = extent_state_next(state);
225                         } while(state && !(state->state & EXTENT_DIRTY));
226                         continue;
227                 }
228                 spin_unlock_irq(&free_space_cache->lock);
229                 if (start + num > cache->key.objectid + cache->key.offset)
230                         goto new_group;
231                 if (start + num  > total_fs_bytes)
232                         goto new_group;
233                 *start_ret = start;
234                 return 0;
235         } out:
236         cache = btrfs_lookup_block_group(root->fs_info, search_start);
237         if (!cache) {
238                 printk("Unable to find block group for %Lu\n", search_start);
239                 WARN_ON(1);
240         }
241         return -ENOSPC;
242
243 new_group:
244         last = cache->key.objectid + cache->key.offset;
245 wrapped:
246         cache = btrfs_lookup_block_group(root->fs_info, last);
247         if (!cache || cache->key.objectid >= total_fs_bytes) {
248 no_cache:
249                 if (!wrapped) {
250                         wrapped = 1;
251                         last = search_start;
252                         goto wrapped;
253                 }
254                 goto out;
255         }
256         if (cache_miss && !cache->cached) {
257                 cache_block_group(root, cache);
258                 last = cache_miss;
259                 cache = btrfs_lookup_block_group(root->fs_info, last);
260         }
261         cache = btrfs_find_block_group(root, cache, last, data, 0);
262         if (!cache)
263                 goto no_cache;
264         *cache_ret = cache;
265         cache_miss = 0;
266         goto again;
267 }
268
269 static u64 div_factor(u64 num, int factor)
270 {
271         if (factor == 10)
272                 return num;
273         num *= factor;
274         do_div(num, 10);
275         return num;
276 }
277
278 static int block_group_state_bits(u64 flags)
279 {
280         int bits = 0;
281         if (flags & BTRFS_BLOCK_GROUP_DATA)
282                 bits |= BLOCK_GROUP_DATA;
283         if (flags & BTRFS_BLOCK_GROUP_METADATA)
284                 bits |= BLOCK_GROUP_METADATA;
285         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
286                 bits |= BLOCK_GROUP_SYSTEM;
287         return bits;
288 }
289
290 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
291                                                  struct btrfs_block_group_cache
292                                                  *hint, u64 search_start,
293                                                  int data, int owner)
294 {
295         struct btrfs_block_group_cache *cache;
296         struct extent_io_tree *block_group_cache;
297         struct btrfs_block_group_cache *found_group = NULL;
298         struct btrfs_fs_info *info = root->fs_info;
299         u64 used;
300         u64 last = 0;
301         u64 hint_last;
302         u64 start;
303         u64 end;
304         u64 free_check;
305         u64 ptr;
306         u64 total_fs_bytes;
307         int bit;
308         int ret;
309         int full_search = 0;
310         int factor = 8;
311
312         block_group_cache = &info->block_group_cache;
313         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
314
315         if (!owner)
316                 factor = 8;
317
318         bit = block_group_state_bits(data);
319
320         if (search_start && search_start < total_fs_bytes) {
321                 struct btrfs_block_group_cache *shint;
322                 shint = btrfs_lookup_block_group(info, search_start);
323                 if (shint && block_group_bits(shint, data)) {
324                         used = btrfs_block_group_used(&shint->item);
325                         if (used + shint->pinned <
326                             div_factor(shint->key.offset, factor)) {
327                                 return shint;
328                         }
329                 }
330         }
331         if (hint && block_group_bits(hint, data) &&
332             hint->key.objectid < total_fs_bytes) {
333                 used = btrfs_block_group_used(&hint->item);
334                 if (used + hint->pinned <
335                     div_factor(hint->key.offset, factor)) {
336                         return hint;
337                 }
338                 last = hint->key.objectid + hint->key.offset;
339                 hint_last = last;
340         } else {
341                 if (hint)
342                         hint_last = max(hint->key.objectid, search_start);
343                 else
344                         hint_last = search_start;
345
346                 if (hint_last >= total_fs_bytes)
347                         hint_last = search_start;
348                 last = hint_last;
349         }
350 again:
351         while(1) {
352                 ret = find_first_extent_bit(block_group_cache, last,
353                                             &start, &end, bit);
354                 if (ret)
355                         break;
356
357                 ret = get_state_private(block_group_cache, start, &ptr);
358                 if (ret)
359                         break;
360
361                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
362                 last = cache->key.objectid + cache->key.offset;
363                 used = btrfs_block_group_used(&cache->item);
364
365                 if (cache->key.objectid > total_fs_bytes)
366                         break;
367
368                 if (full_search)
369                         free_check = cache->key.offset;
370                 else
371                         free_check = div_factor(cache->key.offset, factor);
372
373                 if (used + cache->pinned < free_check) {
374                         found_group = cache;
375                         goto found;
376                 }
377                 cond_resched();
378         }
379         if (!full_search) {
380                 last = search_start;
381                 full_search = 1;
382                 goto again;
383         }
384 found:
385         return found_group;
386 }
387
388 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
389                            u64 owner, u64 owner_offset)
390 {
391         u32 high_crc = ~(u32)0;
392         u32 low_crc = ~(u32)0;
393         __le64 lenum;
394
395         lenum = cpu_to_le64(root_objectid);
396         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
397         lenum = cpu_to_le64(ref_generation);
398         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
399         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
400                 lenum = cpu_to_le64(owner);
401                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
402                 lenum = cpu_to_le64(owner_offset);
403                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
404         }
405         return ((u64)high_crc << 32) | (u64)low_crc;
406 }
407
408 static int match_extent_ref(struct extent_buffer *leaf,
409                             struct btrfs_extent_ref *disk_ref,
410                             struct btrfs_extent_ref *cpu_ref)
411 {
412         int ret;
413         int len;
414
415         if (cpu_ref->objectid)
416                 len = sizeof(*cpu_ref);
417         else
418                 len = 2 * sizeof(u64);
419         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
420                                    len);
421         return ret == 0;
422 }
423
424 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
425                                           struct btrfs_root *root,
426                                           struct btrfs_path *path, u64 bytenr,
427                                           u64 root_objectid,
428                                           u64 ref_generation, u64 owner,
429                                           u64 owner_offset, int del)
430 {
431         u64 hash;
432         struct btrfs_key key;
433         struct btrfs_key found_key;
434         struct btrfs_extent_ref ref;
435         struct extent_buffer *leaf;
436         struct btrfs_extent_ref *disk_ref;
437         int ret;
438         int ret2;
439
440         btrfs_set_stack_ref_root(&ref, root_objectid);
441         btrfs_set_stack_ref_generation(&ref, ref_generation);
442         btrfs_set_stack_ref_objectid(&ref, owner);
443         btrfs_set_stack_ref_offset(&ref, owner_offset);
444
445         hash = hash_extent_ref(root_objectid, ref_generation, owner,
446                                owner_offset);
447         key.offset = hash;
448         key.objectid = bytenr;
449         key.type = BTRFS_EXTENT_REF_KEY;
450
451         while (1) {
452                 ret = btrfs_search_slot(trans, root, &key, path,
453                                         del ? -1 : 0, del);
454                 if (ret < 0)
455                         goto out;
456                 leaf = path->nodes[0];
457                 if (ret != 0) {
458                         u32 nritems = btrfs_header_nritems(leaf);
459                         if (path->slots[0] >= nritems) {
460                                 ret2 = btrfs_next_leaf(root, path);
461                                 if (ret2)
462                                         goto out;
463                                 leaf = path->nodes[0];
464                         }
465                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
466                         if (found_key.objectid != bytenr ||
467                             found_key.type != BTRFS_EXTENT_REF_KEY)
468                                 goto out;
469                         key.offset = found_key.offset;
470                         if (del) {
471                                 btrfs_release_path(root, path);
472                                 continue;
473                         }
474                 }
475                 disk_ref = btrfs_item_ptr(path->nodes[0],
476                                           path->slots[0],
477                                           struct btrfs_extent_ref);
478                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
479                         ret = 0;
480                         goto out;
481                 }
482                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
483                 key.offset = found_key.offset + 1;
484                 btrfs_release_path(root, path);
485         }
486 out:
487         return ret;
488 }
489
490 /*
491  * Back reference rules.  Back refs have three main goals:
492  *
493  * 1) differentiate between all holders of references to an extent so that
494  *    when a reference is dropped we can make sure it was a valid reference
495  *    before freeing the extent.
496  *
497  * 2) Provide enough information to quickly find the holders of an extent
498  *    if we notice a given block is corrupted or bad.
499  *
500  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
501  *    maintenance.  This is actually the same as #2, but with a slightly
502  *    different use case.
503  *
504  * File extents can be referenced by:
505  *
506  * - multiple snapshots, subvolumes, or different generations in one subvol
507  * - different files inside a single subvolume (in theory, not implemented yet)
508  * - different offsets inside a file (bookend extents in file.c)
509  *
510  * The extent ref structure has fields for:
511  *
512  * - Objectid of the subvolume root
513  * - Generation number of the tree holding the reference
514  * - objectid of the file holding the reference
515  * - offset in the file corresponding to the key holding the reference
516  *
517  * When a file extent is allocated the fields are filled in:
518  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
519  *
520  * When a leaf is cow'd new references are added for every file extent found
521  * in the leaf.  It looks the same as the create case, but trans->transid
522  * will be different when the block is cow'd.
523  *
524  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
525  *
526  * When a file extent is removed either during snapshot deletion or file
527  * truncation, the corresponding back reference is found
528  * by searching for:
529  *
530  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
531  *      inode objectid, offset in file)
532  *
533  * Btree extents can be referenced by:
534  *
535  * - Different subvolumes
536  * - Different generations of the same subvolume
537  *
538  * Storing sufficient information for a full reverse mapping of a btree
539  * block would require storing the lowest key of the block in the backref,
540  * and it would require updating that lowest key either before write out or
541  * every time it changed.  Instead, the objectid of the lowest key is stored
542  * along with the level of the tree block.  This provides a hint
543  * about where in the btree the block can be found.  Searches through the
544  * btree only need to look for a pointer to that block, so they stop one
545  * level higher than the level recorded in the backref.
546  *
547  * Some btrees do not do reference counting on their extents.  These
548  * include the extent tree and the tree of tree roots.  Backrefs for these
549  * trees always have a generation of zero.
550  *
551  * When a tree block is created, back references are inserted:
552  *
553  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
554  *
555  * When a tree block is cow'd in a reference counted root,
556  * new back references are added for all the blocks it points to.
557  * These are of the form (trans->transid will have increased since creation):
558  *
559  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
560  *
561  * Because the lowest_key_objectid and the level are just hints
562  * they are not used when backrefs are deleted.  When a backref is deleted:
563  *
564  * if backref was for a tree root:
565  *     root_objectid = root->root_key.objectid
566  * else
567  *     root_objectid = btrfs_header_owner(parent)
568  *
569  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
570  *
571  * Back Reference Key hashing:
572  *
573  * Back references have four fields, each 64 bits long.  Unfortunately,
574  * This is hashed into a single 64 bit number and placed into the key offset.
575  * The key objectid corresponds to the first byte in the extent, and the
576  * key type is set to BTRFS_EXTENT_REF_KEY
577  */
578 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
579                                  struct btrfs_root *root,
580                                  struct btrfs_path *path, u64 bytenr,
581                                  u64 root_objectid, u64 ref_generation,
582                                  u64 owner, u64 owner_offset)
583 {
584         u64 hash;
585         struct btrfs_key key;
586         struct btrfs_extent_ref ref;
587         struct btrfs_extent_ref *disk_ref;
588         int ret;
589
590         btrfs_set_stack_ref_root(&ref, root_objectid);
591         btrfs_set_stack_ref_generation(&ref, ref_generation);
592         btrfs_set_stack_ref_objectid(&ref, owner);
593         btrfs_set_stack_ref_offset(&ref, owner_offset);
594
595         hash = hash_extent_ref(root_objectid, ref_generation, owner,
596                                owner_offset);
597         key.offset = hash;
598         key.objectid = bytenr;
599         key.type = BTRFS_EXTENT_REF_KEY;
600
601         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
602         while (ret == -EEXIST) {
603                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
604                                           struct btrfs_extent_ref);
605                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
606                         goto out;
607                 key.offset++;
608                 btrfs_release_path(root, path);
609                 ret = btrfs_insert_empty_item(trans, root, path, &key,
610                                               sizeof(ref));
611         }
612         if (ret)
613                 goto out;
614         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
615                                   struct btrfs_extent_ref);
616         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
617                             sizeof(ref));
618         btrfs_mark_buffer_dirty(path->nodes[0]);
619 out:
620         btrfs_release_path(root, path);
621         return ret;
622 }
623
624 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
625                                 struct btrfs_root *root,
626                                 u64 bytenr, u64 num_bytes,
627                                 u64 root_objectid, u64 ref_generation,
628                                 u64 owner, u64 owner_offset)
629 {
630         struct btrfs_path *path;
631         int ret;
632         struct btrfs_key key;
633         struct extent_buffer *l;
634         struct btrfs_extent_item *item;
635         u32 refs;
636
637         WARN_ON(num_bytes < root->sectorsize);
638         path = btrfs_alloc_path();
639         if (!path)
640                 return -ENOMEM;
641
642         path->reada = 0;
643         key.objectid = bytenr;
644         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
645         key.offset = num_bytes;
646         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
647                                 0, 1);
648         if (ret < 0)
649                 return ret;
650         if (ret != 0) {
651                 BUG();
652         }
653         BUG_ON(ret != 0);
654         l = path->nodes[0];
655         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
656         refs = btrfs_extent_refs(l, item);
657         btrfs_set_extent_refs(l, item, refs + 1);
658         btrfs_mark_buffer_dirty(path->nodes[0]);
659
660         btrfs_release_path(root->fs_info->extent_root, path);
661
662         path->reada = 0;
663         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
664                                           path, bytenr, root_objectid,
665                                           ref_generation, owner, owner_offset);
666         BUG_ON(ret);
667         finish_current_insert(trans, root->fs_info->extent_root);
668         del_pending_extents(trans, root->fs_info->extent_root);
669
670         btrfs_free_path(path);
671         return 0;
672 }
673
674 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
675                          struct btrfs_root *root)
676 {
677         finish_current_insert(trans, root->fs_info->extent_root);
678         del_pending_extents(trans, root->fs_info->extent_root);
679         return 0;
680 }
681
682 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
683                              struct btrfs_root *root, u64 bytenr,
684                              u64 num_bytes, u32 *refs)
685 {
686         struct btrfs_path *path;
687         int ret;
688         struct btrfs_key key;
689         struct extent_buffer *l;
690         struct btrfs_extent_item *item;
691
692         WARN_ON(num_bytes < root->sectorsize);
693         path = btrfs_alloc_path();
694         path->reada = 0;
695         key.objectid = bytenr;
696         key.offset = num_bytes;
697         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
698         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
699                                 0, 0);
700         if (ret < 0)
701                 goto out;
702         if (ret != 0) {
703                 btrfs_print_leaf(root, path->nodes[0]);
704                 printk("failed to find block number %Lu\n", bytenr);
705                 BUG();
706         }
707         l = path->nodes[0];
708         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
709         *refs = btrfs_extent_refs(l, item);
710 out:
711         btrfs_free_path(path);
712         return 0;
713 }
714
715 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
716                                   struct btrfs_path *count_path,
717                                   u64 first_extent)
718 {
719         struct btrfs_root *extent_root = root->fs_info->extent_root;
720         struct btrfs_path *path;
721         u64 bytenr;
722         u64 found_objectid;
723         u64 root_objectid = root->root_key.objectid;
724         u32 total_count = 0;
725         u32 cur_count;
726         u32 nritems;
727         int ret;
728         struct btrfs_key key;
729         struct btrfs_key found_key;
730         struct extent_buffer *l;
731         struct btrfs_extent_item *item;
732         struct btrfs_extent_ref *ref_item;
733         int level = -1;
734
735         path = btrfs_alloc_path();
736 again:
737         if (level == -1)
738                 bytenr = first_extent;
739         else
740                 bytenr = count_path->nodes[level]->start;
741
742         cur_count = 0;
743         key.objectid = bytenr;
744         key.offset = 0;
745
746         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
747         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
748         if (ret < 0)
749                 goto out;
750         BUG_ON(ret == 0);
751
752         l = path->nodes[0];
753         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
754
755         if (found_key.objectid != bytenr ||
756             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
757                 goto out;
758         }
759
760         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
761         while (1) {
762                 l = path->nodes[0];
763                 nritems = btrfs_header_nritems(l);
764                 if (path->slots[0] >= nritems) {
765                         ret = btrfs_next_leaf(extent_root, path);
766                         if (ret == 0)
767                                 continue;
768                         break;
769                 }
770                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
771                 if (found_key.objectid != bytenr)
772                         break;
773
774                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
775                         path->slots[0]++;
776                         continue;
777                 }
778
779                 cur_count++;
780                 ref_item = btrfs_item_ptr(l, path->slots[0],
781                                           struct btrfs_extent_ref);
782                 found_objectid = btrfs_ref_root(l, ref_item);
783
784                 if (found_objectid != root_objectid) {
785                         total_count = 2;
786                         goto out;
787                 }
788                 total_count = 1;
789                 path->slots[0]++;
790         }
791         if (cur_count == 0) {
792                 total_count = 0;
793                 goto out;
794         }
795         if (level >= 0 && root->node == count_path->nodes[level])
796                 goto out;
797         level++;
798         btrfs_release_path(root, path);
799         goto again;
800
801 out:
802         btrfs_free_path(path);
803         return total_count;
804 }
805 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
806                        struct btrfs_root *root, u64 owner_objectid)
807 {
808         u64 generation;
809         u64 key_objectid;
810         u64 level;
811         u32 nritems;
812         struct btrfs_disk_key disk_key;
813
814         level = btrfs_header_level(root->node);
815         generation = trans->transid;
816         nritems = btrfs_header_nritems(root->node);
817         if (nritems > 0) {
818                 if (level == 0)
819                         btrfs_item_key(root->node, &disk_key, 0);
820                 else
821                         btrfs_node_key(root->node, &disk_key, 0);
822                 key_objectid = btrfs_disk_key_objectid(&disk_key);
823         } else {
824                 key_objectid = 0;
825         }
826         return btrfs_inc_extent_ref(trans, root, root->node->start,
827                                     root->node->len, owner_objectid,
828                                     generation, level, key_objectid);
829 }
830
831 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
832                   struct extent_buffer *buf)
833 {
834         u64 bytenr;
835         u32 nritems;
836         struct btrfs_key key;
837         struct btrfs_file_extent_item *fi;
838         int i;
839         int level;
840         int ret;
841         int faili;
842
843         if (!root->ref_cows)
844                 return 0;
845
846         level = btrfs_header_level(buf);
847         nritems = btrfs_header_nritems(buf);
848         for (i = 0; i < nritems; i++) {
849                 if (level == 0) {
850                         u64 disk_bytenr;
851                         btrfs_item_key_to_cpu(buf, &key, i);
852                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
853                                 continue;
854                         fi = btrfs_item_ptr(buf, i,
855                                             struct btrfs_file_extent_item);
856                         if (btrfs_file_extent_type(buf, fi) ==
857                             BTRFS_FILE_EXTENT_INLINE)
858                                 continue;
859                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
860                         if (disk_bytenr == 0)
861                                 continue;
862                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
863                                     btrfs_file_extent_disk_num_bytes(buf, fi),
864                                     root->root_key.objectid, trans->transid,
865                                     key.objectid, key.offset);
866                         if (ret) {
867                                 faili = i;
868                                 goto fail;
869                         }
870                 } else {
871                         bytenr = btrfs_node_blockptr(buf, i);
872                         btrfs_node_key_to_cpu(buf, &key, i);
873                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
874                                            btrfs_level_size(root, level - 1),
875                                            root->root_key.objectid,
876                                            trans->transid,
877                                            level - 1, key.objectid);
878                         if (ret) {
879                                 faili = i;
880                                 goto fail;
881                         }
882                 }
883         }
884         return 0;
885 fail:
886         WARN_ON(1);
887 #if 0
888         for (i =0; i < faili; i++) {
889                 if (level == 0) {
890                         u64 disk_bytenr;
891                         btrfs_item_key_to_cpu(buf, &key, i);
892                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
893                                 continue;
894                         fi = btrfs_item_ptr(buf, i,
895                                             struct btrfs_file_extent_item);
896                         if (btrfs_file_extent_type(buf, fi) ==
897                             BTRFS_FILE_EXTENT_INLINE)
898                                 continue;
899                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
900                         if (disk_bytenr == 0)
901                                 continue;
902                         err = btrfs_free_extent(trans, root, disk_bytenr,
903                                     btrfs_file_extent_disk_num_bytes(buf,
904                                                                       fi), 0);
905                         BUG_ON(err);
906                 } else {
907                         bytenr = btrfs_node_blockptr(buf, i);
908                         err = btrfs_free_extent(trans, root, bytenr,
909                                         btrfs_level_size(root, level - 1), 0);
910                         BUG_ON(err);
911                 }
912         }
913 #endif
914         return ret;
915 }
916
917 static int write_one_cache_group(struct btrfs_trans_handle *trans,
918                                  struct btrfs_root *root,
919                                  struct btrfs_path *path,
920                                  struct btrfs_block_group_cache *cache)
921 {
922         int ret;
923         int pending_ret;
924         struct btrfs_root *extent_root = root->fs_info->extent_root;
925         unsigned long bi;
926         struct extent_buffer *leaf;
927
928         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
929         if (ret < 0)
930                 goto fail;
931         BUG_ON(ret);
932
933         leaf = path->nodes[0];
934         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
935         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
936         btrfs_mark_buffer_dirty(leaf);
937         btrfs_release_path(extent_root, path);
938 fail:
939         finish_current_insert(trans, extent_root);
940         pending_ret = del_pending_extents(trans, extent_root);
941         if (ret)
942                 return ret;
943         if (pending_ret)
944                 return pending_ret;
945         return 0;
946
947 }
948
949 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
950                                    struct btrfs_root *root)
951 {
952         struct extent_io_tree *block_group_cache;
953         struct btrfs_block_group_cache *cache;
954         int ret;
955         int err = 0;
956         int werr = 0;
957         struct btrfs_path *path;
958         u64 last = 0;
959         u64 start;
960         u64 end;
961         u64 ptr;
962
963         block_group_cache = &root->fs_info->block_group_cache;
964         path = btrfs_alloc_path();
965         if (!path)
966                 return -ENOMEM;
967
968         while(1) {
969                 ret = find_first_extent_bit(block_group_cache, last,
970                                             &start, &end, BLOCK_GROUP_DIRTY);
971                 if (ret)
972                         break;
973
974                 last = end + 1;
975                 ret = get_state_private(block_group_cache, start, &ptr);
976                 if (ret)
977                         break;
978
979                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
980                 err = write_one_cache_group(trans, root,
981                                             path, cache);
982                 /*
983                  * if we fail to write the cache group, we want
984                  * to keep it marked dirty in hopes that a later
985                  * write will work
986                  */
987                 if (err) {
988                         werr = err;
989                         continue;
990                 }
991                 clear_extent_bits(block_group_cache, start, end,
992                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
993         }
994         btrfs_free_path(path);
995         return werr;
996 }
997
998 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
999                                                   u64 flags)
1000 {
1001         struct list_head *head = &info->space_info;
1002         struct list_head *cur;
1003         struct btrfs_space_info *found;
1004         list_for_each(cur, head) {
1005                 found = list_entry(cur, struct btrfs_space_info, list);
1006                 if (found->flags == flags)
1007                         return found;
1008         }
1009         return NULL;
1010
1011 }
1012
1013 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1014                              u64 total_bytes, u64 bytes_used,
1015                              struct btrfs_space_info **space_info)
1016 {
1017         struct btrfs_space_info *found;
1018
1019         found = __find_space_info(info, flags);
1020         if (found) {
1021                 found->total_bytes += total_bytes;
1022                 found->bytes_used += bytes_used;
1023                 WARN_ON(found->total_bytes < found->bytes_used);
1024                 *space_info = found;
1025                 return 0;
1026         }
1027         found = kmalloc(sizeof(*found), GFP_NOFS);
1028         if (!found)
1029                 return -ENOMEM;
1030
1031         list_add(&found->list, &info->space_info);
1032         found->flags = flags;
1033         found->total_bytes = total_bytes;
1034         found->bytes_used = bytes_used;
1035         found->bytes_pinned = 0;
1036         found->full = 0;
1037         *space_info = found;
1038         return 0;
1039 }
1040
1041
1042 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1043                           struct btrfs_root *extent_root, u64 alloc_bytes,
1044                           u64 flags)
1045 {
1046         struct btrfs_space_info *space_info;
1047         u64 thresh;
1048         u64 start;
1049         u64 num_bytes;
1050         int ret;
1051
1052         space_info = __find_space_info(extent_root->fs_info, flags);
1053         if (!space_info) {
1054                 ret = update_space_info(extent_root->fs_info, flags,
1055                                         0, 0, &space_info);
1056                 BUG_ON(ret);
1057         }
1058         BUG_ON(!space_info);
1059
1060         if (space_info->full)
1061                 return 0;
1062
1063         thresh = div_factor(space_info->total_bytes, 7);
1064         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1065             thresh)
1066                 return 0;
1067
1068         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1069         if (ret == -ENOSPC) {
1070 printk("space info full %Lu\n", flags);
1071                 space_info->full = 1;
1072                 return 0;
1073         }
1074
1075         BUG_ON(ret);
1076
1077         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1078                      extent_root->fs_info->chunk_root->root_key.objectid,
1079                      start, num_bytes);
1080         BUG_ON(ret);
1081
1082         if (flags & BTRFS_BLOCK_GROUP_RAID0) {
1083                 if (flags & BTRFS_BLOCK_GROUP_DATA) {
1084                         extent_root->fs_info->extra_data_alloc_bits =
1085                                 BTRFS_BLOCK_GROUP_RAID0;
1086                 }
1087                 if (flags & BTRFS_BLOCK_GROUP_METADATA) {
1088                         extent_root->fs_info->extra_alloc_bits =
1089                                 BTRFS_BLOCK_GROUP_RAID0;
1090                 }
1091         }
1092         return 0;
1093 }
1094
1095 static int update_block_group(struct btrfs_trans_handle *trans,
1096                               struct btrfs_root *root,
1097                               u64 bytenr, u64 num_bytes, int alloc,
1098                               int mark_free)
1099 {
1100         struct btrfs_block_group_cache *cache;
1101         struct btrfs_fs_info *info = root->fs_info;
1102         u64 total = num_bytes;
1103         u64 old_val;
1104         u64 byte_in_group;
1105         u64 start;
1106         u64 end;
1107
1108         while(total) {
1109                 cache = btrfs_lookup_block_group(info, bytenr);
1110                 if (!cache) {
1111                         return -1;
1112                 }
1113                 byte_in_group = bytenr - cache->key.objectid;
1114                 WARN_ON(byte_in_group > cache->key.offset);
1115                 start = cache->key.objectid;
1116                 end = start + cache->key.offset - 1;
1117                 set_extent_bits(&info->block_group_cache, start, end,
1118                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1119
1120                 old_val = btrfs_block_group_used(&cache->item);
1121                 num_bytes = min(total, cache->key.offset - byte_in_group);
1122                 if (alloc) {
1123                         old_val += num_bytes;
1124                         cache->space_info->bytes_used += num_bytes;
1125                 } else {
1126                         old_val -= num_bytes;
1127                         cache->space_info->bytes_used -= num_bytes;
1128                         if (mark_free) {
1129                                 set_extent_dirty(&info->free_space_cache,
1130                                                  bytenr, bytenr + num_bytes - 1,
1131                                                  GFP_NOFS);
1132                         }
1133                 }
1134                 btrfs_set_block_group_used(&cache->item, old_val);
1135                 total -= num_bytes;
1136                 bytenr += num_bytes;
1137         }
1138         return 0;
1139 }
1140
1141 static int update_pinned_extents(struct btrfs_root *root,
1142                                 u64 bytenr, u64 num, int pin)
1143 {
1144         u64 len;
1145         struct btrfs_block_group_cache *cache;
1146         struct btrfs_fs_info *fs_info = root->fs_info;
1147
1148         if (pin) {
1149                 set_extent_dirty(&fs_info->pinned_extents,
1150                                 bytenr, bytenr + num - 1, GFP_NOFS);
1151         } else {
1152                 clear_extent_dirty(&fs_info->pinned_extents,
1153                                 bytenr, bytenr + num - 1, GFP_NOFS);
1154         }
1155         while (num > 0) {
1156                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1157                 WARN_ON(!cache);
1158                 len = min(num, cache->key.offset -
1159                           (bytenr - cache->key.objectid));
1160                 if (pin) {
1161                         cache->pinned += len;
1162                         cache->space_info->bytes_pinned += len;
1163                         fs_info->total_pinned += len;
1164                 } else {
1165                         cache->pinned -= len;
1166                         cache->space_info->bytes_pinned -= len;
1167                         fs_info->total_pinned -= len;
1168                 }
1169                 bytenr += len;
1170                 num -= len;
1171         }
1172         return 0;
1173 }
1174
1175 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1176 {
1177         u64 last = 0;
1178         u64 start;
1179         u64 end;
1180         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1181         int ret;
1182
1183         while(1) {
1184                 ret = find_first_extent_bit(pinned_extents, last,
1185                                             &start, &end, EXTENT_DIRTY);
1186                 if (ret)
1187                         break;
1188                 set_extent_dirty(copy, start, end, GFP_NOFS);
1189                 last = end + 1;
1190         }
1191         return 0;
1192 }
1193
1194 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1195                                struct btrfs_root *root,
1196                                struct extent_io_tree *unpin)
1197 {
1198         u64 start;
1199         u64 end;
1200         int ret;
1201         struct extent_io_tree *free_space_cache;
1202         free_space_cache = &root->fs_info->free_space_cache;
1203
1204         while(1) {
1205                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1206                                             EXTENT_DIRTY);
1207                 if (ret)
1208                         break;
1209                 update_pinned_extents(root, start, end + 1 - start, 0);
1210                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1211                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1212         }
1213         return 0;
1214 }
1215
1216 static int finish_current_insert(struct btrfs_trans_handle *trans,
1217                                  struct btrfs_root *extent_root)
1218 {
1219         u64 start;
1220         u64 end;
1221         struct btrfs_fs_info *info = extent_root->fs_info;
1222         struct extent_buffer *eb;
1223         struct btrfs_path *path;
1224         struct btrfs_key ins;
1225         struct btrfs_disk_key first;
1226         struct btrfs_extent_item extent_item;
1227         int ret;
1228         int level;
1229         int err = 0;
1230
1231         btrfs_set_stack_extent_refs(&extent_item, 1);
1232         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1233         path = btrfs_alloc_path();
1234
1235         while(1) {
1236                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1237                                             &end, EXTENT_LOCKED);
1238                 if (ret)
1239                         break;
1240
1241                 ins.objectid = start;
1242                 ins.offset = end + 1 - start;
1243                 err = btrfs_insert_item(trans, extent_root, &ins,
1244                                         &extent_item, sizeof(extent_item));
1245                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1246                                   GFP_NOFS);
1247                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1248                 level = btrfs_header_level(eb);
1249                 if (level == 0) {
1250                         btrfs_item_key(eb, &first, 0);
1251                 } else {
1252                         btrfs_node_key(eb, &first, 0);
1253                 }
1254                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1255                                           start, extent_root->root_key.objectid,
1256                                           0, level,
1257                                           btrfs_disk_key_objectid(&first));
1258                 BUG_ON(err);
1259                 free_extent_buffer(eb);
1260         }
1261         btrfs_free_path(path);
1262         return 0;
1263 }
1264
1265 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1266                           int pending)
1267 {
1268         int err = 0;
1269         struct extent_buffer *buf;
1270
1271         if (!pending) {
1272                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1273                 if (buf) {
1274                         if (btrfs_buffer_uptodate(buf)) {
1275                                 u64 transid =
1276                                     root->fs_info->running_transaction->transid;
1277                                 u64 header_transid =
1278                                         btrfs_header_generation(buf);
1279                                 if (header_transid == transid) {
1280                                         clean_tree_block(NULL, root, buf);
1281                                         free_extent_buffer(buf);
1282                                         return 1;
1283                                 }
1284                         }
1285                         free_extent_buffer(buf);
1286                 }
1287                 update_pinned_extents(root, bytenr, num_bytes, 1);
1288         } else {
1289                 set_extent_bits(&root->fs_info->pending_del,
1290                                 bytenr, bytenr + num_bytes - 1,
1291                                 EXTENT_LOCKED, GFP_NOFS);
1292         }
1293         BUG_ON(err < 0);
1294         return 0;
1295 }
1296
1297 /*
1298  * remove an extent from the root, returns 0 on success
1299  */
1300 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1301                          *root, u64 bytenr, u64 num_bytes,
1302                          u64 root_objectid, u64 ref_generation,
1303                          u64 owner_objectid, u64 owner_offset, int pin,
1304                          int mark_free)
1305 {
1306         struct btrfs_path *path;
1307         struct btrfs_key key;
1308         struct btrfs_fs_info *info = root->fs_info;
1309         struct btrfs_root *extent_root = info->extent_root;
1310         struct extent_buffer *leaf;
1311         int ret;
1312         int extent_slot = 0;
1313         int found_extent = 0;
1314         int num_to_del = 1;
1315         struct btrfs_extent_item *ei;
1316         u32 refs;
1317
1318         key.objectid = bytenr;
1319         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1320         key.offset = num_bytes;
1321         path = btrfs_alloc_path();
1322         if (!path)
1323                 return -ENOMEM;
1324
1325         path->reada = 0;
1326         ret = lookup_extent_backref(trans, extent_root, path,
1327                                     bytenr, root_objectid,
1328                                     ref_generation,
1329                                     owner_objectid, owner_offset, 1);
1330         if (ret == 0) {
1331                 struct btrfs_key found_key;
1332                 extent_slot = path->slots[0];
1333                 while(extent_slot > 0) {
1334                         extent_slot--;
1335                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1336                                               extent_slot);
1337                         if (found_key.objectid != bytenr)
1338                                 break;
1339                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1340                             found_key.offset == num_bytes) {
1341                                 found_extent = 1;
1342                                 break;
1343                         }
1344                         if (path->slots[0] - extent_slot > 5)
1345                                 break;
1346                 }
1347                 if (!found_extent)
1348                         ret = btrfs_del_item(trans, extent_root, path);
1349         } else {
1350                 btrfs_print_leaf(extent_root, path->nodes[0]);
1351                 WARN_ON(1);
1352                 printk("Unable to find ref byte nr %Lu root %Lu "
1353                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1354                        root_objectid, ref_generation, owner_objectid,
1355                        owner_offset);
1356         }
1357         if (!found_extent) {
1358                 btrfs_release_path(extent_root, path);
1359                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1360                 if (ret < 0)
1361                         return ret;
1362                 BUG_ON(ret);
1363                 extent_slot = path->slots[0];
1364         }
1365
1366         leaf = path->nodes[0];
1367         ei = btrfs_item_ptr(leaf, extent_slot,
1368                             struct btrfs_extent_item);
1369         refs = btrfs_extent_refs(leaf, ei);
1370         BUG_ON(refs == 0);
1371         refs -= 1;
1372         btrfs_set_extent_refs(leaf, ei, refs);
1373
1374         btrfs_mark_buffer_dirty(leaf);
1375
1376         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1377                 /* if the back ref and the extent are next to each other
1378                  * they get deleted below in one shot
1379                  */
1380                 path->slots[0] = extent_slot;
1381                 num_to_del = 2;
1382         } else if (found_extent) {
1383                 /* otherwise delete the extent back ref */
1384                 ret = btrfs_del_item(trans, extent_root, path);
1385                 BUG_ON(ret);
1386                 /* if refs are 0, we need to setup the path for deletion */
1387                 if (refs == 0) {
1388                         btrfs_release_path(extent_root, path);
1389                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1390                                                 -1, 1);
1391                         if (ret < 0)
1392                                 return ret;
1393                         BUG_ON(ret);
1394                 }
1395         }
1396
1397         if (refs == 0) {
1398                 u64 super_used;
1399                 u64 root_used;
1400
1401                 if (pin) {
1402                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1403                         if (ret > 0)
1404                                 mark_free = 1;
1405                         BUG_ON(ret < 0);
1406                 }
1407
1408                 /* block accounting for super block */
1409                 super_used = btrfs_super_bytes_used(&info->super_copy);
1410                 btrfs_set_super_bytes_used(&info->super_copy,
1411                                            super_used - num_bytes);
1412
1413                 /* block accounting for root item */
1414                 root_used = btrfs_root_used(&root->root_item);
1415                 btrfs_set_root_used(&root->root_item,
1416                                            root_used - num_bytes);
1417                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1418                                       num_to_del);
1419                 if (ret) {
1420                         return ret;
1421                 }
1422                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1423                                          mark_free);
1424                 BUG_ON(ret);
1425         }
1426         btrfs_free_path(path);
1427         finish_current_insert(trans, extent_root);
1428         return ret;
1429 }
1430
1431 /*
1432  * find all the blocks marked as pending in the radix tree and remove
1433  * them from the extent map
1434  */
1435 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1436                                btrfs_root *extent_root)
1437 {
1438         int ret;
1439         int err = 0;
1440         u64 start;
1441         u64 end;
1442         struct extent_io_tree *pending_del;
1443         struct extent_io_tree *pinned_extents;
1444
1445         pending_del = &extent_root->fs_info->pending_del;
1446         pinned_extents = &extent_root->fs_info->pinned_extents;
1447
1448         while(1) {
1449                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1450                                             EXTENT_LOCKED);
1451                 if (ret)
1452                         break;
1453                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1454                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1455                                   GFP_NOFS);
1456                 ret = __free_extent(trans, extent_root,
1457                                      start, end + 1 - start,
1458                                      extent_root->root_key.objectid,
1459                                      0, 0, 0, 0, 0);
1460                 if (ret)
1461                         err = ret;
1462         }
1463         return err;
1464 }
1465
1466 /*
1467  * remove an extent from the root, returns 0 on success
1468  */
1469 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1470                       *root, u64 bytenr, u64 num_bytes,
1471                       u64 root_objectid, u64 ref_generation,
1472                       u64 owner_objectid, u64 owner_offset, int pin)
1473 {
1474         struct btrfs_root *extent_root = root->fs_info->extent_root;
1475         int pending_ret;
1476         int ret;
1477
1478         WARN_ON(num_bytes < root->sectorsize);
1479         if (!root->ref_cows)
1480                 ref_generation = 0;
1481
1482         if (root == extent_root) {
1483                 pin_down_bytes(root, bytenr, num_bytes, 1);
1484                 return 0;
1485         }
1486         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1487                             ref_generation, owner_objectid, owner_offset,
1488                             pin, pin == 0);
1489         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1490         return ret ? ret : pending_ret;
1491 }
1492
1493 static u64 stripe_align(struct btrfs_root *root, u64 val)
1494 {
1495         u64 mask = ((u64)root->stripesize - 1);
1496         u64 ret = (val + mask) & ~mask;
1497         return ret;
1498 }
1499
1500 /*
1501  * walks the btree of allocated extents and find a hole of a given size.
1502  * The key ins is changed to record the hole:
1503  * ins->objectid == block start
1504  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1505  * ins->offset == number of blocks
1506  * Any available blocks before search_start are skipped.
1507  */
1508 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1509                                      struct btrfs_root *orig_root,
1510                                      u64 num_bytes, u64 empty_size,
1511                                      u64 search_start, u64 search_end,
1512                                      u64 hint_byte, struct btrfs_key *ins,
1513                                      u64 exclude_start, u64 exclude_nr,
1514                                      int data)
1515 {
1516         int ret;
1517         u64 orig_search_start = search_start;
1518         struct btrfs_root * root = orig_root->fs_info->extent_root;
1519         struct btrfs_fs_info *info = root->fs_info;
1520         u64 total_needed = num_bytes;
1521         u64 *last_ptr = NULL;
1522         struct btrfs_block_group_cache *block_group;
1523         int full_scan = 0;
1524         int wrapped = 0;
1525         int empty_cluster = 2 * 1024 * 1024;
1526
1527         WARN_ON(num_bytes < root->sectorsize);
1528         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1529
1530         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1531                 last_ptr = &root->fs_info->last_alloc;
1532         }
1533
1534         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1535                 last_ptr = &root->fs_info->last_data_alloc;
1536         }
1537
1538         if (last_ptr) {
1539                 if (*last_ptr)
1540                         hint_byte = *last_ptr;
1541                 else {
1542                         empty_size += empty_cluster;
1543                 }
1544         }
1545
1546         if (search_end == (u64)-1)
1547                 search_end = btrfs_super_total_bytes(&info->super_copy);
1548
1549         if (hint_byte) {
1550                 block_group = btrfs_lookup_block_group(info, hint_byte);
1551                 if (!block_group)
1552                         hint_byte = search_start;
1553                 block_group = btrfs_find_block_group(root, block_group,
1554                                                      hint_byte, data, 1);
1555                 if (last_ptr && *last_ptr == 0 && block_group)
1556                         hint_byte = block_group->key.objectid;
1557         } else {
1558                 block_group = btrfs_find_block_group(root,
1559                                                      trans->block_group,
1560                                                      search_start, data, 1);
1561         }
1562         search_start = max(search_start, hint_byte);
1563
1564         total_needed += empty_size;
1565
1566 check_failed:
1567         if (!block_group) {
1568                 block_group = btrfs_lookup_block_group(info, search_start);
1569                 if (!block_group)
1570                         block_group = btrfs_lookup_block_group(info,
1571                                                        orig_search_start);
1572         }
1573         ret = find_search_start(root, &block_group, &search_start,
1574                                 total_needed, data);
1575         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1576                 *last_ptr = 0;
1577                 block_group = btrfs_lookup_block_group(info,
1578                                                        orig_search_start);
1579                 search_start = orig_search_start;
1580                 ret = find_search_start(root, &block_group, &search_start,
1581                                         total_needed, data);
1582         }
1583         if (ret == -ENOSPC)
1584                 goto enospc;
1585         if (ret)
1586                 goto error;
1587
1588         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1589                 *last_ptr = 0;
1590                 if (!empty_size) {
1591                         empty_size += empty_cluster;
1592                         total_needed += empty_size;
1593                 }
1594                 block_group = btrfs_lookup_block_group(info,
1595                                                        orig_search_start);
1596                 search_start = orig_search_start;
1597                 ret = find_search_start(root, &block_group,
1598                                         &search_start, total_needed, data);
1599                 if (ret == -ENOSPC)
1600                         goto enospc;
1601                 if (ret)
1602                         goto error;
1603         }
1604
1605         search_start = stripe_align(root, search_start);
1606         ins->objectid = search_start;
1607         ins->offset = num_bytes;
1608
1609         if (ins->objectid + num_bytes >= search_end)
1610                 goto enospc;
1611
1612         if (ins->objectid + num_bytes >
1613             block_group->key.objectid + block_group->key.offset) {
1614                 search_start = block_group->key.objectid +
1615                         block_group->key.offset;
1616                 goto new_group;
1617         }
1618
1619         if (test_range_bit(&info->extent_ins, ins->objectid,
1620                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1621                 search_start = ins->objectid + num_bytes;
1622                 goto new_group;
1623         }
1624
1625         if (test_range_bit(&info->pinned_extents, ins->objectid,
1626                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1627                 search_start = ins->objectid + num_bytes;
1628                 goto new_group;
1629         }
1630
1631         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1632             ins->objectid < exclude_start + exclude_nr)) {
1633                 search_start = exclude_start + exclude_nr;
1634                 goto new_group;
1635         }
1636
1637         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1638                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1639                 if (block_group)
1640                         trans->block_group = block_group;
1641         }
1642         ins->offset = num_bytes;
1643         if (last_ptr) {
1644                 *last_ptr = ins->objectid + ins->offset;
1645                 if (*last_ptr ==
1646                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1647                         *last_ptr = 0;
1648                 }
1649         }
1650         return 0;
1651
1652 new_group:
1653         if (search_start + num_bytes >= search_end) {
1654 enospc:
1655                 search_start = orig_search_start;
1656                 if (full_scan) {
1657                         ret = -ENOSPC;
1658                         goto error;
1659                 }
1660                 if (wrapped) {
1661                         if (!full_scan)
1662                                 total_needed -= empty_size;
1663                         full_scan = 1;
1664                 } else
1665                         wrapped = 1;
1666         }
1667         block_group = btrfs_lookup_block_group(info, search_start);
1668         cond_resched();
1669         block_group = btrfs_find_block_group(root, block_group,
1670                                              search_start, data, 0);
1671         goto check_failed;
1672
1673 error:
1674         return ret;
1675 }
1676 /*
1677  * finds a free extent and does all the dirty work required for allocation
1678  * returns the key for the extent through ins, and a tree buffer for
1679  * the first block of the extent through buf.
1680  *
1681  * returns 0 if everything worked, non-zero otherwise.
1682  */
1683 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1684                        struct btrfs_root *root,
1685                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1686                        u64 owner, u64 owner_offset,
1687                        u64 empty_size, u64 hint_byte,
1688                        u64 search_end, struct btrfs_key *ins, int data)
1689 {
1690         int ret;
1691         int pending_ret;
1692         u64 super_used;
1693         u64 root_used;
1694         u64 search_start = 0;
1695         u64 new_hint;
1696         u32 sizes[2];
1697         struct btrfs_fs_info *info = root->fs_info;
1698         struct btrfs_root *extent_root = info->extent_root;
1699         struct btrfs_extent_item *extent_item;
1700         struct btrfs_extent_ref *ref;
1701         struct btrfs_path *path;
1702         struct btrfs_key keys[2];
1703         int extra_chunk_alloc_bits = 0;
1704
1705         if (data) {
1706                 data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits;
1707         } else if (root == root->fs_info->chunk_root) {
1708                 data = BTRFS_BLOCK_GROUP_SYSTEM;
1709         } else {
1710                 data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits;
1711         }
1712         if (btrfs_super_num_devices(&info->super_copy) > 1 &&
1713             !(data & BTRFS_BLOCK_GROUP_SYSTEM))
1714                 extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0;
1715
1716         if (root->ref_cows) {
1717                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1718                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1719                                              2 * 1024 * 1024,
1720                                              BTRFS_BLOCK_GROUP_METADATA |
1721                                              info->extra_alloc_bits |
1722                                              extra_chunk_alloc_bits);
1723                         BUG_ON(ret);
1724                 }
1725                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1726                                      num_bytes + 2 * 1024 * 1024, data |
1727                                      extra_chunk_alloc_bits);
1728                 BUG_ON(ret);
1729         }
1730
1731         new_hint = max(hint_byte, root->fs_info->alloc_start);
1732         if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1733                 hint_byte = new_hint;
1734
1735         WARN_ON(num_bytes < root->sectorsize);
1736         ret = find_free_extent(trans, root, num_bytes, empty_size,
1737                                search_start, search_end, hint_byte, ins,
1738                                trans->alloc_exclude_start,
1739                                trans->alloc_exclude_nr, data);
1740         BUG_ON(ret);
1741         if (ret)
1742                 return ret;
1743
1744         /* block accounting for super block */
1745         super_used = btrfs_super_bytes_used(&info->super_copy);
1746         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1747
1748         /* block accounting for root item */
1749         root_used = btrfs_root_used(&root->root_item);
1750         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1751
1752         clear_extent_dirty(&root->fs_info->free_space_cache,
1753                            ins->objectid, ins->objectid + ins->offset - 1,
1754                            GFP_NOFS);
1755
1756         if (root == extent_root) {
1757                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1758                                 ins->objectid + ins->offset - 1,
1759                                 EXTENT_LOCKED, GFP_NOFS);
1760                 goto update_block;
1761         }
1762
1763         WARN_ON(trans->alloc_exclude_nr);
1764         trans->alloc_exclude_start = ins->objectid;
1765         trans->alloc_exclude_nr = ins->offset;
1766
1767         memcpy(&keys[0], ins, sizeof(*ins));
1768         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1769                                          owner, owner_offset);
1770         keys[1].objectid = ins->objectid;
1771         keys[1].type = BTRFS_EXTENT_REF_KEY;
1772         sizes[0] = sizeof(*extent_item);
1773         sizes[1] = sizeof(*ref);
1774
1775         path = btrfs_alloc_path();
1776         BUG_ON(!path);
1777
1778         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1779                                        sizes, 2);
1780
1781         BUG_ON(ret);
1782         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1783                                      struct btrfs_extent_item);
1784         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1785         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1786                              struct btrfs_extent_ref);
1787
1788         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1789         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1790         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1791         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1792
1793         btrfs_mark_buffer_dirty(path->nodes[0]);
1794
1795         trans->alloc_exclude_start = 0;
1796         trans->alloc_exclude_nr = 0;
1797         btrfs_free_path(path);
1798         finish_current_insert(trans, extent_root);
1799         pending_ret = del_pending_extents(trans, extent_root);
1800
1801         if (ret) {
1802                 return ret;
1803         }
1804         if (pending_ret) {
1805                 return pending_ret;
1806         }
1807
1808 update_block:
1809         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1810         if (ret) {
1811                 printk("update block group failed for %Lu %Lu\n",
1812                        ins->objectid, ins->offset);
1813                 BUG();
1814         }
1815         return 0;
1816 }
1817
1818 /*
1819  * helper function to allocate a block for a given tree
1820  * returns the tree buffer or NULL.
1821  */
1822 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1823                                              struct btrfs_root *root,
1824                                              u32 blocksize,
1825                                              u64 root_objectid, u64 hint,
1826                                              u64 empty_size)
1827 {
1828         u64 ref_generation;
1829
1830         if (root->ref_cows)
1831                 ref_generation = trans->transid;
1832         else
1833                 ref_generation = 0;
1834
1835
1836         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1837                                         ref_generation, 0, 0, hint, empty_size);
1838 }
1839
1840 /*
1841  * helper function to allocate a block for a given tree
1842  * returns the tree buffer or NULL.
1843  */
1844 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1845                                              struct btrfs_root *root,
1846                                              u32 blocksize,
1847                                              u64 root_objectid,
1848                                              u64 ref_generation,
1849                                              u64 first_objectid,
1850                                              int level,
1851                                              u64 hint,
1852                                              u64 empty_size)
1853 {
1854         struct btrfs_key ins;
1855         int ret;
1856         struct extent_buffer *buf;
1857
1858         ret = btrfs_alloc_extent(trans, root, blocksize,
1859                                  root_objectid, ref_generation,
1860                                  level, first_objectid, empty_size, hint,
1861                                  (u64)-1, &ins, 0);
1862         if (ret) {
1863                 BUG_ON(ret > 0);
1864                 return ERR_PTR(ret);
1865         }
1866         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1867         if (!buf) {
1868                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1869                                   root->root_key.objectid, ref_generation,
1870                                   0, 0, 0);
1871                 return ERR_PTR(-ENOMEM);
1872         }
1873         btrfs_set_header_generation(buf, trans->transid);
1874         clean_tree_block(trans, root, buf);
1875         wait_on_tree_block_writeback(root, buf);
1876         btrfs_set_buffer_uptodate(buf);
1877
1878         if (PageDirty(buf->first_page)) {
1879                 printk("page %lu dirty\n", buf->first_page->index);
1880                 WARN_ON(1);
1881         }
1882
1883         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1884                          buf->start + buf->len - 1, GFP_NOFS);
1885         set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
1886                         buf->start, buf->start + buf->len - 1,
1887                         EXTENT_CSUM, GFP_NOFS);
1888         buf->flags |= EXTENT_CSUM;
1889         if (!btrfs_test_opt(root, SSD))
1890                 btrfs_set_buffer_defrag(buf);
1891         trans->blocks_used++;
1892         return buf;
1893 }
1894
1895 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1896                                   struct btrfs_root *root,
1897                                   struct extent_buffer *leaf)
1898 {
1899         u64 leaf_owner;
1900         u64 leaf_generation;
1901         struct btrfs_key key;
1902         struct btrfs_file_extent_item *fi;
1903         int i;
1904         int nritems;
1905         int ret;
1906
1907         BUG_ON(!btrfs_is_leaf(leaf));
1908         nritems = btrfs_header_nritems(leaf);
1909         leaf_owner = btrfs_header_owner(leaf);
1910         leaf_generation = btrfs_header_generation(leaf);
1911
1912         for (i = 0; i < nritems; i++) {
1913                 u64 disk_bytenr;
1914
1915                 btrfs_item_key_to_cpu(leaf, &key, i);
1916                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1917                         continue;
1918                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1919                 if (btrfs_file_extent_type(leaf, fi) ==
1920                     BTRFS_FILE_EXTENT_INLINE)
1921                         continue;
1922                 /*
1923                  * FIXME make sure to insert a trans record that
1924                  * repeats the snapshot del on crash
1925                  */
1926                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1927                 if (disk_bytenr == 0)
1928                         continue;
1929                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1930                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1931                                 leaf_owner, leaf_generation,
1932                                 key.objectid, key.offset, 0);
1933                 BUG_ON(ret);
1934         }
1935         return 0;
1936 }
1937
1938 static void noinline reada_walk_down(struct btrfs_root *root,
1939                                      struct extent_buffer *node,
1940                                      int slot)
1941 {
1942         u64 bytenr;
1943         u64 last = 0;
1944         u32 nritems;
1945         u32 refs;
1946         u32 blocksize;
1947         int ret;
1948         int i;
1949         int level;
1950         int skipped = 0;
1951
1952         nritems = btrfs_header_nritems(node);
1953         level = btrfs_header_level(node);
1954         if (level)
1955                 return;
1956
1957         for (i = slot; i < nritems && skipped < 32; i++) {
1958                 bytenr = btrfs_node_blockptr(node, i);
1959                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1960                              (last > bytenr && last - bytenr > 32 * 1024))) {
1961                         skipped++;
1962                         continue;
1963                 }
1964                 blocksize = btrfs_level_size(root, level - 1);
1965                 if (i != slot) {
1966                         ret = lookup_extent_ref(NULL, root, bytenr,
1967                                                 blocksize, &refs);
1968                         BUG_ON(ret);
1969                         if (refs != 1) {
1970                                 skipped++;
1971                                 continue;
1972                         }
1973                 }
1974                 mutex_unlock(&root->fs_info->fs_mutex);
1975                 ret = readahead_tree_block(root, bytenr, blocksize);
1976                 last = bytenr + blocksize;
1977                 cond_resched();
1978                 mutex_lock(&root->fs_info->fs_mutex);
1979                 if (ret)
1980                         break;
1981         }
1982 }
1983
1984 /*
1985  * helper function for drop_snapshot, this walks down the tree dropping ref
1986  * counts as it goes.
1987  */
1988 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1989                                    struct btrfs_root *root,
1990                                    struct btrfs_path *path, int *level)
1991 {
1992         u64 root_owner;
1993         u64 root_gen;
1994         u64 bytenr;
1995         struct extent_buffer *next;
1996         struct extent_buffer *cur;
1997         struct extent_buffer *parent;
1998         u32 blocksize;
1999         int ret;
2000         u32 refs;
2001
2002         WARN_ON(*level < 0);
2003         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2004         ret = lookup_extent_ref(trans, root,
2005                                 path->nodes[*level]->start,
2006                                 path->nodes[*level]->len, &refs);
2007         BUG_ON(ret);
2008         if (refs > 1)
2009                 goto out;
2010
2011         /*
2012          * walk down to the last node level and free all the leaves
2013          */
2014         while(*level >= 0) {
2015                 WARN_ON(*level < 0);
2016                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2017                 cur = path->nodes[*level];
2018
2019                 if (btrfs_header_level(cur) != *level)
2020                         WARN_ON(1);
2021
2022                 if (path->slots[*level] >=
2023                     btrfs_header_nritems(cur))
2024                         break;
2025                 if (*level == 0) {
2026                         ret = drop_leaf_ref(trans, root, cur);
2027                         BUG_ON(ret);
2028                         break;
2029                 }
2030                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2031                 blocksize = btrfs_level_size(root, *level - 1);
2032                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
2033                 BUG_ON(ret);
2034                 if (refs != 1) {
2035                         parent = path->nodes[*level];
2036                         root_owner = btrfs_header_owner(parent);
2037                         root_gen = btrfs_header_generation(parent);
2038                         path->slots[*level]++;
2039                         ret = btrfs_free_extent(trans, root, bytenr,
2040                                                 blocksize, root_owner,
2041                                                 root_gen, 0, 0, 1);
2042                         BUG_ON(ret);
2043                         continue;
2044                 }
2045                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2046                 if (!next || !btrfs_buffer_uptodate(next)) {
2047                         free_extent_buffer(next);
2048                         reada_walk_down(root, cur, path->slots[*level]);
2049                         next = read_tree_block(root, bytenr, blocksize);
2050
2051                         /* we used to drop the lock above, keep the
2052                          * code to double check so that we won't forget
2053                          * when we drop the lock again in the future
2054                          */
2055                         ret = lookup_extent_ref(trans, root, bytenr,
2056                                                 blocksize, &refs);
2057                         BUG_ON(ret);
2058                         if (refs != 1) {
2059                                 parent = path->nodes[*level];
2060                                 root_owner = btrfs_header_owner(parent);
2061                                 root_gen = btrfs_header_generation(parent);
2062
2063                                 path->slots[*level]++;
2064                                 free_extent_buffer(next);
2065                                 ret = btrfs_free_extent(trans, root, bytenr,
2066                                                         blocksize,
2067                                                         root_owner,
2068                                                         root_gen, 0, 0, 1);
2069                                 BUG_ON(ret);
2070                                 continue;
2071                         }
2072                 } else if (next) {
2073                         btrfs_verify_block_csum(root, next);
2074                 }
2075                 WARN_ON(*level <= 0);
2076                 if (path->nodes[*level-1])
2077                         free_extent_buffer(path->nodes[*level-1]);
2078                 path->nodes[*level-1] = next;
2079                 *level = btrfs_header_level(next);
2080                 path->slots[*level] = 0;
2081         }
2082 out:
2083         WARN_ON(*level < 0);
2084         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2085
2086         if (path->nodes[*level] == root->node) {
2087                 root_owner = root->root_key.objectid;
2088                 parent = path->nodes[*level];
2089         } else {
2090                 parent = path->nodes[*level + 1];
2091                 root_owner = btrfs_header_owner(parent);
2092         }
2093
2094         root_gen = btrfs_header_generation(parent);
2095         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2096                                 path->nodes[*level]->len,
2097                                 root_owner, root_gen, 0, 0, 1);
2098         free_extent_buffer(path->nodes[*level]);
2099         path->nodes[*level] = NULL;
2100         *level += 1;
2101         BUG_ON(ret);
2102         return 0;
2103 }
2104
2105 /*
2106  * helper for dropping snapshots.  This walks back up the tree in the path
2107  * to find the first node higher up where we haven't yet gone through
2108  * all the slots
2109  */
2110 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2111                                  struct btrfs_root *root,
2112                                  struct btrfs_path *path, int *level)
2113 {
2114         u64 root_owner;
2115         u64 root_gen;
2116         struct btrfs_root_item *root_item = &root->root_item;
2117         int i;
2118         int slot;
2119         int ret;
2120
2121         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2122                 slot = path->slots[i];
2123                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2124                         struct extent_buffer *node;
2125                         struct btrfs_disk_key disk_key;
2126                         node = path->nodes[i];
2127                         path->slots[i]++;
2128                         *level = i;
2129                         WARN_ON(*level == 0);
2130                         btrfs_node_key(node, &disk_key, path->slots[i]);
2131                         memcpy(&root_item->drop_progress,
2132                                &disk_key, sizeof(disk_key));
2133                         root_item->drop_level = i;
2134                         return 0;
2135                 } else {
2136                         if (path->nodes[*level] == root->node) {
2137                                 root_owner = root->root_key.objectid;
2138                                 root_gen =
2139                                    btrfs_header_generation(path->nodes[*level]);
2140                         } else {
2141                                 struct extent_buffer *node;
2142                                 node = path->nodes[*level + 1];
2143                                 root_owner = btrfs_header_owner(node);
2144                                 root_gen = btrfs_header_generation(node);
2145                         }
2146                         ret = btrfs_free_extent(trans, root,
2147                                                 path->nodes[*level]->start,
2148                                                 path->nodes[*level]->len,
2149                                                 root_owner, root_gen, 0, 0, 1);
2150                         BUG_ON(ret);
2151                         free_extent_buffer(path->nodes[*level]);
2152                         path->nodes[*level] = NULL;
2153                         *level = i + 1;
2154                 }
2155         }
2156         return 1;
2157 }
2158
2159 /*
2160  * drop the reference count on the tree rooted at 'snap'.  This traverses
2161  * the tree freeing any blocks that have a ref count of zero after being
2162  * decremented.
2163  */
2164 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2165                         *root)
2166 {
2167         int ret = 0;
2168         int wret;
2169         int level;
2170         struct btrfs_path *path;
2171         int i;
2172         int orig_level;
2173         struct btrfs_root_item *root_item = &root->root_item;
2174
2175         path = btrfs_alloc_path();
2176         BUG_ON(!path);
2177
2178         level = btrfs_header_level(root->node);
2179         orig_level = level;
2180         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2181                 path->nodes[level] = root->node;
2182                 extent_buffer_get(root->node);
2183                 path->slots[level] = 0;
2184         } else {
2185                 struct btrfs_key key;
2186                 struct btrfs_disk_key found_key;
2187                 struct extent_buffer *node;
2188
2189                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2190                 level = root_item->drop_level;
2191                 path->lowest_level = level;
2192                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2193                 if (wret < 0) {
2194                         ret = wret;
2195                         goto out;
2196                 }
2197                 node = path->nodes[level];
2198                 btrfs_node_key(node, &found_key, path->slots[level]);
2199                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2200                                sizeof(found_key)));
2201         }
2202         while(1) {
2203                 wret = walk_down_tree(trans, root, path, &level);
2204                 if (wret > 0)
2205                         break;
2206                 if (wret < 0)
2207                         ret = wret;
2208
2209                 wret = walk_up_tree(trans, root, path, &level);
2210                 if (wret > 0)
2211                         break;
2212                 if (wret < 0)
2213                         ret = wret;
2214                 ret = -EAGAIN;
2215                 break;
2216         }
2217         for (i = 0; i <= orig_level; i++) {
2218                 if (path->nodes[i]) {
2219                         free_extent_buffer(path->nodes[i]);
2220                         path->nodes[i] = NULL;
2221                 }
2222         }
2223 out:
2224         btrfs_free_path(path);
2225         return ret;
2226 }
2227
2228 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2229 {
2230         u64 start;
2231         u64 end;
2232         u64 ptr;
2233         int ret;
2234         while(1) {
2235                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2236                                             &start, &end, (unsigned int)-1);
2237                 if (ret)
2238                         break;
2239                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2240                 if (!ret)
2241                         kfree((void *)(unsigned long)ptr);
2242                 clear_extent_bits(&info->block_group_cache, start,
2243                                   end, (unsigned int)-1, GFP_NOFS);
2244         }
2245         while(1) {
2246                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2247                                             &start, &end, EXTENT_DIRTY);
2248                 if (ret)
2249                         break;
2250                 clear_extent_dirty(&info->free_space_cache, start,
2251                                    end, GFP_NOFS);
2252         }
2253         return 0;
2254 }
2255
2256 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2257                                          u64 len)
2258 {
2259         u64 page_start;
2260         u64 page_end;
2261         u64 delalloc_start;
2262         u64 existing_delalloc;
2263         unsigned long last_index;
2264         unsigned long i;
2265         struct page *page;
2266         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2267         struct file_ra_state *ra;
2268
2269         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2270
2271         mutex_lock(&inode->i_mutex);
2272         i = start >> PAGE_CACHE_SHIFT;
2273         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2274
2275         file_ra_state_init(ra, inode->i_mapping);
2276         btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2277         kfree(ra);
2278
2279         for (; i <= last_index; i++) {
2280                 page = grab_cache_page(inode->i_mapping, i);
2281                 if (!page)
2282                         goto out_unlock;
2283                 if (!PageUptodate(page)) {
2284                         btrfs_readpage(NULL, page);
2285                         lock_page(page);
2286                         if (!PageUptodate(page)) {
2287                                 unlock_page(page);
2288                                 page_cache_release(page);
2289                                 goto out_unlock;
2290                         }
2291                 }
2292                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2293                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2294
2295                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2296
2297                 delalloc_start = page_start;
2298                 existing_delalloc = count_range_bits(io_tree,
2299                                              &delalloc_start, page_end,
2300                                              PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2301
2302                 set_extent_delalloc(io_tree, page_start,
2303                                     page_end, GFP_NOFS);
2304
2305                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2306                 set_page_dirty(page);
2307                 unlock_page(page);
2308                 page_cache_release(page);
2309         }
2310
2311 out_unlock:
2312         mutex_unlock(&inode->i_mutex);
2313         return 0;
2314 }
2315
2316 /*
2317  * note, this releases the path
2318  */
2319 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2320                                   struct btrfs_path *path,
2321                                   struct btrfs_key *extent_key)
2322 {
2323         struct inode *inode;
2324         struct btrfs_root *found_root;
2325         struct btrfs_key *root_location;
2326         struct btrfs_extent_ref *ref;
2327         u64 ref_root;
2328         u64 ref_gen;
2329         u64 ref_objectid;
2330         u64 ref_offset;
2331         int ret;
2332
2333         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2334                              struct btrfs_extent_ref);
2335         ref_root = btrfs_ref_root(path->nodes[0], ref);
2336         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2337         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2338         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2339         btrfs_release_path(extent_root, path);
2340
2341         root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2342         root_location->objectid = ref_root;
2343         if (ref_gen == 0)
2344                 root_location->offset = 0;
2345         else
2346                 root_location->offset = (u64)-1;
2347         root_location->type = BTRFS_ROOT_ITEM_KEY;
2348
2349         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2350                                                 root_location);
2351         BUG_ON(!found_root);
2352         kfree(root_location);
2353
2354         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2355                 mutex_unlock(&extent_root->fs_info->fs_mutex);
2356                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2357                                           ref_objectid, found_root);
2358                 if (inode->i_state & I_NEW) {
2359                         /* the inode and parent dir are two different roots */
2360                         BTRFS_I(inode)->root = found_root;
2361                         BTRFS_I(inode)->location.objectid = ref_objectid;
2362                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2363                         BTRFS_I(inode)->location.offset = 0;
2364                         btrfs_read_locked_inode(inode);
2365                         unlock_new_inode(inode);
2366
2367                 }
2368                 /* this can happen if the reference is not against
2369                  * the latest version of the tree root
2370                  */
2371                 if (is_bad_inode(inode)) {
2372                         mutex_lock(&extent_root->fs_info->fs_mutex);
2373                         goto out;
2374                 }
2375                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2376                 /* FIXME, data=ordered will help get rid of this */
2377                 filemap_fdatawrite(inode->i_mapping);
2378                 iput(inode);
2379                 mutex_lock(&extent_root->fs_info->fs_mutex);
2380         } else {
2381                 struct btrfs_trans_handle *trans;
2382                 struct btrfs_key found_key;
2383                 struct extent_buffer *eb;
2384                 int level;
2385                 int i;
2386
2387                 trans = btrfs_start_transaction(found_root, 1);
2388                 eb = read_tree_block(found_root, extent_key->objectid,
2389                                      extent_key->offset);
2390                 level = btrfs_header_level(eb);
2391
2392                 if (level == 0)
2393                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2394                 else
2395                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2396
2397                 free_extent_buffer(eb);
2398
2399                 path->lowest_level = level;
2400                 path->reada = 2;
2401                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2402                                         0, 1);
2403                 path->lowest_level = 0;
2404                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2405                         if (!path->nodes[i])
2406                                 break;
2407                         free_extent_buffer(path->nodes[i]);
2408                         path->nodes[i] = NULL;
2409                 }
2410                 btrfs_release_path(found_root, path);
2411                 btrfs_end_transaction(trans, found_root);
2412         }
2413
2414 out:
2415         return 0;
2416 }
2417
2418 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2419                                         struct btrfs_path *path,
2420                                         struct btrfs_key *extent_key)
2421 {
2422         struct btrfs_key key;
2423         struct btrfs_key found_key;
2424         struct extent_buffer *leaf;
2425         u32 nritems;
2426         u32 item_size;
2427         int ret = 0;
2428
2429         key.objectid = extent_key->objectid;
2430         key.type = BTRFS_EXTENT_REF_KEY;
2431         key.offset = 0;
2432
2433         while(1) {
2434                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2435
2436                 if (ret < 0)
2437                         goto out;
2438
2439                 ret = 0;
2440                 leaf = path->nodes[0];
2441                 nritems = btrfs_header_nritems(leaf);
2442                 if (path->slots[0] == nritems)
2443                         goto out;
2444
2445                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2446                 if (found_key.objectid != extent_key->objectid)
2447                         break;
2448
2449                 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2450                         break;
2451
2452                 key.offset = found_key.offset + 1;
2453                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2454
2455                 ret = relocate_one_reference(extent_root, path, extent_key);
2456                 if (ret)
2457                         goto out;
2458         }
2459         ret = 0;
2460 out:
2461         btrfs_release_path(extent_root, path);
2462         return ret;
2463 }
2464
2465 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2466 {
2467         struct btrfs_trans_handle *trans;
2468         struct btrfs_root *tree_root = root->fs_info->tree_root;
2469         struct btrfs_path *path;
2470         u64 cur_byte;
2471         u64 total_found;
2472         struct btrfs_fs_info *info = root->fs_info;
2473         struct extent_io_tree *block_group_cache;
2474         struct btrfs_key key;
2475         struct btrfs_key found_key;
2476         struct extent_buffer *leaf;
2477         u32 nritems;
2478         int ret;
2479         int progress = 0;
2480
2481         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2482         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2483                            GFP_NOFS);
2484         block_group_cache = &info->block_group_cache;
2485         path = btrfs_alloc_path();
2486         root = root->fs_info->extent_root;
2487         path->reada = 2;
2488
2489 again:
2490         total_found = 0;
2491         key.objectid = new_size;
2492         key.offset = 0;
2493         key.type = 0;
2494         cur_byte = key.objectid;
2495
2496         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2497         if (ret < 0)
2498                 goto out;
2499
2500         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
2501         if (ret < 0)
2502                 goto out;
2503         if (ret == 0) {
2504                 leaf = path->nodes[0];
2505                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2506                 if (found_key.objectid + found_key.offset > new_size) {
2507                         cur_byte = found_key.objectid;
2508                         key.objectid = cur_byte;
2509                 }
2510         }
2511         btrfs_release_path(root, path);
2512
2513         while(1) {
2514                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2515                 if (ret < 0)
2516                         goto out;
2517
2518                 leaf = path->nodes[0];
2519                 nritems = btrfs_header_nritems(leaf);
2520 next:
2521                 if (path->slots[0] >= nritems) {
2522                         ret = btrfs_next_leaf(root, path);
2523                         if (ret < 0)
2524                                 goto out;
2525                         if (ret == 1) {
2526                                 ret = 0;
2527                                 break;
2528                         }
2529                         leaf = path->nodes[0];
2530                         nritems = btrfs_header_nritems(leaf);
2531                 }
2532
2533                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2534
2535                 if (progress && need_resched()) {
2536                         memcpy(&key, &found_key, sizeof(key));
2537                         mutex_unlock(&root->fs_info->fs_mutex);
2538                         cond_resched();
2539                         mutex_lock(&root->fs_info->fs_mutex);
2540                         btrfs_release_path(root, path);
2541                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
2542                         progress = 0;
2543                         goto next;
2544                 }
2545                 progress = 1;
2546
2547                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
2548                     found_key.objectid + found_key.offset <= cur_byte) {
2549                         path->slots[0]++;
2550                         goto next;
2551                 }
2552
2553                 total_found++;
2554                 cur_byte = found_key.objectid + found_key.offset;
2555                 key.objectid = cur_byte;
2556                 btrfs_release_path(root, path);
2557                 ret = relocate_one_extent(root, path, &found_key);
2558         }
2559
2560         btrfs_release_path(root, path);
2561
2562         if (total_found > 0) {
2563                 trans = btrfs_start_transaction(tree_root, 1);
2564                 btrfs_commit_transaction(trans, tree_root);
2565
2566                 mutex_unlock(&root->fs_info->fs_mutex);
2567                 btrfs_clean_old_snapshots(tree_root);
2568                 mutex_lock(&root->fs_info->fs_mutex);
2569
2570                 trans = btrfs_start_transaction(tree_root, 1);
2571                 btrfs_commit_transaction(trans, tree_root);
2572                 goto again;
2573         }
2574
2575         trans = btrfs_start_transaction(root, 1);
2576         key.objectid = new_size;
2577         key.offset = 0;
2578         key.type = 0;
2579         while(1) {
2580                 u64 ptr;
2581
2582                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2583                 if (ret < 0)
2584                         goto out;
2585
2586                 leaf = path->nodes[0];
2587                 nritems = btrfs_header_nritems(leaf);
2588 bg_next:
2589                 if (path->slots[0] >= nritems) {
2590                         ret = btrfs_next_leaf(root, path);
2591                         if (ret < 0)
2592                                 break;
2593                         if (ret == 1) {
2594                                 ret = 0;
2595                                 break;
2596                         }
2597                         leaf = path->nodes[0];
2598                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2599
2600                         /*
2601                          * btrfs_next_leaf doesn't cow buffers, we have to
2602                          * do the search again
2603                          */
2604                         memcpy(&key, &found_key, sizeof(key));
2605                         btrfs_release_path(root, path);
2606                         goto resched_check;
2607                 }
2608
2609                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2610                 if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2611                         printk("shrinker found key %Lu %u %Lu\n",
2612                                 found_key.objectid, found_key.type,
2613                                 found_key.offset);
2614                         path->slots[0]++;
2615                         goto bg_next;
2616                 }
2617                 ret = get_state_private(&info->block_group_cache,
2618                                         found_key.objectid, &ptr);
2619                 if (!ret)
2620                         kfree((void *)(unsigned long)ptr);
2621
2622                 clear_extent_bits(&info->block_group_cache, found_key.objectid,
2623                                   found_key.objectid + found_key.offset - 1,
2624                                   (unsigned int)-1, GFP_NOFS);
2625
2626                 key.objectid = found_key.objectid + 1;
2627                 btrfs_del_item(trans, root, path);
2628                 btrfs_release_path(root, path);
2629 resched_check:
2630                 if (need_resched()) {
2631                         mutex_unlock(&root->fs_info->fs_mutex);
2632                         cond_resched();
2633                         mutex_lock(&root->fs_info->fs_mutex);
2634                 }
2635         }
2636         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2637                            GFP_NOFS);
2638         btrfs_commit_transaction(trans, root);
2639 out:
2640         btrfs_free_path(path);
2641         return ret;
2642 }
2643
2644 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2645                            struct btrfs_root *root, u64 new_size)
2646 {
2647         btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
2648         return 0;
2649 }
2650
2651 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2652                            struct btrfs_key *key)
2653 {
2654         int ret;
2655         struct btrfs_key found_key;
2656         struct extent_buffer *leaf;
2657         int slot;
2658
2659         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2660         if (ret < 0)
2661                 return ret;
2662         while(1) {
2663                 slot = path->slots[0];
2664                 leaf = path->nodes[0];
2665                 if (slot >= btrfs_header_nritems(leaf)) {
2666                         ret = btrfs_next_leaf(root, path);
2667                         if (ret == 0)
2668                                 continue;
2669                         if (ret < 0)
2670                                 goto error;
2671                         break;
2672                 }
2673                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2674
2675                 if (found_key.objectid >= key->objectid &&
2676                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2677                         return 0;
2678                 path->slots[0]++;
2679         }
2680         ret = -ENOENT;
2681 error:
2682         return ret;
2683 }
2684
2685 int btrfs_read_block_groups(struct btrfs_root *root)
2686 {
2687         struct btrfs_path *path;
2688         int ret;
2689         int bit;
2690         struct btrfs_block_group_cache *cache;
2691         struct btrfs_fs_info *info = root->fs_info;
2692         struct btrfs_space_info *space_info;
2693         struct extent_io_tree *block_group_cache;
2694         struct btrfs_key key;
2695         struct btrfs_key found_key;
2696         struct extent_buffer *leaf;
2697
2698         block_group_cache = &info->block_group_cache;
2699         root = info->extent_root;
2700         key.objectid = 0;
2701         key.offset = 0;
2702         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2703         path = btrfs_alloc_path();
2704         if (!path)
2705                 return -ENOMEM;
2706
2707         while(1) {
2708                 ret = find_first_block_group(root, path, &key);
2709                 if (ret > 0) {
2710                         ret = 0;
2711                         goto error;
2712                 }
2713                 if (ret != 0)
2714                         goto error;
2715
2716                 leaf = path->nodes[0];
2717                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2718                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2719                 if (!cache) {
2720                         ret = -ENOMEM;
2721                         break;
2722                 }
2723
2724                 read_extent_buffer(leaf, &cache->item,
2725                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2726                                    sizeof(cache->item));
2727                 memcpy(&cache->key, &found_key, sizeof(found_key));
2728                 cache->cached = 0;
2729                 cache->pinned = 0;
2730
2731                 key.objectid = found_key.objectid + found_key.offset;
2732                 btrfs_release_path(root, path);
2733                 cache->flags = btrfs_block_group_flags(&cache->item);
2734                 bit = 0;
2735                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2736                         bit = BLOCK_GROUP_DATA;
2737                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2738                         bit = BLOCK_GROUP_SYSTEM;
2739                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2740                         bit = BLOCK_GROUP_METADATA;
2741                 }
2742                 if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) {
2743                         if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2744                                 info->extra_data_alloc_bits =
2745                                         BTRFS_BLOCK_GROUP_RAID0;
2746                         }
2747                         if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2748                                 info->extra_alloc_bits =
2749                                         BTRFS_BLOCK_GROUP_RAID0;
2750                         }
2751                 }
2752
2753                 ret = update_space_info(info, cache->flags, found_key.offset,
2754                                         btrfs_block_group_used(&cache->item),
2755                                         &space_info);
2756                 BUG_ON(ret);
2757                 cache->space_info = space_info;
2758
2759                 /* use EXTENT_LOCKED to prevent merging */
2760                 set_extent_bits(block_group_cache, found_key.objectid,
2761                                 found_key.objectid + found_key.offset - 1,
2762                                 bit | EXTENT_LOCKED, GFP_NOFS);
2763                 set_state_private(block_group_cache, found_key.objectid,
2764                                   (unsigned long)cache);
2765
2766                 if (key.objectid >=
2767                     btrfs_super_total_bytes(&info->super_copy))
2768                         break;
2769         }
2770         ret = 0;
2771 error:
2772         btrfs_free_path(path);
2773         return ret;
2774 }
2775
2776 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2777                            struct btrfs_root *root, u64 bytes_used,
2778                            u64 type, u64 chunk_tree, u64 chunk_objectid,
2779                            u64 size)
2780 {
2781         int ret;
2782         int bit = 0;
2783         struct btrfs_root *extent_root;
2784         struct btrfs_block_group_cache *cache;
2785         struct extent_io_tree *block_group_cache;
2786
2787         extent_root = root->fs_info->extent_root;
2788         block_group_cache = &root->fs_info->block_group_cache;
2789
2790         cache = kmalloc(sizeof(*cache), GFP_NOFS);
2791         BUG_ON(!cache);
2792         cache->key.objectid = chunk_objectid;
2793         cache->key.offset = size;
2794         cache->cached = 0;
2795         cache->pinned = 0;
2796         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2797         memset(&cache->item, 0, sizeof(cache->item));
2798         btrfs_set_block_group_used(&cache->item, bytes_used);
2799         btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
2800         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2801         cache->flags = type;
2802         btrfs_set_block_group_flags(&cache->item, type);
2803
2804         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2805                                 &cache->space_info);
2806         BUG_ON(ret);
2807
2808         if (type & BTRFS_BLOCK_GROUP_DATA) {
2809                 bit = BLOCK_GROUP_DATA;
2810         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2811                 bit = BLOCK_GROUP_SYSTEM;
2812         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2813                 bit = BLOCK_GROUP_METADATA;
2814         }
2815         set_extent_bits(block_group_cache, chunk_objectid,
2816                         chunk_objectid + size - 1,
2817                         bit | EXTENT_LOCKED, GFP_NOFS);
2818         set_state_private(block_group_cache, chunk_objectid,
2819                           (unsigned long)cache);
2820
2821         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2822                                 sizeof(cache->item));
2823         BUG_ON(ret);
2824
2825         finish_current_insert(trans, extent_root);
2826         ret = del_pending_extents(trans, extent_root);
2827         BUG_ON(ret);
2828         return 0;
2829 }