]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/extent-tree.c
Btrfs: reference counts on data extents
[karo-tx-linux.git] / fs / btrfs / extent-tree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "print-tree.h"
5 #include "transaction.h"
6
7 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8                             *orig_root, u64 num_blocks, u64 search_start, u64
9                             search_end, struct btrfs_key *ins);
10 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
11                                  btrfs_root *extent_root);
12 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13                                btrfs_root *extent_root);
14
15 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
16                          *root, u64 blocknr, u64 num_blocks)
17 {
18         struct btrfs_path path;
19         int ret;
20         struct btrfs_key key;
21         struct btrfs_leaf *l;
22         struct btrfs_extent_item *item;
23         struct btrfs_key ins;
24         u32 refs;
25
26         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
27                          &ins);
28         btrfs_init_path(&path);
29         key.objectid = blocknr;
30         key.flags = 0;
31         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
32         key.offset = num_blocks;
33         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
34                                 0, 1);
35         if (ret != 0)
36                 BUG();
37         BUG_ON(ret != 0);
38         l = btrfs_buffer_leaf(path.nodes[0]);
39         item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
40         refs = btrfs_extent_refs(item);
41         btrfs_set_extent_refs(item, refs + 1);
42         mark_buffer_dirty(path.nodes[0]);
43
44         btrfs_release_path(root->fs_info->extent_root, &path);
45         finish_current_insert(trans, root->fs_info->extent_root);
46         del_pending_extents(trans, root->fs_info->extent_root);
47         return 0;
48 }
49
50 static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
51                             *root, u64 blocknr, u64 num_blocks, u32 *refs)
52 {
53         struct btrfs_path path;
54         int ret;
55         struct btrfs_key key;
56         struct btrfs_leaf *l;
57         struct btrfs_extent_item *item;
58         btrfs_init_path(&path);
59         key.objectid = blocknr;
60         key.offset = num_blocks;
61         key.flags = 0;
62         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
63         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
64                                 0, 0);
65         if (ret != 0)
66                 BUG();
67         l = btrfs_buffer_leaf(path.nodes[0]);
68         item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
69         *refs = btrfs_extent_refs(item);
70         btrfs_release_path(root->fs_info->extent_root, &path);
71         return 0;
72 }
73
74 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
75                   struct buffer_head *buf)
76 {
77         u64 blocknr;
78         struct btrfs_node *buf_node;
79         struct btrfs_leaf *buf_leaf;
80         struct btrfs_disk_key *key;
81         struct btrfs_file_extent_item *fi;
82         int i;
83         int leaf;
84         int ret;
85
86         if (!root->ref_cows)
87                 return 0;
88         buf_node = btrfs_buffer_node(buf);
89         leaf = btrfs_is_leaf(buf_node);
90         buf_leaf = btrfs_buffer_leaf(buf);
91         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
92                 if (leaf) {
93                         key = &buf_leaf->items[i].key;
94                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
95                                 continue;
96                         fi = btrfs_item_ptr(buf_leaf, i,
97                                             struct btrfs_file_extent_item);
98                         ret = inc_block_ref(trans, root,
99                                     btrfs_file_extent_disk_blocknr(fi),
100                                     btrfs_file_extent_disk_num_blocks(fi));
101                         BUG_ON(ret);
102                 } else {
103                         blocknr = btrfs_node_blockptr(buf_node, i);
104                         ret = inc_block_ref(trans, root, blocknr, 1);
105                         BUG_ON(ret);
106                 }
107         }
108         return 0;
109 }
110
111 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
112                                btrfs_root *root)
113 {
114         unsigned long gang[8];
115         u64 first = 0;
116         int ret;
117         int i;
118         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
119
120         while(1) {
121                 ret = find_first_radix_bit(pinned_radix, gang,
122                                            ARRAY_SIZE(gang));
123                 if (!ret)
124                         break;
125                 if (!first)
126                         first = gang[0];
127                 for (i = 0; i < ret; i++) {
128                         clear_radix_bit(pinned_radix, gang[i]);
129                 }
130         }
131         if (root->fs_info->last_insert.objectid > first)
132                 root->fs_info->last_insert.objectid = first;
133         root->fs_info->last_insert.offset = 0;
134         return 0;
135 }
136
137 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
138                                  btrfs_root *extent_root)
139 {
140         struct btrfs_key ins;
141         struct btrfs_extent_item extent_item;
142         int i;
143         int ret;
144         u64 super_blocks_used;
145         struct btrfs_fs_info *info = extent_root->fs_info;
146
147         btrfs_set_extent_refs(&extent_item, 1);
148         btrfs_set_extent_owner(&extent_item,
149                 btrfs_header_parentid(btrfs_buffer_header(extent_root->node)));
150         ins.offset = 1;
151         ins.flags = 0;
152         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
153
154         for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) {
155                 ins.objectid = extent_root->fs_info->current_insert.objectid +
156                                 i;
157                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
158                 btrfs_set_super_blocks_used(info->disk_super,
159                                             super_blocks_used + 1);
160                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
161                                         sizeof(extent_item));
162                 BUG_ON(ret);
163         }
164         extent_root->fs_info->current_insert.offset = 0;
165         return 0;
166 }
167
168 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
169 {
170         int err;
171         struct btrfs_header *header;
172         struct buffer_head *bh;
173
174         bh = sb_find_get_block(root->fs_info->sb, blocknr);
175         if (bh) {
176                 header = btrfs_buffer_header(bh);
177                 if (btrfs_header_generation(header) ==
178                     root->fs_info->running_transaction->transid) {
179                         brelse(bh);
180                         return 0;
181                 }
182                 brelse(bh);
183         }
184         if (pending)
185                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
186         else
187                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
188         BUG_ON(err);
189         return 0;
190 }
191
192 /*
193  * remove an extent from the root, returns 0 on success
194  */
195 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
196                          *root, u64 blocknr, u64 num_blocks, int pin)
197 {
198         struct btrfs_path path;
199         struct btrfs_key key;
200         struct btrfs_fs_info *info = root->fs_info;
201         struct btrfs_root *extent_root = info->extent_root;
202         int ret;
203         struct btrfs_extent_item *ei;
204         struct btrfs_key ins;
205         u32 refs;
206
207         key.objectid = blocknr;
208         key.flags = 0;
209         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
210         key.offset = num_blocks;
211
212         find_free_extent(trans, root, 0, 0, (u64)-1, &ins);
213         btrfs_init_path(&path);
214         ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1);
215         if (ret) {
216                 printk("failed to find %Lu\n", key.objectid);
217                 btrfs_print_tree(extent_root, extent_root->node);
218                 printk("failed to find %Lu\n", key.objectid);
219                 BUG();
220         }
221         ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
222                             struct btrfs_extent_item);
223         BUG_ON(ei->refs == 0);
224         refs = btrfs_extent_refs(ei) - 1;
225         btrfs_set_extent_refs(ei, refs);
226         if (refs == 0) {
227                 u64 super_blocks_used;
228
229                 if (pin) {
230                         ret = pin_down_block(root, blocknr, 0);
231                         BUG_ON(ret);
232                 }
233
234                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
235                 btrfs_set_super_blocks_used(info->disk_super,
236                                             super_blocks_used - num_blocks);
237                 ret = btrfs_del_item(trans, extent_root, &path);
238                 if (extent_root->fs_info->last_insert.objectid > blocknr)
239                         extent_root->fs_info->last_insert.objectid = blocknr;
240                 if (ret)
241                         BUG();
242         }
243         mark_buffer_dirty(path.nodes[0]);
244         btrfs_release_path(extent_root, &path);
245         finish_current_insert(trans, extent_root);
246         return ret;
247 }
248
249 /*
250  * find all the blocks marked as pending in the radix tree and remove
251  * them from the extent map
252  */
253 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
254                                btrfs_root *extent_root)
255 {
256         int ret;
257         int wret;
258         int err = 0;
259         unsigned long gang[4];
260         int i;
261         struct radix_tree_root *pending_radix;
262         struct radix_tree_root *pinned_radix;
263
264         pending_radix = &extent_root->fs_info->pending_del_radix;
265         pinned_radix = &extent_root->fs_info->pinned_radix;
266
267         while(1) {
268                 ret = find_first_radix_bit(pending_radix, gang,
269                                            ARRAY_SIZE(gang));
270                 if (!ret)
271                         break;
272                 for (i = 0; i < ret; i++) {
273                         wret = set_radix_bit(pinned_radix, gang[i]);
274                         BUG_ON(wret);
275                         wret = clear_radix_bit(pending_radix, gang[i]);
276                         BUG_ON(wret);
277                         wret = __free_extent(trans, extent_root,
278                                              gang[i], 1, 0);
279                         if (wret)
280                                 err = wret;
281                 }
282         }
283         return err;
284 }
285
286 /*
287  * remove an extent from the root, returns 0 on success
288  */
289 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
290                       *root, u64 blocknr, u64 num_blocks, int pin)
291 {
292         struct btrfs_root *extent_root = root->fs_info->extent_root;
293         struct buffer_head *t;
294         int pending_ret;
295         int ret;
296
297         if (root == extent_root) {
298                 t = find_tree_block(root, blocknr);
299                 pin_down_block(root, blocknr, 1);
300                 return 0;
301         }
302         ret = __free_extent(trans, root, blocknr, num_blocks, pin);
303         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
304         return ret ? ret : pending_ret;
305 }
306
307 /*
308  * walks the btree of allocated extents and find a hole of a given size.
309  * The key ins is changed to record the hole:
310  * ins->objectid == block start
311  * ins->flags = BTRFS_EXTENT_ITEM_KEY
312  * ins->offset == number of blocks
313  * Any available blocks before search_start are skipped.
314  */
315 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
316                             *orig_root, u64 num_blocks, u64 search_start, u64
317                             search_end, struct btrfs_key *ins)
318 {
319         struct btrfs_path path;
320         struct btrfs_key key;
321         int ret;
322         u64 hole_size = 0;
323         int slot = 0;
324         u64 last_block = 0;
325         u64 test_block;
326         int start_found;
327         struct btrfs_leaf *l;
328         struct btrfs_root * root = orig_root->fs_info->extent_root;
329         int total_needed = num_blocks;
330         int level;
331
332         level = btrfs_header_level(btrfs_buffer_header(root->node));
333         total_needed += (level + 1) * 3;
334         if (root->fs_info->last_insert.objectid > search_start)
335                 search_start = root->fs_info->last_insert.objectid;
336
337         ins->flags = 0;
338         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
339
340 check_failed:
341         btrfs_init_path(&path);
342         ins->objectid = search_start;
343         ins->offset = 0;
344         start_found = 0;
345         ret = btrfs_search_slot(trans, root, ins, &path, 0, 0);
346         if (ret < 0)
347                 goto error;
348
349         if (path.slots[0] > 0)
350                 path.slots[0]--;
351
352         while (1) {
353                 l = btrfs_buffer_leaf(path.nodes[0]);
354                 slot = path.slots[0];
355                 if (slot >= btrfs_header_nritems(&l->header)) {
356                         ret = btrfs_next_leaf(root, &path);
357                         if (ret == 0)
358                                 continue;
359                         if (ret < 0)
360                                 goto error;
361                         if (!start_found) {
362                                 ins->objectid = search_start;
363                                 ins->offset = (u64)-1;
364                                 start_found = 1;
365                                 goto check_pending;
366                         }
367                         ins->objectid = last_block > search_start ?
368                                         last_block : search_start;
369                         ins->offset = (u64)-1;
370                         goto check_pending;
371                 }
372                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
373                 if (key.objectid >= search_start) {
374                         if (start_found) {
375                                 if (last_block < search_start)
376                                         last_block = search_start;
377                                 hole_size = key.objectid - last_block;
378                                 if (hole_size > total_needed) {
379                                         ins->objectid = last_block;
380                                         ins->offset = hole_size;
381                                         goto check_pending;
382                                 }
383                         }
384                 }
385                 start_found = 1;
386                 last_block = key.objectid + key.offset;
387                 path.slots[0]++;
388         }
389         // FIXME -ENOSPC
390 check_pending:
391         /* we have to make sure we didn't find an extent that has already
392          * been allocated by the map tree or the original allocation
393          */
394         btrfs_release_path(root, &path);
395         BUG_ON(ins->objectid < search_start);
396         for (test_block = ins->objectid;
397              test_block < ins->objectid + total_needed; test_block++) {
398                 if (test_radix_bit(&root->fs_info->pinned_radix,
399                                       test_block)) {
400                         search_start = test_block + 1;
401                         goto check_failed;
402                 }
403         }
404         BUG_ON(root->fs_info->current_insert.offset);
405         root->fs_info->current_insert.offset = total_needed - num_blocks;
406         root->fs_info->current_insert.objectid = ins->objectid + num_blocks;
407         root->fs_info->current_insert.flags = 0;
408         root->fs_info->last_insert.objectid = ins->objectid;
409         ins->offset = num_blocks;
410         return 0;
411 error:
412         btrfs_release_path(root, &path);
413         return ret;
414 }
415
416 /*
417  * finds a free extent and does all the dirty work required for allocation
418  * returns the key for the extent through ins, and a tree buffer for
419  * the first block of the extent through buf.
420  *
421  * returns 0 if everything worked, non-zero otherwise.
422  */
423 int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root
424                         *root, u64 num_blocks, u64 search_start, u64
425                         search_end, u64 owner, struct btrfs_key *ins)
426 {
427         int ret;
428         int pending_ret;
429         u64 super_blocks_used;
430         struct btrfs_fs_info *info = root->fs_info;
431         struct btrfs_root *extent_root = info->extent_root;
432         struct btrfs_extent_item extent_item;
433
434         btrfs_set_extent_refs(&extent_item, 1);
435         btrfs_set_extent_owner(&extent_item, owner);
436
437         if (root == extent_root) {
438                 BUG_ON(extent_root->fs_info->current_insert.offset == 0);
439                 BUG_ON(num_blocks != 1);
440                 BUG_ON(extent_root->fs_info->current_insert.flags ==
441                        extent_root->fs_info->current_insert.offset);
442                 ins->offset = 1;
443                 ins->objectid = extent_root->fs_info->current_insert.objectid +
444                                 extent_root->fs_info->current_insert.flags++;
445                 return 0;
446         }
447         ret = find_free_extent(trans, root, num_blocks, search_start,
448                                search_end, ins);
449         if (ret)
450                 return ret;
451
452         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
453         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
454                                     num_blocks);
455         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
456                                 sizeof(extent_item));
457
458         finish_current_insert(trans, extent_root);
459         pending_ret = del_pending_extents(trans, extent_root);
460         if (ret)
461                 return ret;
462         if (pending_ret)
463                 return pending_ret;
464         return 0;
465 }
466
467 /*
468  * helper function to allocate a block for a given tree
469  * returns the tree buffer or NULL.
470  */
471 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
472                                             struct btrfs_root *root)
473 {
474         struct btrfs_key ins;
475         int ret;
476         struct buffer_head *buf;
477
478         ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1,
479                 btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins);
480         if (ret) {
481                 BUG();
482                 return NULL;
483         }
484         buf = find_tree_block(root, ins.objectid);
485         set_buffer_uptodate(buf);
486         return buf;
487 }
488
489 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
490                          struct btrfs_root *root, struct buffer_head *cur)
491 {
492         struct btrfs_disk_key *key;
493         struct btrfs_leaf *leaf;
494         struct btrfs_file_extent_item *fi;
495         int i;
496         int nritems;
497         int ret;
498
499         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
500         leaf = btrfs_buffer_leaf(cur);
501         nritems = btrfs_header_nritems(&leaf->header);
502         for (i = 0; i < nritems; i++) {
503                 key = &leaf->items[i].key;
504                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
505                         continue;
506                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
507                 /*
508                  * FIXME make sure to insert a trans record that
509                  * repeats the snapshot del on crash
510                  */
511                 ret = btrfs_free_extent(trans, root,
512                                         btrfs_file_extent_disk_blocknr(fi),
513                                         btrfs_file_extent_disk_num_blocks(fi),
514                                         0);
515                 BUG_ON(ret);
516         }
517         return 0;
518 }
519
520 /*
521  * helper function for drop_snapshot, this walks down the tree dropping ref
522  * counts as it goes.
523  */
524 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
525                           *root, struct btrfs_path *path, int *level)
526 {
527         struct buffer_head *next;
528         struct buffer_head *cur;
529         u64 blocknr;
530         int ret;
531         u32 refs;
532
533         ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr,
534                                1, &refs);
535         BUG_ON(ret);
536         if (refs > 1)
537                 goto out;
538         /*
539          * walk down to the last node level and free all the leaves
540          */
541         while(*level >= 0) {
542                 cur = path->nodes[*level];
543                 if (path->slots[*level] >=
544                     btrfs_header_nritems(btrfs_buffer_header(cur)))
545                         break;
546                 if (*level == 0) {
547                         ret = drop_leaf_ref(trans, root, cur);
548                         BUG_ON(ret);
549                         break;
550                 }
551                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
552                                               path->slots[*level]);
553                 ret = lookup_block_ref(trans, root, blocknr, 1, &refs);
554                 BUG_ON(ret);
555                 if (refs != 1) {
556                         path->slots[*level]++;
557                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
558                         BUG_ON(ret);
559                         continue;
560                 }
561                 next = read_tree_block(root, blocknr);
562                 if (path->nodes[*level-1])
563                         btrfs_block_release(root, path->nodes[*level-1]);
564                 path->nodes[*level-1] = next;
565                 *level = btrfs_header_level(btrfs_buffer_header(next));
566                 path->slots[*level] = 0;
567         }
568 out:
569         ret = btrfs_free_extent(trans, root,
570                                 path->nodes[*level]->b_blocknr, 1, 1);
571         btrfs_block_release(root, path->nodes[*level]);
572         path->nodes[*level] = NULL;
573         *level += 1;
574         BUG_ON(ret);
575         return 0;
576 }
577
578 /*
579  * helper for dropping snapshots.  This walks back up the tree in the path
580  * to find the first node higher up where we haven't yet gone through
581  * all the slots
582  */
583 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
584                         *root, struct btrfs_path *path, int *level)
585 {
586         int i;
587         int slot;
588         int ret;
589         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
590                 slot = path->slots[i];
591                 if (slot < btrfs_header_nritems(
592                     btrfs_buffer_header(path->nodes[i])) - 1) {
593                         path->slots[i]++;
594                         *level = i;
595                         return 0;
596                 } else {
597                         ret = btrfs_free_extent(trans, root,
598                                                 path->nodes[*level]->b_blocknr,
599                                                 1, 1);
600                         BUG_ON(ret);
601                         btrfs_block_release(root, path->nodes[*level]);
602                         path->nodes[*level] = NULL;
603                         *level = i + 1;
604                 }
605         }
606         return 1;
607 }
608
609 /*
610  * drop the reference count on the tree rooted at 'snap'.  This traverses
611  * the tree freeing any blocks that have a ref count of zero after being
612  * decremented.
613  */
614 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
615                         *root, struct buffer_head *snap)
616 {
617         int ret = 0;
618         int wret;
619         int level;
620         struct btrfs_path path;
621         int i;
622         int orig_level;
623
624         btrfs_init_path(&path);
625
626         level = btrfs_header_level(btrfs_buffer_header(snap));
627         orig_level = level;
628         path.nodes[level] = snap;
629         path.slots[level] = 0;
630         while(1) {
631                 wret = walk_down_tree(trans, root, &path, &level);
632                 if (wret > 0)
633                         break;
634                 if (wret < 0)
635                         ret = wret;
636
637                 wret = walk_up_tree(trans, root, &path, &level);
638                 if (wret > 0)
639                         break;
640                 if (wret < 0)
641                         ret = wret;
642         }
643         for (i = 0; i <= orig_level; i++) {
644                 if (path.nodes[i]) {
645                         btrfs_block_release(root, path.nodes[i]);
646                 }
647         }
648         return ret;
649 }