2 * Copyright (C) 2015 Facebook. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02110-1301, USA.
19 #include <linux/kernel.h>
20 #include <linux/vmalloc.h>
24 #include "free-space-tree.h"
25 #include "transaction.h"
/*
 * Forward declaration: defined near the bottom of this file; needed by the
 * __add_to/__remove_from helpers above its definition.
 */
28 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
29 struct btrfs_fs_info *fs_info,
30 struct btrfs_block_group_cache *block_group,
31 struct btrfs_path *path);
/*
 * Compute cache->bitmap_high_thresh / bitmap_low_thresh for a block group:
 * above the high threshold the free space is stored as bitmaps, below the
 * low threshold as plain extent items, with a 100-extent hysteresis band
 * between them. (Excerpt: the embedded line numbers show omitted lines —
 * verify the full statement sequence against the complete file.)
 */
33 void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
37 u64 num_bitmaps, total_bitmap_size;
40 * We convert to bitmaps when the disk space required for using extents
41 * exceeds that required for using bitmaps.
43 bitmap_range = cache->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
44 num_bitmaps = div_u64(cache->key.offset + bitmap_range - 1,
46 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
47 total_bitmap_size = num_bitmaps * bitmap_size;
48 cache->bitmap_high_thresh = div_u64(total_bitmap_size,
49 sizeof(struct btrfs_item));
52 * We allow for a small buffer between the high threshold and low
53 * threshold to avoid thrashing back and forth between the two formats.
55 if (cache->bitmap_high_thresh > 100)
56 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
58 cache->bitmap_low_thresh = 0;
/*
 * Insert a fresh FREE_SPACE_INFO item for @block_group into the free space
 * root, initialized with extent_count = 0 and flags = 0. Releases @path
 * before returning. (Excerpt: embedded line numbers show omitted lines.)
 */
61 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
62 struct btrfs_fs_info *fs_info,
63 struct btrfs_block_group_cache *block_group,
64 struct btrfs_path *path)
66 struct btrfs_root *root = fs_info->free_space_root;
67 struct btrfs_free_space_info *info;
69 struct extent_buffer *leaf;
/* Key: (block group start, FREE_SPACE_INFO, block group length). */
72 key.objectid = block_group->key.objectid;
73 key.type = BTRFS_FREE_SPACE_INFO_KEY;
74 key.offset = block_group->key.offset;
76 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
80 leaf = path->nodes[0];
81 info = btrfs_item_ptr(leaf, path->slots[0],
82 struct btrfs_free_space_info);
83 btrfs_set_free_space_extent_count(leaf, info, 0);
84 btrfs_set_free_space_flags(leaf, info, 0);
85 btrfs_mark_buffer_dirty(leaf);
89 btrfs_release_path(path);
/*
 * Look up the FREE_SPACE_INFO item for @block_group and return a pointer to
 * it inside path->nodes[0]; @path stays positioned on the item so callers
 * can read flags/extent_count. Returns ERR_PTR(-ENOENT) when the item is
 * missing. @cow selects a read-only or COW search.
 *
 * NOTE(review): the btrfs_warn() format string ends in "\n" — kernel btrfs
 * log helpers add their own newline, so the trailing "\n" looks spurious;
 * confirm against the tree-wide convention before changing it.
 * (Excerpt: embedded line numbers show omitted lines.)
 */
93 struct btrfs_free_space_info *
94 search_free_space_info(struct btrfs_trans_handle *trans,
95 struct btrfs_fs_info *fs_info,
96 struct btrfs_block_group_cache *block_group,
97 struct btrfs_path *path, int cow)
99 struct btrfs_root *root = fs_info->free_space_root;
100 struct btrfs_key key;
103 key.objectid = block_group->key.objectid;
104 key.type = BTRFS_FREE_SPACE_INFO_KEY;
105 key.offset = block_group->key.offset;
107 ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
111 btrfs_warn(fs_info, "missing free space info for %llu\n",
112 block_group->key.objectid);
114 return ERR_PTR(-ENOENT);
117 return btrfs_item_ptr(path->nodes[0], path->slots[0],
118 struct btrfs_free_space_info);
/*
 * Like btrfs_search_slot(), but positions @p at the greatest key strictly
 * less than the requested key. (Excerpt: the body is visibly truncated here
 * — the handling after the p->slots[0] == 0 check is not in this dump.)
 */
122 * btrfs_search_slot() but we're looking for the greatest key less than the
125 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
126 struct btrfs_root *root,
127 struct btrfs_key *key, struct btrfs_path *p,
128 int ins_len, int cow)
132 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
141 if (p->slots[0] == 0) {
/*
 * Number of bytes needed for a bitmap with one bit per @sectorsize unit
 * covering @size bytes (rounded up to whole bytes).
 */
150 static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
152 return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
/*
 * Allocate a zeroed in-memory bitmap of @bitmap_size bytes. Uses vmalloc
 * (GFP_NOFS to avoid recursing into the filesystem during reclaim); callers
 * free it with vfree(). (Excerpt: the __vmalloc() call's final argument is
 * omitted from this dump.)
 */
155 static unsigned long *alloc_bitmap(u32 bitmap_size)
157 return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
/*
 * Convert @block_group's free space representation from extent items to
 * bitmap items: walk the FREE_SPACE_EXTENT items backwards, record them in
 * an in-memory bitmap, delete them, set BTRFS_FREE_SPACE_USING_BITMAPS in
 * the info item, then write the bitmap out as FREE_SPACE_BITMAP items.
 * Cross-checks the counted extents against the info item's extent count.
 * Aborts the transaction on error. (Excerpt: embedded line numbers show
 * omitted lines — loop/error-path details are incomplete here.)
 */
161 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
162 struct btrfs_fs_info *fs_info,
163 struct btrfs_block_group_cache *block_group,
164 struct btrfs_path *path)
166 struct btrfs_root *root = fs_info->free_space_root;
167 struct btrfs_free_space_info *info;
168 struct btrfs_key key, found_key;
169 struct extent_buffer *leaf;
170 unsigned long *bitmap;
174 u32 bitmap_size, flags, expected_extent_count;
175 u32 extent_count = 0;
179 bitmap_size = free_space_bitmap_size(block_group->key.offset,
180 block_group->sectorsize);
181 bitmap = alloc_bitmap(bitmap_size);
187 start = block_group->key.objectid;
188 end = block_group->key.objectid + block_group->key.offset;
/* Search from the last byte of the group backwards. */
190 key.objectid = end - 1;
192 key.offset = (u64)-1;
195 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
199 leaf = path->nodes[0];
202 while (path->slots[0] > 0) {
203 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
205 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
206 ASSERT(found_key.objectid == block_group->key.objectid);
207 ASSERT(found_key.offset == block_group->key.offset);
210 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
213 ASSERT(found_key.objectid >= start);
214 ASSERT(found_key.objectid < end);
215 ASSERT(found_key.objectid + found_key.offset <= end);
/* Mark the extent's sectors in the in-memory bitmap. */
217 first = div_u64(found_key.objectid - start,
218 block_group->sectorsize);
219 last = div_u64(found_key.objectid + found_key.offset - start,
220 block_group->sectorsize);
221 bitmap_set(bitmap, first, last - first);
231 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
234 btrfs_release_path(path);
/* Flip the info item's flags to the bitmap format. */
237 info = search_free_space_info(trans, fs_info, block_group, path, 1);
242 leaf = path->nodes[0];
243 flags = btrfs_free_space_flags(leaf, info);
244 flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
245 btrfs_set_free_space_flags(leaf, info, flags);
246 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
247 btrfs_mark_buffer_dirty(leaf);
248 btrfs_release_path(path);
250 if (extent_count != expected_extent_count) {
251 btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
252 block_group->key.objectid, extent_count,
253 expected_extent_count);
/* Write the in-memory bitmap out as one item per bitmap_range chunk. */
259 bitmap_cursor = (char *)bitmap;
260 bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
267 extent_size = min(end - i, bitmap_range);
268 data_size = free_space_bitmap_size(extent_size,
269 block_group->sectorsize);
272 key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
273 key.offset = extent_size;
275 ret = btrfs_insert_empty_item(trans, root, path, &key,
280 leaf = path->nodes[0];
281 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
282 write_extent_buffer(leaf, bitmap_cursor, ptr,
284 btrfs_mark_buffer_dirty(leaf);
285 btrfs_release_path(path);
288 bitmap_cursor += data_size;
295 btrfs_abort_transaction(trans, root, ret);
/*
 * Inverse of convert_free_space_to_bitmaps(): read all FREE_SPACE_BITMAP
 * items for @block_group into an in-memory bitmap, delete them, clear
 * BTRFS_FREE_SPACE_USING_BITMAPS in the info item, then scan the bitmap for
 * runs of set bits and insert one FREE_SPACE_EXTENT item per run.
 * Cross-checks the counted extents against the info item's extent count.
 * Aborts the transaction on error. (Excerpt: embedded line numbers show
 * omitted lines — loop/error-path details are incomplete here.)
 */
299 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
300 struct btrfs_fs_info *fs_info,
301 struct btrfs_block_group_cache *block_group,
302 struct btrfs_path *path)
304 struct btrfs_root *root = fs_info->free_space_root;
305 struct btrfs_free_space_info *info;
306 struct btrfs_key key, found_key;
307 struct extent_buffer *leaf;
308 unsigned long *bitmap;
310 /* Initialize to silence GCC. */
311 u64 extent_start = 0;
313 u32 bitmap_size, flags, expected_extent_count;
314 int prev_bit = 0, bit, bitnr;
315 u32 extent_count = 0;
319 bitmap_size = free_space_bitmap_size(block_group->key.offset,
320 block_group->sectorsize);
321 bitmap = alloc_bitmap(bitmap_size);
327 start = block_group->key.objectid;
328 end = block_group->key.objectid + block_group->key.offset;
/* Search from the last byte of the group backwards. */
330 key.objectid = end - 1;
332 key.offset = (u64)-1;
335 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
339 leaf = path->nodes[0];
342 while (path->slots[0] > 0) {
343 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
345 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
346 ASSERT(found_key.objectid == block_group->key.objectid);
347 ASSERT(found_key.offset == block_group->key.offset);
350 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
353 u32 bitmap_pos, data_size;
355 ASSERT(found_key.objectid >= start);
356 ASSERT(found_key.objectid < end);
357 ASSERT(found_key.objectid + found_key.offset <= end);
/* Copy this on-disk bitmap chunk into the in-memory bitmap. */
359 bitmap_pos = div_u64(found_key.objectid - start,
360 block_group->sectorsize *
362 bitmap_cursor = ((char *)bitmap) + bitmap_pos;
363 data_size = free_space_bitmap_size(found_key.offset,
364 block_group->sectorsize);
366 ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
367 read_extent_buffer(leaf, bitmap_cursor, ptr,
377 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
380 btrfs_release_path(path);
/* Flip the info item's flags back to the extent format. */
383 info = search_free_space_info(trans, fs_info, block_group, path, 1);
388 leaf = path->nodes[0];
389 flags = btrfs_free_space_flags(leaf, info);
390 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
391 btrfs_set_free_space_flags(leaf, info, flags);
392 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
393 btrfs_mark_buffer_dirty(leaf);
394 btrfs_release_path(path);
/* Emit one extent item per run of set bits (0->1 starts, 1->0 ends). */
398 while (offset < end) {
399 bit = !!test_bit(bitnr, bitmap);
400 if (prev_bit == 0 && bit == 1) {
401 extent_start = offset;
402 } else if (prev_bit == 1 && bit == 0) {
403 key.objectid = extent_start;
404 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
405 key.offset = offset - extent_start;
407 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
410 btrfs_release_path(path);
415 offset += block_group->sectorsize;
/* Close out a run of set bits that extends to the end of the group. */
419 key.objectid = extent_start;
420 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
421 key.offset = end - extent_start;
423 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
426 btrfs_release_path(path);
431 if (extent_count != expected_extent_count) {
432 btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
433 block_group->key.objectid, extent_count,
434 expected_extent_count);
444 btrfs_abort_transaction(trans, root, ret);
/*
 * Adjust the free space extent count of @block_group's info item by
 * new_extents (may be negative), then convert the representation if the
 * new count crossed the high threshold (extents -> bitmaps) or low
 * threshold (bitmaps -> extents). (Excerpt: embedded line numbers show
 * omitted lines — the new_extents parameter declaration is not visible.)
 */
448 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
449 struct btrfs_fs_info *fs_info,
450 struct btrfs_block_group_cache *block_group,
451 struct btrfs_path *path,
454 struct btrfs_free_space_info *info;
459 if (new_extents == 0)
462 info = search_free_space_info(trans, fs_info, block_group, path, 1);
467 flags = btrfs_free_space_flags(path->nodes[0], info);
468 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
470 extent_count += new_extents;
471 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
472 btrfs_mark_buffer_dirty(path->nodes[0]);
473 btrfs_release_path(path);
475 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
476 extent_count > block_group->bitmap_high_thresh) {
477 ret = convert_free_space_to_bitmaps(trans, fs_info, block_group,
479 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
480 extent_count < block_group->bitmap_low_thresh) {
481 ret = convert_free_space_to_extents(trans, fs_info, block_group,
/*
 * Return the bit (0 or 1) for byte @offset within the FREE_SPACE_BITMAP
 * item that @path currently points to. Asserts that the item is a bitmap
 * item and that @offset lies inside its range.
 */
489 int free_space_test_bit(struct btrfs_block_group_cache *block_group,
490 struct btrfs_path *path, u64 offset)
492 struct extent_buffer *leaf;
493 struct btrfs_key key;
494 u64 found_start, found_end;
495 unsigned long ptr, i;
497 leaf = path->nodes[0];
498 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
499 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
501 found_start = key.objectid;
502 found_end = key.objectid + key.offset;
503 ASSERT(offset >= found_start && offset < found_end);
/* Bit index = sector offset from the start of this bitmap item. */
505 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
506 i = div_u64(offset - found_start, block_group->sectorsize);
507 return !!extent_buffer_test_bit(leaf, ptr, i);
/*
 * Set or clear the bits for [*start, *start + *size) within the bitmap item
 * @path points to, clamped to that item's range, and shrink *size by the
 * amount consumed so callers can continue into the next bitmap item.
 * (Excerpt: embedded line numbers show omitted lines — the set/clear
 * selector parameter, the clamping of `end`, and the update of *start are
 * not visible in this dump; confirm against the full file.)
 */
510 static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
511 struct btrfs_path *path, u64 *start, u64 *size,
514 struct extent_buffer *leaf;
515 struct btrfs_key key;
516 u64 end = *start + *size;
517 u64 found_start, found_end;
518 unsigned long ptr, first, last;
520 leaf = path->nodes[0];
521 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
522 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
524 found_start = key.objectid;
525 found_end = key.objectid + key.offset;
526 ASSERT(*start >= found_start && *start < found_end);
527 ASSERT(end > found_start);
532 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
533 first = div_u64(*start - found_start, block_group->sectorsize);
534 last = div_u64(end - found_start, block_group->sectorsize);
536 extent_buffer_bitmap_set(leaf, ptr, first, last - first);
538 extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
539 btrfs_mark_buffer_dirty(leaf);
541 *size -= end - *start;
/*
 * Advance @p from the current bitmap item to the next one by re-searching
 * from (current objectid + offset), because btrfs_next_leaf() would not
 * take the path for writing. (Excerpt: embedded line numbers show omitted
 * lines in the fast path where the next item is on the same leaf.)
 */
546 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
547 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
548 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
551 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
552 struct btrfs_root *root, struct btrfs_path *p)
554 struct btrfs_key key;
/* Next item is on the same leaf: just bump the slot. */
556 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
561 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
562 btrfs_release_path(p);
564 key.objectid += key.offset;
566 key.offset = (u64)-1;
568 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
/*
 * Add (@remove == 0, set bits) or remove (@remove == 1, clear bits) the
 * range [start, start + size) in @block_group's free space bitmaps, walking
 * every bitmap item the range overlaps. Afterwards the bits immediately
 * before and after the range (prev_bit/next_bit) determine how the free
 * space extent count changed (split, merge, or leftover cases), which is
 * applied via update_free_space_extent_count(). (Excerpt: embedded line
 * numbers show omitted lines — the case analysis bodies near the end are
 * incomplete in this dump.)
 */
572 * If remove is 1, then we are removing free space, thus clearing bits in the
573 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
576 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
577 struct btrfs_fs_info *fs_info,
578 struct btrfs_block_group_cache *block_group,
579 struct btrfs_path *path,
580 u64 start, u64 size, int remove)
582 struct btrfs_root *root = fs_info->free_space_root;
583 struct btrfs_key key;
584 u64 end = start + size;
585 u64 cur_start, cur_size;
586 int prev_bit, next_bit;
591 * Read the bit for the block immediately before the extent of space if
592 * that block is within the block group.
594 if (start > block_group->key.objectid) {
595 u64 prev_block = start - block_group->sectorsize;
597 key.objectid = prev_block;
599 key.offset = (u64)-1;
601 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
605 prev_bit = free_space_test_bit(block_group, path, prev_block);
607 /* The previous block may have been in the previous bitmap. */
608 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
609 if (start >= key.objectid + key.offset) {
610 ret = free_space_next_bitmap(trans, root, path);
615 key.objectid = start;
617 key.offset = (u64)-1;
619 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
627 * Iterate over all of the bitmaps overlapped by the extent of space,
628 * clearing/setting bits as required.
633 free_space_set_bits(block_group, path, &cur_start, &cur_size,
637 ret = free_space_next_bitmap(trans, root, path);
643 * Read the bit for the block immediately after the extent of space if
644 * that block is within the block group.
646 if (end < block_group->key.objectid + block_group->key.offset) {
647 /* The next block may be in the next bitmap. */
648 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
649 if (end >= key.objectid + key.offset) {
650 ret = free_space_next_bitmap(trans, root, path);
655 next_bit = free_space_test_bit(block_group, path, end);
663 /* Leftover on the left. */
667 /* Leftover on the right. */
673 /* Merging with neighbor on the left. */
677 /* Merging with neighbor on the right. */
682 btrfs_release_path(path);
683 ret = update_free_space_extent_count(trans, fs_info, block_group, path,
/*
 * Remove [start, start + size) from the FREE_SPACE_EXTENT item that
 * contains it (extent-item format only). The four cases — whole extent,
 * head, tail, middle split — are documented inline below; the net change
 * in extent count is passed to update_free_space_extent_count().
 * (Excerpt: embedded line numbers show omitted lines — the new_extents
 * bookkeeping for the split cases is not fully visible.)
 */
690 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
691 struct btrfs_fs_info *fs_info,
692 struct btrfs_block_group_cache *block_group,
693 struct btrfs_path *path,
696 struct btrfs_root *root = fs_info->free_space_root;
697 struct btrfs_key key;
698 u64 found_start, found_end;
699 u64 end = start + size;
700 int new_extents = -1;
703 key.objectid = start;
705 key.offset = (u64)-1;
707 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
711 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
713 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
715 found_start = key.objectid;
716 found_end = key.objectid + key.offset;
717 ASSERT(start >= found_start && end <= found_end);
720 * Okay, now that we've found the free space extent which contains the
721 * free space that we are removing, there are four cases:
723 * 1. We're using the whole extent: delete the key we found and
724 * decrement the free space extent count.
725 * 2. We are using part of the extent starting at the beginning: delete
726 * the key we found and insert a new key representing the leftover at
727 * the end. There is no net change in the number of extents.
728 * 3. We are using part of the extent ending at the end: delete the key
729 * we found and insert a new key representing the leftover at the
730 * beginning. There is no net change in the number of extents.
731 * 4. We are using part of the extent in the middle: delete the key we
732 * found and insert two new keys representing the leftovers on each
733 * side. Where we used to have one extent, we now have two, so increment
734 * the extent count. We may need to convert the block group to bitmaps
738 /* Delete the existing key (cases 1-4). */
739 ret = btrfs_del_item(trans, root, path);
743 /* Add a key for leftovers at the beginning (cases 3 and 4). */
744 if (start > found_start) {
745 key.objectid = found_start;
746 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
747 key.offset = start - found_start;
749 btrfs_release_path(path);
750 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
756 /* Add a key for leftovers at the end (cases 2 and 4). */
757 if (end < found_end) {
759 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
760 key.offset = found_end - end;
762 btrfs_release_path(path);
763 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
769 btrfs_release_path(path);
770 ret = update_free_space_extent_count(trans, fs_info, block_group, path,
/*
 * Remove [start, start + size) from @block_group's free space tree entries,
 * lazily adding the block group's tree items first if needed, then
 * dispatching on the info item's format flag: bitmap removal clears bits,
 * extent removal splits/deletes extent items. Caller holds
 * block_group->free_space_lock (see remove_from_free_space_tree()).
 */
777 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
778 struct btrfs_fs_info *fs_info,
779 struct btrfs_block_group_cache *block_group,
780 struct btrfs_path *path, u64 start, u64 size)
782 struct btrfs_free_space_info *info;
786 if (block_group->needs_free_space) {
787 ret = __add_block_group_free_space(trans, fs_info, block_group,
793 info = search_free_space_info(NULL, fs_info, block_group, path, 0);
795 return PTR_ERR(info);
796 flags = btrfs_free_space_flags(path->nodes[0], info);
797 btrfs_release_path(path);
799 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
800 return modify_free_space_bitmap(trans, fs_info, block_group,
801 path, start, size, 1);
803 return remove_free_space_extent(trans, fs_info, block_group,
/*
 * Public entry point: locate the block group containing @start, take its
 * free_space_lock, and remove the range via __remove_from_free_space_tree().
 * No-op when the FREE_SPACE_TREE compat_ro feature is not enabled. Aborts
 * the transaction on error. (Excerpt: embedded line numbers show omitted
 * lines — the size parameter and some error paths are not visible.)
 */
808 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
809 struct btrfs_fs_info *fs_info,
812 struct btrfs_block_group_cache *block_group;
813 struct btrfs_path *path;
816 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
819 path = btrfs_alloc_path();
825 block_group = btrfs_lookup_block_group(fs_info, start);
832 mutex_lock(&block_group->free_space_lock);
833 ret = __remove_from_free_space_tree(trans, fs_info, block_group, path,
835 mutex_unlock(&block_group->free_space_lock);
837 btrfs_put_block_group(block_group);
839 btrfs_free_path(path);
841 btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
/*
 * Insert [start, start + size) as free space in extent-item format,
 * merging with an adjacent free extent on either side when present. The
 * four merge cases are documented inline; the net extent-count change is
 * applied via update_free_space_extent_count(). (Excerpt: embedded line
 * numbers show omitted lines — the new_extents bookkeeping and the
 * right-neighbor search key setup are not fully visible.)
 */
845 static int add_free_space_extent(struct btrfs_trans_handle *trans,
846 struct btrfs_fs_info *fs_info,
847 struct btrfs_block_group_cache *block_group,
848 struct btrfs_path *path,
851 struct btrfs_root *root = fs_info->free_space_root;
852 struct btrfs_key key, new_key;
853 u64 found_start, found_end;
854 u64 end = start + size;
859 * We are adding a new extent of free space, but we need to merge
860 * extents. There are four cases here:
862 * 1. The new extent does not have any immediate neighbors to merge
863 * with: add the new key and increment the free space extent count. We
864 * may need to convert the block group to bitmaps as a result.
865 * 2. The new extent has an immediate neighbor before it: remove the
866 * previous key and insert a new key combining both of them. There is no
867 * net change in the number of extents.
868 * 3. The new extent has an immediate neighbor after it: remove the next
869 * key and insert a new key combining both of them. There is no net
870 * change in the number of extents.
871 * 4. The new extent has immediate neighbors on both sides: remove both
872 * of the keys and insert a new key combining all of them. Where we used
873 * to have two extents, we now have one, so decrement the extent count.
876 new_key.objectid = start;
877 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
878 new_key.offset = size;
880 /* Search for a neighbor on the left. */
881 if (start == block_group->key.objectid)
883 key.objectid = start - 1;
885 key.offset = (u64)-1;
887 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
891 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
893 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
894 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
895 btrfs_release_path(path);
899 found_start = key.objectid;
900 found_end = key.objectid + key.offset;
901 ASSERT(found_start >= block_group->key.objectid &&
902 found_end > block_group->key.objectid);
903 ASSERT(found_start < start && found_end <= start);
906 * Delete the neighbor on the left and absorb it into the new key (cases
909 if (found_end == start) {
910 ret = btrfs_del_item(trans, root, path);
913 new_key.objectid = found_start;
914 new_key.offset += key.offset;
917 btrfs_release_path(path);
920 /* Search for a neighbor on the right. */
921 if (end == block_group->key.objectid + block_group->key.offset)
925 key.offset = (u64)-1;
927 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
931 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
933 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
934 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
935 btrfs_release_path(path);
939 found_start = key.objectid;
940 found_end = key.objectid + key.offset;
941 ASSERT(found_start >= block_group->key.objectid &&
942 found_end > block_group->key.objectid);
943 ASSERT((found_start < start && found_end <= start) ||
944 (found_start >= end && found_end > end));
947 * Delete the neighbor on the right and absorb it into the new key
950 if (found_start == end) {
951 ret = btrfs_del_item(trans, root, path);
954 new_key.offset += key.offset;
957 btrfs_release_path(path);
960 /* Insert the new key (cases 1-4). */
961 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
965 btrfs_release_path(path);
966 ret = update_free_space_extent_count(trans, fs_info, block_group, path,
/*
 * Add [start, start + size) as free space for @block_group, lazily adding
 * the block group's tree items first if needed, then dispatching on the
 * info item's format flag (bitmap set-bits vs. extent merge/insert).
 * Caller holds block_group->free_space_lock (see add_to_free_space_tree()).
 */
973 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
974 struct btrfs_fs_info *fs_info,
975 struct btrfs_block_group_cache *block_group,
976 struct btrfs_path *path, u64 start, u64 size)
978 struct btrfs_free_space_info *info;
982 if (block_group->needs_free_space) {
983 ret = __add_block_group_free_space(trans, fs_info, block_group,
989 info = search_free_space_info(NULL, fs_info, block_group, path, 0);
991 return PTR_ERR(info);
992 flags = btrfs_free_space_flags(path->nodes[0], info);
993 btrfs_release_path(path);
995 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
996 return modify_free_space_bitmap(trans, fs_info, block_group,
997 path, start, size, 0);
999 return add_free_space_extent(trans, fs_info, block_group, path,
/*
 * Public entry point: locate the block group containing @start, take its
 * free_space_lock, and add the range via __add_to_free_space_tree().
 * No-op when the FREE_SPACE_TREE compat_ro feature is not enabled. Aborts
 * the transaction on error.
 */
1004 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1005 struct btrfs_fs_info *fs_info,
1006 u64 start, u64 size)
1008 struct btrfs_block_group_cache *block_group;
1009 struct btrfs_path *path;
1012 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1015 path = btrfs_alloc_path();
1021 block_group = btrfs_lookup_block_group(fs_info, start);
1028 mutex_lock(&block_group->free_space_lock);
1029 ret = __add_to_free_space_tree(trans, fs_info, block_group, path, start,
1031 mutex_unlock(&block_group->free_space_lock);
1033 btrfs_put_block_group(block_group);
1035 btrfs_free_path(path);
1037 btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
/*
 * Build @block_group's free space tree entries from scratch by walking the
 * extent tree: every gap between allocated extent/metadata items (and the
 * tail of the group) is recorded as free space. (Excerpt: embedded line
 * numbers show omitted lines — loop control and error paths are
 * incomplete in this dump.)
 */
1042 * Populate the free space tree by walking the extent tree. Operations on the
1043 * extent tree that happen as a result of writes to the free space tree will go
1044 * through the normal add/remove hooks.
1046 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1047 struct btrfs_fs_info *fs_info,
1048 struct btrfs_block_group_cache *block_group)
1050 struct btrfs_root *extent_root = fs_info->extent_root;
1051 struct btrfs_path *path, *path2;
1052 struct btrfs_key key;
1056 path = btrfs_alloc_path();
1061 path2 = btrfs_alloc_path();
1063 btrfs_free_path(path);
1067 ret = add_new_free_space_info(trans, fs_info, block_group, path2);
1071 mutex_lock(&block_group->free_space_lock);
1074 * Iterate through all of the extent and metadata items in this block
1075 * group, adding the free space between them and the free space at the
1076 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1077 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1080 key.objectid = block_group->key.objectid;
1081 key.type = BTRFS_EXTENT_ITEM_KEY;
1084 ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1089 start = block_group->key.objectid;
1090 end = block_group->key.objectid + block_group->key.offset;
1092 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1094 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1095 key.type == BTRFS_METADATA_ITEM_KEY) {
1096 if (key.objectid >= end)
/* Gap before this allocated extent is free space. */
1099 if (start < key.objectid) {
1100 ret = __add_to_free_space_tree(trans, fs_info,
1108 start = key.objectid;
/* METADATA_ITEM keys encode level in offset; length is nodesize. */
1109 if (key.type == BTRFS_METADATA_ITEM_KEY)
1110 start += fs_info->tree_root->nodesize;
1112 start += key.offset;
1113 } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1114 if (key.objectid != block_group->key.objectid)
1118 ret = btrfs_next_item(extent_root, path);
/* Tail of the block group after the last allocated extent. */
1125 ret = __add_to_free_space_tree(trans, fs_info, block_group,
1126 path2, start, end - start);
1133 mutex_unlock(&block_group->free_space_lock);
1135 btrfs_free_path(path2);
1136 btrfs_free_path(path);
/*
 * Create the free space tree: start a transaction, create the tree root,
 * populate it from every block group, set the FREE_SPACE_TREE compat_ro
 * bit (with sysfs notification), and commit. creating_free_space_tree
 * gates concurrent tree updates during population. Aborts and ends the
 * transaction on error. (Excerpt: embedded line numbers show omitted
 * lines — some error-path jumps are not visible.)
 */
1140 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1142 struct btrfs_trans_handle *trans;
1143 struct btrfs_root *tree_root = fs_info->tree_root;
1144 struct btrfs_root *free_space_root;
1145 struct btrfs_block_group_cache *block_group;
1146 struct rb_node *node;
1149 trans = btrfs_start_transaction(tree_root, 0);
1151 return PTR_ERR(trans);
1153 fs_info->creating_free_space_tree = 1;
1154 free_space_root = btrfs_create_tree(trans, fs_info,
1155 BTRFS_FREE_SPACE_TREE_OBJECTID);
1156 if (IS_ERR(free_space_root)) {
1157 ret = PTR_ERR(free_space_root);
1160 fs_info->free_space_root = free_space_root;
/* Populate from every cached block group, in address order. */
1162 node = rb_first(&fs_info->block_group_cache_tree);
1164 block_group = rb_entry(node, struct btrfs_block_group_cache,
1166 ret = populate_free_space_tree(trans, fs_info, block_group);
1169 node = rb_next(node);
1172 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1173 btrfs_sysfs_feature_update(fs_info,
1174 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
1176 fs_info->creating_free_space_tree = 0;
1178 ret = btrfs_commit_transaction(trans, tree_root);
1185 fs_info->creating_free_space_tree = 0;
1186 btrfs_abort_transaction(trans, tree_root, ret);
1187 btrfs_end_transaction(trans, tree_root);
/*
 * Delete every item in @root, one leaf at a time: repeatedly search to the
 * first leaf and bulk-delete all of its items until the tree is empty.
 * (Excerpt: embedded line numbers show omitted lines — the key setup and
 * loop structure are incomplete in this dump.)
 */
1191 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1192 struct btrfs_root *root)
1194 struct btrfs_path *path;
1195 struct btrfs_key key;
1199 path = btrfs_alloc_path();
1203 path->leave_spinning = 1;
1210 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1214 nr = btrfs_header_nritems(path->nodes[0]);
1219 ret = btrfs_del_items(trans, root, path, 0, nr);
1223 btrfs_release_path(path);
1228 btrfs_free_path(path);
/*
 * Tear down the free space tree: clear the FREE_SPACE_TREE compat_ro bit,
 * empty the tree, delete its root item, free its root node/buffers, and
 * commit. fs_info->free_space_root is cleared before the items are
 * deleted so other code stops using the tree. Aborts and ends the
 * transaction on error.
 */
1232 int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1234 struct btrfs_trans_handle *trans;
1235 struct btrfs_root *tree_root = fs_info->tree_root;
1236 struct btrfs_root *free_space_root = fs_info->free_space_root;
1239 trans = btrfs_start_transaction(tree_root, 0);
1241 return PTR_ERR(trans);
1243 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1244 btrfs_sysfs_feature_update(fs_info,
1245 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
1247 fs_info->free_space_root = NULL;
1249 ret = clear_free_space_tree(trans, free_space_root);
1253 ret = btrfs_del_root(trans, tree_root, &free_space_root->root_key);
1257 list_del(&free_space_root->dirty_list);
/* Free the now-empty root's tree block and in-memory root structure. */
1259 btrfs_tree_lock(free_space_root->node);
1260 clean_tree_block(trans, tree_root->fs_info, free_space_root->node);
1261 btrfs_tree_unlock(free_space_root->node);
1262 btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
1265 free_extent_buffer(free_space_root->node);
1266 free_extent_buffer(free_space_root->commit_root);
1267 kfree(free_space_root);
1269 ret = btrfs_commit_transaction(trans, tree_root);
1276 btrfs_abort_transaction(trans, tree_root, ret);
1277 btrfs_end_transaction(trans, tree_root);
/*
 * Lazily create @block_group's free space tree entries: clear the
 * needs_free_space flag, insert its FREE_SPACE_INFO item, then record the
 * entire block group range as free. Caller holds free_space_lock.
 */
1281 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1282 struct btrfs_fs_info *fs_info,
1283 struct btrfs_block_group_cache *block_group,
1284 struct btrfs_path *path)
1289 start = block_group->key.objectid;
1290 end = block_group->key.objectid + block_group->key.offset;
1292 block_group->needs_free_space = 0;
1294 ret = add_new_free_space_info(trans, fs_info, block_group, path);
1298 return __add_to_free_space_tree(trans, fs_info, block_group, path,
1299 block_group->key.objectid,
1300 block_group->key.offset);
/*
 * Public wrapper: under free_space_lock, create the block group's free
 * space tree entries if they have not been created yet. No-op when the
 * FREE_SPACE_TREE feature is disabled or the entries already exist.
 * Aborts the transaction on error.
 */
1303 int add_block_group_free_space(struct btrfs_trans_handle *trans,
1304 struct btrfs_fs_info *fs_info,
1305 struct btrfs_block_group_cache *block_group)
1307 struct btrfs_path *path = NULL;
1310 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1313 mutex_lock(&block_group->free_space_lock);
1314 if (!block_group->needs_free_space)
1317 path = btrfs_alloc_path();
1323 ret = __add_block_group_free_space(trans, fs_info, block_group, path);
1326 btrfs_free_path(path);
1327 mutex_unlock(&block_group->free_space_lock);
1329 btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
/*
 * Delete all free space tree items belonging to @block_group (its info
 * item plus every extent/bitmap item), walking backwards from the end of
 * the group and bulk-deleting. No-op when the feature is disabled or the
 * group was never added. Aborts the transaction on error. (Excerpt:
 * embedded line numbers show omitted lines — the deletion loop's control
 * flow is incomplete in this dump.)
 */
1333 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1334 struct btrfs_fs_info *fs_info,
1335 struct btrfs_block_group_cache *block_group)
1337 struct btrfs_root *root = fs_info->free_space_root;
1338 struct btrfs_path *path;
1339 struct btrfs_key key, found_key;
1340 struct extent_buffer *leaf;
1345 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1348 if (block_group->needs_free_space) {
1349 /* We never added this block group to the free space tree. */
1353 path = btrfs_alloc_path();
1359 start = block_group->key.objectid;
1360 end = block_group->key.objectid + block_group->key.offset;
1362 key.objectid = end - 1;
1364 key.offset = (u64)-1;
1367 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1371 leaf = path->nodes[0];
1374 while (path->slots[0] > 0) {
1375 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1377 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1378 ASSERT(found_key.objectid == block_group->key.objectid);
1379 ASSERT(found_key.offset == block_group->key.offset);
1384 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1385 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1386 ASSERT(found_key.objectid >= start);
1387 ASSERT(found_key.objectid < end);
1388 ASSERT(found_key.objectid + found_key.offset <= end);
1396 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1399 btrfs_release_path(path);
1404 btrfs_free_path(path);
1406 btrfs_abort_transaction(trans, root, ret);
/*
 * Caching-thread loader for the bitmap format: iterate the block group's
 * FREE_SPACE_BITMAP items (path starts at the info item), scan bits
 * sector by sector, and feed each run of set bits to add_new_free_space().
 * Wakes waiters periodically (CACHING_CTL_WAKE_UP) and advances
 * caching_ctl->progress. Cross-checks against @expected_extent_count.
 * (Excerpt: embedded line numbers show omitted lines — loop entry/exit
 * and extent_count updates are incomplete in this dump.)
 */
1410 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1411 struct btrfs_path *path,
1412 u32 expected_extent_count)
1414 struct btrfs_block_group_cache *block_group;
1415 struct btrfs_fs_info *fs_info;
1416 struct btrfs_root *root;
1417 struct btrfs_key key;
1418 int prev_bit = 0, bit;
1419 /* Initialize to silence GCC. */
1420 u64 extent_start = 0;
1422 u64 total_found = 0;
1423 u32 extent_count = 0;
1426 block_group = caching_ctl->block_group;
1427 fs_info = block_group->fs_info;
1428 root = fs_info->free_space_root;
1430 end = block_group->key.objectid + block_group->key.offset;
1433 ret = btrfs_next_item(root, path);
1439 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The next block group's info item terminates the walk. */
1441 if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1444 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1445 ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1447 caching_ctl->progress = key.objectid;
1449 offset = key.objectid;
1450 while (offset < key.objectid + key.offset) {
1451 bit = free_space_test_bit(block_group, path, offset);
1452 if (prev_bit == 0 && bit == 1) {
1453 extent_start = offset;
1454 } else if (prev_bit == 1 && bit == 0) {
1455 total_found += add_new_free_space(block_group,
1459 if (total_found > CACHING_CTL_WAKE_UP) {
1461 wake_up(&caching_ctl->wait);
1466 offset += block_group->sectorsize;
/* Close out a trailing run of set bits at the end of the group. */
1469 if (prev_bit == 1) {
1470 total_found += add_new_free_space(block_group, fs_info,
1475 if (extent_count != expected_extent_count) {
1476 btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
1477 block_group->key.objectid, extent_count,
1478 expected_extent_count);
1484 caching_ctl->progress = (u64)-1;
/*
 * Caching-thread loader for the extent format: iterate the block group's
 * FREE_SPACE_EXTENT items (path starts at the info item) and feed each
 * one directly to add_new_free_space(). Wakes waiters periodically and
 * advances caching_ctl->progress. Cross-checks against
 * @expected_extent_count. (Excerpt: embedded line numbers show omitted
 * lines — loop entry/exit and extent_count updates are incomplete.)
 */
1491 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1492 struct btrfs_path *path,
1493 u32 expected_extent_count)
1495 struct btrfs_block_group_cache *block_group;
1496 struct btrfs_fs_info *fs_info;
1497 struct btrfs_root *root;
1498 struct btrfs_key key;
1500 u64 total_found = 0;
1501 u32 extent_count = 0;
1504 block_group = caching_ctl->block_group;
1505 fs_info = block_group->fs_info;
1506 root = fs_info->free_space_root;
1508 end = block_group->key.objectid + block_group->key.offset;
1511 ret = btrfs_next_item(root, path);
1517 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The next block group's info item terminates the walk. */
1519 if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1522 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1523 ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1525 caching_ctl->progress = key.objectid;
1527 total_found += add_new_free_space(block_group, fs_info,
1529 key.objectid + key.offset);
1530 if (total_found > CACHING_CTL_WAKE_UP) {
1532 wake_up(&caching_ctl->wait);
1537 if (extent_count != expected_extent_count) {
1538 btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
1539 block_group->key.objectid, extent_count,
1540 expected_extent_count);
1546 caching_ctl->progress = (u64)-1;
1553 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1555 struct btrfs_block_group_cache *block_group;
1556 struct btrfs_fs_info *fs_info;
1557 struct btrfs_free_space_info *info;
1558 struct btrfs_path *path;
1559 u32 extent_count, flags;
1562 block_group = caching_ctl->block_group;
1563 fs_info = block_group->fs_info;
1565 path = btrfs_alloc_path();
1570 * Just like caching_thread() doesn't want to deadlock on the extent
1571 * tree, we don't want to deadlock on the free space tree.
1573 path->skip_locking = 1;
1574 path->search_commit_root = 1;
1577 info = search_free_space_info(NULL, fs_info, block_group, path, 0);
1579 ret = PTR_ERR(info);
1582 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1583 flags = btrfs_free_space_flags(path->nodes[0], info);
1586 * We left path pointing to the free space info item, so now
1587 * load_free_space_foo can just iterate through the free space tree from
1590 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1591 ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1593 ret = load_free_space_extents(caching_ctl, path, extent_count);
1596 btrfs_free_path(path);