2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <linux/bio.h>
20 #include <linux/slab.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
25 #include "transaction.h"
26 #include "print-tree.h"
/*
 * Size limits used by the checksum code in this file.
 *
 * NOTE(review): in this listing the definitions of __MAX_CSUM_ITEMS and
 * MAX_CSUM_ITEMS stop in the middle of their expressions (the continuation
 * lines are missing), so only MAX_ORDERED_SUM_BYTES can be read in full.
 * MAX_ORDERED_SUM_BYTES(r) evaluates to: the number of
 * struct btrfs_sector_sum entries that fit in PAGE_SIZE after one
 * struct btrfs_ordered_sum, multiplied by (r)->sectorsize, minus one
 * further (r)->sectorsize.
 */
28 #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
29 sizeof(struct btrfs_item) * 2) / \
32 #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
35 #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
36 sizeof(struct btrfs_ordered_sum)) / \
37 sizeof(struct btrfs_sector_sum) * \
38 (r)->sectorsize - (r)->sectorsize)
/*
 * Insert a file extent item of type BTRFS_FILE_EXTENT_REG into @root.
 *
 * The item is keyed by (objectid, BTRFS_EXTENT_DATA_KEY, pos). After the
 * empty item has been inserted, every field is filled in from the
 * corresponding argument through the btrfs_set_file_extent_*() setters;
 * the generation comes from trans->transid. The leaf is then marked dirty
 * and the temporary path is freed.
 *
 * NOTE(review): the embedded line numbers skip in several places, i.e. this
 * listing omits lines (braces, the declaration of ret, the tail of the
 * btrfs_insert_empty_item() call, the return). The return value is therefore
 * not visible here.
 */
40 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root,
42 u64 objectid, u64 pos,
43 u64 disk_offset, u64 disk_num_bytes,
44 u64 num_bytes, u64 offset, u64 ram_bytes,
45 u8 compression, u8 encryption, u16 other_encoding)
48 struct btrfs_file_extent_item *item;
49 struct btrfs_key file_key;
50 struct btrfs_path *path;
51 struct extent_buffer *leaf;
53 path = btrfs_alloc_path();
/* key of the new item: (objectid, EXTENT_DATA, pos) */
56 file_key.objectid = objectid;
57 file_key.offset = pos;
58 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
60 path->leave_spinning = 1;
61 ret = btrfs_insert_empty_item(trans, root, path, &file_key,
65 BUG_ON(ret); /* Can't happen */
/* the path now points at the freshly inserted item; fill in its fields */
66 leaf = path->nodes[0];
67 item = btrfs_item_ptr(leaf, path->slots[0],
68 struct btrfs_file_extent_item);
69 btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
70 btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
71 btrfs_set_file_extent_offset(leaf, item, offset);
72 btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
73 btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
74 btrfs_set_file_extent_generation(leaf, item, trans->transid);
75 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
76 btrfs_set_file_extent_compression(leaf, item, compression);
77 btrfs_set_file_extent_encryption(leaf, item, encryption);
78 btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
80 btrfs_mark_buffer_dirty(leaf);
82 btrfs_free_path(path);
/*
 * Look up the checksum entry for disk byte @bytenr in @root.
 *
 * Searches for the key (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY,
 * bytenr). The visible code then reads the key of an item in the returned
 * leaf, requires it to be of type BTRFS_EXTENT_CSUM_KEY, and converts the
 * distance between bytenr and that key's offset into a checksum index by
 * shifting with the superblock's s_blocksize_bits. The number of checksums
 * held by the item is its size divided by csum_size. The final pointer is
 * the start of the item advanced by index * csum_size bytes.
 *
 * NOTE(review): this listing omits lines (the remaining parameters, braces,
 * the bodies of the if statements, the return paths). Callers in this file
 * test an error code against -ENOENT and -EFBIG after calling this function;
 * presumably those values come from the omitted branches - confirm against
 * the complete source.
 */
86 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
87 struct btrfs_root *root,
88 struct btrfs_path *path,
92 struct btrfs_key file_key;
93 struct btrfs_key found_key;
94 struct btrfs_csum_item *item;
95 struct extent_buffer *leaf;
97 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
100 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
101 file_key.offset = bytenr;
102 btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
103 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
106 leaf = path->nodes[0];
109 if (path->slots[0] == 0)
112 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
113 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
/* index of the wanted checksum inside the item, in blocks */
116 csum_offset = (bytenr - found_key.offset) >>
117 root->fs_info->sb->s_blocksize_bits;
/* item size in bytes -> number of checksums stored in it */
118 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
119 csums_in_item /= csum_size;
121 if (csum_offset >= csums_in_item) {
/* step from the start of the item to the wanted checksum */
126 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
127 item = (struct btrfs_csum_item *)((unsigned char *)item +
128 csum_offset * csum_size);
/*
 * Search @root for the item with key
 * (objectid, BTRFS_EXTENT_DATA_KEY, offset), leaving the result in @path.
 *
 * ins_len passed to btrfs_search_slot() is -1 when mod is negative and 0
 * otherwise.
 *
 * NOTE(review): the trailing parameters, the declarations of ret and cow and
 * the return statement are not part of this listing.
 */
137 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
138 struct btrfs_root *root,
139 struct btrfs_path *path, u64 objectid,
143 struct btrfs_key file_key;
144 int ins_len = mod < 0 ? -1 : 0;
147 file_key.objectid = objectid;
148 file_key.offset = offset;
149 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
150 ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
/*
 * Common implementation behind btrfs_lookup_bio_sums() and
 * btrfs_lookup_bio_sums_dio(): find the checksum for each segment of @bio.
 *
 * The walk starts at disk byte (bio->bi_sector << 9) and visits the bio's
 * bio_vec array while bio_index < bio->bi_vcnt. For each segment the code
 * calls btrfs_find_ordered_sum(), and it consults the csum tree through
 * btrfs_lookup_csum() whenever no item is cached or disk_bytenr lies outside
 * the cached range [item_start_offset, item_last_offset). The checksum is
 * copied out of the leaf with read_extent_buffer() at
 * item + ((disk_bytenr - item_start_offset) / sectorsize) * csum_size.
 *
 * For the free space inode the search uses the commit root without locking
 * (see the comment fragment inside the function).
 *
 * NOTE(review): this listing omits many lines (braces, several declarations,
 * else/goto statements, the use of dst and dio, the return). Which branch is
 * taken after btrfs_find_ordered_sum(), and under which condition
 * set_state_private() is called, cannot be determined from what is shown.
 */
155 static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
156 struct inode *inode, struct bio *bio,
157 u64 logical_offset, u32 *dst, int dio)
160 struct bio_vec *bvec = bio->bi_io_vec;
163 u64 item_start_offset = 0;
164 u64 item_last_offset = 0;
167 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
169 struct btrfs_path *path;
170 struct btrfs_csum_item *item = NULL;
171 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
173 path = btrfs_alloc_path();
176 if (bio->bi_size > PAGE_CACHE_SIZE * 8)
179 WARN_ON(bio->bi_vcnt <= 0);
182 * the free space stuff is only read when it hasn't been
183 * updated in the current transaction. So, we can safely
184 * read from the commit root and sidestep a nasty deadlock
185 * between reading the free space cache and updating the csum tree.
187 if (btrfs_is_free_space_inode(inode)) {
188 path->search_commit_root = 1;
189 path->skip_locking = 1;
/* bi_sector is multiplied by 512 (<< 9) to obtain a byte address */
192 disk_bytenr = (u64)bio->bi_sector << 9;
194 offset = logical_offset;
195 while (bio_index < bio->bi_vcnt) {
197 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
198 ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
/* a new tree lookup is needed when nothing is cached or the cached item does not cover disk_bytenr */
202 if (!item || disk_bytenr < item_start_offset ||
203 disk_bytenr >= item_last_offset) {
204 struct btrfs_key found_key;
208 btrfs_release_path(path);
209 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
210 path, disk_bytenr, 0);
213 if (ret == -ENOENT || ret == -EFBIG)
/* inodes of the data relocation tree get the range flagged EXTENT_NODATASUM */
216 if (BTRFS_I(inode)->root->root_key.objectid ==
217 BTRFS_DATA_RELOC_TREE_OBJECTID) {
218 set_extent_bits(io_tree, offset,
219 offset + bvec->bv_len - 1,
220 EXTENT_NODATASUM, GFP_NOFS);
222 printk(KERN_INFO "btrfs no csum found "
223 "for inode %llu start %llu\n",
226 (unsigned long long)offset);
229 btrfs_release_path(path);
/* remember which disk byte range the found item covers */
232 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
235 item_start_offset = found_key.offset;
236 item_size = btrfs_item_size_nr(path->nodes[0],
238 item_last_offset = item_start_offset +
239 (item_size / csum_size) *
241 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
242 struct btrfs_csum_item);
245 * this byte range must be able to fit inside
246 * a single leaf so it will also fit inside a u32
/* byte distance -> sector index -> byte offset of the checksum inside the item */
248 diff = disk_bytenr - item_start_offset;
249 diff = diff / root->sectorsize;
250 diff = diff * csum_size;
252 read_extent_buffer(path->nodes[0], &sum,
253 ((unsigned long)item) + diff,
259 set_state_private(io_tree, offset, sum);
/* advance both the disk position and the file offset by this segment's length */
260 disk_bytenr += bvec->bv_len;
261 offset += bvec->bv_len;
265 btrfs_free_path(path);
/*
 * Look up the checksums for @bio, handing @dst through to
 * __btrfs_lookup_bio_sums() with a logical offset of 0 and dio set to 0.
 * Returns whatever the helper returns.
 */
269 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
270 struct bio *bio, u32 *dst)
272 return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0);
/*
 * Variant of the bio checksum lookup that passes @offset as the logical
 * offset, a NULL dst and dio set to 1 to __btrfs_lookup_bio_sums().
 * Returns whatever the helper returns.
 */
275 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
276 struct bio *bio, u64 offset)
278 return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
/*
 * Collect the checksums stored in @root for the disk byte range
 * [start, end] and append them to @list as struct btrfs_ordered_sum
 * entries.
 *
 * Entries are first queued on the local list tmplist. tmplist is spliced
 * onto the tail of @list by list_splice_tail(); while ret is negative the
 * queued entries are unlinked one by one instead.
 *
 * Each csum item covers the disk bytes
 * [key.offset, key.offset + (item size / csum_size) * sectorsize). The part
 * overlapping the requested range is copied out in chunks of at most
 * MAX_ORDERED_SUM_BYTES(root) bytes, one btrfs_sector_sum per sector.
 *
 * NOTE(review): this listing omits lines (braces, declarations including
 * that of tmplist, the condition guarding the commit-root settings, loop
 * exits, the kfree() in the cleanup loop, the return). Comments below only
 * describe what is visible.
 */
281 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
282 struct list_head *list, int search_commit)
284 struct btrfs_key key;
285 struct btrfs_path *path;
286 struct extent_buffer *leaf;
287 struct btrfs_ordered_sum *sums;
288 struct btrfs_sector_sum *sector_sum;
289 struct btrfs_csum_item *item;
291 unsigned long offset;
295 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
297 path = btrfs_alloc_path();
302 path->skip_locking = 1;
304 path->search_commit_root = 1;
307 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
309 key.type = BTRFS_EXTENT_CSUM_KEY;
311 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* no exact match: check whether the previous item is a csum item whose checksums reach start */
314 if (ret > 0 && path->slots[0] > 0) {
315 leaf = path->nodes[0];
316 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
317 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
318 key.type == BTRFS_EXTENT_CSUM_KEY) {
319 offset = (start - key.offset) >>
320 root->fs_info->sb->s_blocksize_bits;
321 if (offset * csum_size <
322 btrfs_item_size_nr(leaf, path->slots[0] - 1))
327 while (start <= end) {
328 leaf = path->nodes[0];
/* ran off the end of this leaf: move on to the next one */
329 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = btrfs_next_leaf(root, path);
335 leaf = path->nodes[0];
338 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
339 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
340 key.type != BTRFS_EXTENT_CSUM_KEY)
343 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
344 if (key.offset > end)
347 if (key.offset > start)
/* first disk byte past the range covered by this item */
350 size = btrfs_item_size_nr(leaf, path->slots[0]);
351 csum_end = key.offset + (size / csum_size) * root->sectorsize;
352 if (csum_end <= start) {
/* never copy past the requested end (end is inclusive, hence + 1) */
357 csum_end = min(csum_end, end + 1);
358 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
359 struct btrfs_csum_item);
360 while (start < csum_end) {
361 size = min_t(size_t, csum_end - start,
362 MAX_ORDERED_SUM_BYTES(root));
363 sums = kzalloc(btrfs_ordered_sum_size(root, size),
370 sector_sum = sums->sums;
371 sums->bytenr = start;
374 offset = (start - key.offset) >>
375 root->fs_info->sb->s_blocksize_bits;
379 read_extent_buffer(path->nodes[0],
381 ((unsigned long)item) +
383 sector_sum->bytenr = start;
385 size -= root->sectorsize;
386 start += root->sectorsize;
390 list_add_tail(&sums->list, &tmplist);
396 while (ret < 0 && !list_empty(&tmplist)) {
/*
 * Take the first queued element. Passing &tmplist itself to list_entry()
 * would convert the list head, which is not embedded in a
 * struct btrfs_ordered_sum, into a bogus pointer.
 */
397 sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
398 list_del(&sums->list);
401 list_splice_tail(&tmplist, list);
403 btrfs_free_path(path);
/*
 * Compute a checksum for every segment of @bio and attach the results to
 * the ordered extent(s) of @inode that the data belongs to.
 *
 * One struct btrfs_ordered_sum sized for bio->bi_size is allocated up
 * front. Each segment is mapped with kmap_atomic() and checksummed with
 * btrfs_csum_data() seeded with ~0; the result is finalised in place with
 * btrfs_csum_final() and recorded together with the segment's disk byte
 * address, which starts at (bio->bi_sector << 9).
 *
 * When @contig is zero and a segment's file offset falls outside the
 * current ordered extent, the sums gathered so far are handed over with
 * btrfs_add_ordered_sum(), the extent reference is dropped, and a new
 * btrfs_ordered_sum is allocated for the bytes still left in the bio.
 *
 * NOTE(review): this listing omits lines (braces, declarations, the
 * allocation-failure check for the first kzalloc, the use of file_start,
 * kunmap, loop increments, the return).
 */
407 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
408 struct bio *bio, u64 file_start, int contig)
410 struct btrfs_ordered_sum *sums;
411 struct btrfs_sector_sum *sector_sum;
412 struct btrfs_ordered_extent *ordered;
414 struct bio_vec *bvec = bio->bi_io_vec;
416 unsigned long total_bytes = 0;
417 unsigned long this_sum_bytes = 0;
421 WARN_ON(bio->bi_vcnt <= 0);
422 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
426 sector_sum = sums->sums;
/* bi_sector is multiplied by 512 (<< 9) to obtain a byte address */
427 disk_bytenr = (u64)bio->bi_sector << 9;
428 sums->len = bio->bi_size;
429 INIT_LIST_HEAD(&sums->list);
434 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
436 ordered = btrfs_lookup_ordered_extent(inode, offset);
437 BUG_ON(!ordered); /* Logic error */
438 sums->bytenr = ordered->start;
440 while (bio_index < bio->bi_vcnt) {
442 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
/* this segment lies outside the current ordered extent: flush and switch */
444 if (!contig && (offset >= ordered->file_offset + ordered->len ||
445 offset < ordered->file_offset)) {
446 unsigned long bytes_left;
447 sums->len = this_sum_bytes;
449 btrfs_add_ordered_sum(inode, ordered, sums);
450 btrfs_put_ordered_extent(ordered);
452 bytes_left = bio->bi_size - total_bytes;
454 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
456 BUG_ON(!sums); /* -ENOMEM */
457 sector_sum = sums->sums;
458 sums->len = bytes_left;
459 ordered = btrfs_lookup_ordered_extent(inode, offset);
460 BUG_ON(!ordered); /* Logic error */
461 sums->bytenr = ordered->start;
464 data = kmap_atomic(bvec->bv_page);
465 sector_sum->sum = ~(u32)0;
466 sector_sum->sum = btrfs_csum_data(root,
467 data + bvec->bv_offset,
/* finalise in place: the result is written back over sector_sum->sum */
471 btrfs_csum_final(sector_sum->sum,
472 (char *)&sector_sum->sum);
473 sector_sum->bytenr = disk_bytenr;
477 total_bytes += bvec->bv_len;
478 this_sum_bytes += bvec->bv_len;
479 disk_bytenr += bvec->bv_len;
480 offset += bvec->bv_len;
484 btrfs_add_ordered_sum(inode, ordered, sums);
485 btrfs_put_ordered_extent(ordered);
/*
 * Summary of the visible logic of truncate_one_csum():
 *
 * csum_end is the first disk byte past the item, computed as
 * key->offset + ((item size / csum_size) << blocksize bits); end_byte is
 * bytenr + len.
 *
 *  - Item starts before bytenr and ends at or before end_byte: keep only
 *    the checksums for [key->offset, bytenr) by calling
 *    btrfs_truncate_item() with a final argument of 1.
 *  - Item starts at or after bytenr and extends past end_byte: keep only
 *    the checksums for [end_byte, csum_end) by calling
 *    btrfs_truncate_item() with a final argument of 0, then move the key
 *    offset to end_byte via btrfs_set_item_key_safe().
 *
 * NOTE(review): this listing omits lines (the opener/closer of the original
 * comment, the last parameters, braces, any final else branch).
 */
490 * helper function for csum removal, this expects the
491 * key to describe the csum pointed to by the path, and it expects
492 * the csum to overlap the range [bytenr, len]
494 * The csum should not be entirely contained in the range and the
495 * range should not be entirely contained in the csum.
497 * This calls btrfs_truncate_item with the correct args based on the
498 * overlap, and fixes up the key as required.
500 static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
501 struct btrfs_root *root,
502 struct btrfs_path *path,
503 struct btrfs_key *key,
506 struct extent_buffer *leaf;
507 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
509 u64 end_byte = bytenr + len;
510 u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
512 leaf = path->nodes[0];
/* number of checksums in the item -> bytes covered -> absolute end address */
513 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
514 csum_end <<= root->fs_info->sb->s_blocksize_bits;
515 csum_end += key->offset;
517 if (key->offset < bytenr && csum_end <= end_byte) {
522 * A simple truncate off the end of the item
/* new item size: one checksum per block between the key offset and bytenr */
524 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
525 new_size *= csum_size;
526 btrfs_truncate_item(trans, root, path, new_size, 1);
527 } else if (key->offset >= bytenr && csum_end > end_byte &&
528 end_byte > key->offset) {
533 * we need to truncate from the beginning of the csum
/* new item size: one checksum per block between end_byte and csum_end */
535 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
536 new_size *= csum_size;
538 btrfs_truncate_item(trans, root, path, new_size, 0);
/* the item now begins at end_byte, so its key has to follow */
540 key->offset = end_byte;
541 btrfs_set_item_key_safe(trans, root, path, key);
/*
 * Summary of the visible logic of btrfs_del_csums():
 *
 * Works on the csum tree (root is replaced by root->fs_info->csum_root) and
 * removes the checksums for the disk byte range [bytenr, bytenr + len).
 * The search key is (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY,
 * end_byte - 1), searched with ins_len -1 and cow 1. For the item found,
 * csum_end is the first disk byte past it. Three overlap cases are handled:
 *
 *  - item entirely inside the range: btrfs_del_item();
 *  - range strictly inside the item: part of the item is zeroed with
 *    memset_extent_buffer() and the item is split with btrfs_split_item();
 *    a failure other than -EAGAIN aborts the transaction;
 *  - otherwise: truncate_one_csum() trims the item.
 *
 * NOTE(review): this listing omits lines (the opener/closer of the original
 * comments, braces, declarations, the enclosing loop and its exits, part of
 * the offset computation, the return). Loop structure and return value are
 * therefore not visible.
 */
548 * deletes the csum items from the csum tree for a given
551 int btrfs_del_csums(struct btrfs_trans_handle *trans,
552 struct btrfs_root *root, u64 bytenr, u64 len)
554 struct btrfs_path *path;
555 struct btrfs_key key;
556 u64 end_byte = bytenr + len;
558 struct extent_buffer *leaf;
560 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
561 int blocksize_bits = root->fs_info->sb->s_blocksize_bits;
563 root = root->fs_info->csum_root;
565 path = btrfs_alloc_path();
/* search from the last byte of the range */
570 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
571 key.offset = end_byte - 1;
572 key.type = BTRFS_EXTENT_CSUM_KEY;
574 path->leave_spinning = 1;
575 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
577 if (path->slots[0] == 0)
580 } else if (ret < 0) {
584 leaf = path->nodes[0];
585 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
587 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
588 key.type != BTRFS_EXTENT_CSUM_KEY) {
592 if (key.offset >= end_byte)
/* first disk byte past the range covered by this item */
595 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
596 csum_end <<= blocksize_bits;
597 csum_end += key.offset;
599 /* this csum ends before we start, we're done */
600 if (csum_end <= bytenr)
603 /* delete the entire item, it is inside our range */
604 if (key.offset >= bytenr && csum_end <= end_byte) {
605 ret = btrfs_del_item(trans, root, path);
608 if (key.offset == bytenr)
610 } else if (key.offset < bytenr && csum_end > end_byte) {
611 unsigned long offset;
612 unsigned long shift_len;
613 unsigned long item_offset;
618 * Our bytes are in the middle of the csum,
619 * we need to split this item and insert a new one.
621 * But we can't drop the path because the
622 * csum could change, get removed, extended etc.
624 * The trick here is the max size of a csum item leaves
625 * enough room in the tree block for a single
626 * item header. So, we split the item in place,
627 * adding a new header pointing to the existing
628 * bytes. Then we loop around again and we have
629 * a nicely formed csum item that we can neatly
/* block index of bytenr within the item */
632 offset = (bytenr - key.offset) >> blocksize_bits;
/* number of checksum bytes that correspond to len */
635 shift_len = (len >> blocksize_bits) * csum_size;
637 item_offset = btrfs_item_ptr_offset(leaf,
640 memset_extent_buffer(leaf, 0, item_offset + offset,
645 * btrfs_split_item returns -EAGAIN when the
646 * item changed size or key
648 ret = btrfs_split_item(trans, root, path, &key, offset);
649 if (ret && ret != -EAGAIN) {
650 btrfs_abort_transaction(trans, root, ret);
654 key.offset = end_byte - 1;
656 truncate_one_csum(trans, root, path, &key, bytenr, len);
657 if (key.offset < bytenr)
660 btrfs_release_path(path);
664 btrfs_free_path(path);
/*
 * Write the checksums held in @sums into the csum tree @root.
 *
 * Works through sums->sums one btrfs_sector_sum at a time, counting
 * progress in total_bytes until sums->len is reached. For each starting
 * sector the code:
 *
 *  - looks for an existing csum item with btrfs_lookup_csum() (cow = 1);
 *  - when that does not yield a usable slot, searches the tree again and
 *    may grow the preceding item by exactly one checksum with
 *    btrfs_extend_item();
 *  - failing that, inserts a new item sized for the run of consecutive
 *    sectors that follows, capped by the distance to the next existing csum
 *    item (next_offset) and by MAX_CSUM_ITEMS();
 *  - copies checksums into the item with write_extent_buffer() for as long
 *    as the item has room and the next sector directly follows on disk.
 *
 * trans->adding_csums is set to 1 for the duration of the call and reset
 * to 0 before the path is freed.
 *
 * NOTE(review): this listing omits lines (braces, declarations, labels and
 * gotos, several if/else bodies, the advance of sector_sum, the return), so
 * the exact control flow between the steps above is not visible.
 * NOTE(review): progress is counted in root->sectorsize while the
 * "next sector follows directly" test adds PAGE_CACHE_SIZE to bytenr; the
 * two only agree if both sizes are equal - confirm that this is guaranteed.
 */
668 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
669 struct btrfs_root *root,
670 struct btrfs_ordered_sum *sums)
674 struct btrfs_key file_key;
675 struct btrfs_key found_key;
679 struct btrfs_path *path;
680 struct btrfs_csum_item *item;
681 struct btrfs_csum_item *item_end;
682 struct extent_buffer *leaf = NULL;
684 struct btrfs_sector_sum *sector_sum;
687 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
689 path = btrfs_alloc_path();
693 sector_sum = sums->sums;
694 trans->adding_csums = 1;
/* (u64)-1 means no following csum item is known yet */
696 next_offset = (u64)-1;
698 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
699 file_key.offset = sector_sum->bytenr;
700 bytenr = sector_sum->bytenr;
701 btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
703 item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
705 leaf = path->nodes[0];
710 if (ret != -EFBIG && ret != -ENOENT)
715 /* we found one, but it isn't big enough yet */
716 leaf = path->nodes[0];
717 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
718 if ((item_size / csum_size) >=
719 MAX_CSUM_ITEMS(root, csum_size)) {
720 /* already at max size, make a new one */
724 int slot = path->slots[0] + 1;
725 /* we didn't find a csum item, insert one */
726 nritems = btrfs_header_nritems(path->nodes[0]);
727 if (path->slots[0] >= nritems - 1) {
728 ret = btrfs_next_leaf(root, path);
735 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
736 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
737 found_key.type != BTRFS_EXTENT_CSUM_KEY) {
/* remember where the next csum item starts so a new item cannot run into it */
741 next_offset = found_key.offset;
747 * at this point, we know the tree has an item, but it isn't big
748 * enough yet to put our csum in. Grow it
750 btrfs_release_path(path);
751 ret = btrfs_search_slot(trans, root, &file_key, path,
757 if (path->slots[0] == 0)
762 leaf = path->nodes[0];
763 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
764 csum_offset = (bytenr - found_key.offset) >>
765 root->fs_info->sb->s_blocksize_bits;
767 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
768 found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
769 csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
773 if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
/* item size needed to hold the checksum at index csum_offset */
775 u32 diff = (csum_offset + 1) * csum_size;
778 * is the item big enough already? we dropped our lock
779 * before and need to recheck
781 if (diff < btrfs_item_size_nr(leaf, path->slots[0]))
784 diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
/* anything other than growth by exactly one checksum is handled by the omitted branch */
785 if (diff != csum_size)
788 btrfs_extend_item(trans, root, path, diff);
793 btrfs_release_path(path);
/* count how many bytes of the remaining sums are consecutive on disk */
796 u64 tmp = total_bytes + root->sectorsize;
797 u64 next_sector = sector_sum->bytenr;
798 struct btrfs_sector_sum *next = sector_sum + 1;
800 while (tmp < sums->len) {
801 if (next_sector + root->sectorsize != next->bytenr)
803 tmp += root->sectorsize;
804 next_sector = next->bytenr;
/* clamp to the next existing item, convert bytes to checksum count, keep within [1, MAX_CSUM_ITEMS] */
807 tmp = min(tmp, next_offset - file_key.offset);
808 tmp >>= root->fs_info->sb->s_blocksize_bits;
809 tmp = max((u64)1, tmp);
810 tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
811 ins_size = csum_size * tmp;
813 ins_size = csum_size;
815 path->leave_spinning = 1;
816 ret = btrfs_insert_empty_item(trans, root, path, &file_key,
818 path->leave_spinning = 0;
826 leaf = path->nodes[0];
827 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
829 item = (struct btrfs_csum_item *)((unsigned char *)item +
830 csum_offset * csum_size);
/* item_end points one byte past the item; used as the write limit below */
832 item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
833 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
834 btrfs_item_size_nr(leaf, path->slots[0]));
837 write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
839 total_bytes += root->sectorsize;
841 if (total_bytes < sums->len) {
842 item = (struct btrfs_csum_item *)((char *)item +
844 if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
845 sector_sum->bytenr) {
846 bytenr = sector_sum->bytenr;
851 btrfs_mark_buffer_dirty(path->nodes[0]);
852 if (total_bytes < sums->len) {
853 btrfs_release_path(path);
858 trans->adding_csums = 0;
859 btrfs_free_path(path);