u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
u64 thresh;
+ unsigned long flags;
int ret = 0;
	if (for_del)
		thresh = total * 90;
	else
		thresh = total * 85;

	do_div(thresh, 100);
- spin_lock(&root->fs_info->delalloc_lock);
+ spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
if (used + root->fs_info->delalloc_bytes + num_required > thresh)
ret = -ENOSPC;
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
return ret;
}
if (alloc_hint == EXTENT_MAP_INLINE)
goto out;
+ BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
+
	while (num_bytes > 0) {
cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
+ root->sectorsize,
root->root_key.objectid,
trans->transid,
				 inode->i_ino, start, 0,
				 alloc_hint, (u64)-1, &ins, 1);
		if (ret) {
WARN_ON(1);
goto out;
}
+ cur_alloc_size = ins.offset;
ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
start, ins.objectid, ins.offset,
ins.offset);
inode->i_blocks += ins.offset >> 9;
btrfs_check_file(root, inode);
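+		/* if the allocator handed back more than we asked for,
+		 * stop the loop before num_bytes underflows
+		 */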
+ if (num_bytes < cur_alloc_size) {
+ printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
+ cur_alloc_size);
+ break;
+ }
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
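+/*
+ * the delalloc bit hooks below can be called with irqs already
+ * disabled (for example from the end_io handlers), so the
+ * delalloc_lock is always taken irqsave
+ */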
int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits)
{
+ unsigned long flags;
if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
- spin_lock(&root->fs_info->delalloc_lock);
+ spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
BTRFS_I(inode)->delalloc_bytes += end - start + 1;
root->fs_info->delalloc_bytes += end - start + 1;
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
}
return 0;
}
int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
			 unsigned long old, unsigned long bits)
{
if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
- spin_lock(&root->fs_info->delalloc_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
		if (end - start + 1 > root->fs_info->delalloc_bytes) {
			printk("warning: delalloc account %Lu %Lu\n",
			       end - start + 1, root->fs_info->delalloc_bytes);
			root->fs_info->delalloc_bytes = 0;
			BTRFS_I(inode)->delalloc_bytes = 0;
		} else {
			root->fs_info->delalloc_bytes -= end - start + 1;
			BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
		}
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
}
return 0;
}
	u64 logical = (u64)bio->bi_sector << 9;
u64 length = 0;
u64 map_length;
- struct bio_vec *bvec;
- int i;
int ret;
- bio_for_each_segment(bvec, bio, i) {
- length += bvec->bv_len;
- }
+ length = bio->bi_size;
map_tree = &root->fs_info->mapping_tree;
map_length = length;
	ret = btrfs_map_block(map_tree, READ, logical,
			      &map_length, NULL, 0);

	if (map_length < length + size)
		return 1;
return 0;
}
-int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
+ char *sums = NULL;
+
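+	/* csum everything in this bio up front, then record the sums
+	 * in the csum tree inside a single transaction below
+	 */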
+ ret = btrfs_csum_one_bio(root, bio, &sums);
+ BUG_ON(ret);
+
+ mutex_lock(&root->fs_info->fs_mutex);
+ trans = btrfs_start_transaction(root, 1);
+
+ btrfs_set_trans_block_group(trans, inode);
+ btrfs_csum_file_blocks(trans, root, inode, bio, sums);
+
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ mutex_unlock(&root->fs_info->fs_mutex);
+
+ kfree(sums);
+
+ return btrfs_map_bio(root, rw, bio, mirror_num);
+}
+
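+/*
+ * extent_io.c submission hook: reads are mapped directly, with their
+ * end_io handled by a workqueue; writes go through the async helpers
+ * so the checksumming in __btrfs_submit_bio_hook runs out of line
+ */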
+int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+ int mirror_num)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret = 0;
if (!(rw & (1 << BIO_RW))) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
goto mapit;
}
- mutex_lock(&root->fs_info->fs_mutex);
- trans = btrfs_start_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
- btrfs_csum_file_blocks(trans, root, inode, bio);
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
- mutex_unlock(&root->fs_info->fs_mutex);
+ return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+ inode, rw, bio, mirror_num,
+ __btrfs_submit_bio_hook);
mapit:
return btrfs_map_bio(root, rw, bio, mirror_num);
}
struct btrfs_csum_item *item;
struct btrfs_path *path = NULL;
u32 csum;
+
if (btrfs_test_opt(root, NODATASUM) ||
btrfs_test_flag(inode, NODATASUM))
return 0;
+
mutex_lock(&root->fs_info->fs_mutex);
path = btrfs_alloc_path();
item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
return ret;
}
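+/*
+ * an io_failure_record is allocated the first time a read of a given
+ * byte range fails.  last_mirror remembers which copy was tried most
+ * recently, so each retry can move on to the next one
+ */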
+struct io_failure_record {
+ struct page *page;
+ u64 start;
+ u64 len;
+ u64 logical;
+ int last_mirror;
+};
+
+int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
+ struct page *page, u64 start, u64 end,
+ struct extent_state *state)
+{
+ struct io_failure_record *failrec = NULL;
+ u64 private;
+ struct extent_map *em;
+ struct inode *inode = page->mapping->host;
+ struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct bio *bio;
+ int num_copies;
+ int ret;
+ u64 logical;
+
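+	/* the failure tree stores one record per failed range; no
+	 * private value yet means this is the first failure here
+	 */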
+ ret = get_state_private(failure_tree, start, &private);
+ if (ret) {
+ failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
+ if (!failrec)
+ return -ENOMEM;
+ failrec->start = start;
+ failrec->len = end - start + 1;
+ failrec->last_mirror = 0;
+
+		spin_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, start, failrec->len);
+		if (!em || em->start > start ||
+		    em->start + em->len <= start) {
+			free_extent_map(em);
+			em = NULL;
+		}
+		spin_unlock(&em_tree->lock);
+
+ if (!em || IS_ERR(em)) {
+ kfree(failrec);
+ return -EIO;
+ }
+ logical = start - em->start;
+ logical = em->block_start + logical;
+ failrec->logical = logical;
+ free_extent_map(em);
+ set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
+ EXTENT_DIRTY, GFP_NOFS);
+ set_state_private(failure_tree, start,
+ (u64)(unsigned long)failrec);
+ } else {
+ failrec = (struct io_failure_record *)(unsigned long)private;
+ }
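+	/* move on to the next mirror; once every copy has been read
+	 * and failed, drop the record and return the error
+	 */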
+ num_copies = btrfs_num_copies(
+ &BTRFS_I(inode)->root->fs_info->mapping_tree,
+ failrec->logical, failrec->len);
+ failrec->last_mirror++;
+ if (!state) {
+ spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
+ state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
+ failrec->start,
+ EXTENT_LOCKED);
+ if (state && state->start != failrec->start)
+ state = NULL;
+ spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
+ }
+ if (!state || failrec->last_mirror > num_copies) {
+ set_state_private(failure_tree, failrec->start, 0);
+ clear_extent_bits(failure_tree, failrec->start,
+ failrec->start + failrec->len - 1,
+ EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
+ kfree(failrec);
+ return -EIO;
+ }
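+	/* aim a fresh single page bio at the same file range and send
+	 * it back through the submit hook, with the mirror number
+	 * selecting which copy gets read
+	 */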
+ bio = bio_alloc(GFP_NOFS, 1);
+ bio->bi_private = state;
+ bio->bi_end_io = failed_bio->bi_end_io;
+ bio->bi_sector = failrec->logical >> 9;
+ bio->bi_bdev = failed_bio->bi_bdev;
+ bio->bi_size = 0;
+ bio_add_page(bio, page, failrec->len, start - page_offset(page));
+ btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror);
+ return 0;
+}
+
int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
}
kunmap_atomic(kaddr, KM_IRQ0);
local_irq_restore(flags);
+
+ /* if the io failure tree for this inode is non-empty,
+ * check to see if we've recovered from a failed IO
+ */
+ private = 0;
+ if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
+ (u64)-1, 1, EXTENT_DIRTY)) {
+ u64 private_failure;
+ struct io_failure_record *failure;
+ ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
+ start, &private_failure);
+ if (ret == 0) {
+ failure = (struct io_failure_record *)(unsigned long)
+ private_failure;
+ set_state_private(&BTRFS_I(inode)->io_failure_tree,
+ failure->start, 0);
+ clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
+ failure->start,
+ failure->start + failure->len - 1,
+ EXTENT_DIRTY | EXTENT_LOCKED,
+ GFP_NOFS);
+ kfree(failure);
+ }
+ }
return 0;
zeroit:
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_IRQ0);
local_irq_restore(flags);
- return 0;
+ if (private == 0)
+ return 0;
+ return -EIO;
}
void btrfs_read_locked_inode(struct inode *inode)
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
+ u64 mask = root->sectorsize - 1;
- btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
+ btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
	path = btrfs_alloc_path();
	BUG_ON(!path);
	path->reada = -1;
hole_start, 0, 0,
hole_size);
btrfs_drop_extent_cache(inode, hole_start,
- hole_size - 1);
+ (u64)-1);
btrfs_check_file(root, inode);
}
btrfs_end_transaction(trans, root);
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
+ inode->i_mapping, GFP_NOFS);
+ atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
return 0;
}
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
+ inode->i_mapping, GFP_NOFS);
+ atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
BTRFS_I(inode)->delalloc_bytes = 0;
BTRFS_I(inode)->root = root;
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
+ inode->i_mapping, GFP_NOFS);
BTRFS_I(inode)->delalloc_bytes = 0;
+ atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
dir->i_sb->s_dirt = 1;
return err;
}
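+/*
+ * someone may have inserted an overlapping extent map while our lock
+ * was dropped.  Try to fold the existing mapping and the new one
+ * together, but only if they agree about where the bytes live on disk
+ */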
+static int merge_extent_mapping(struct extent_map_tree *em_tree,
+ struct extent_map *existing,
+ struct extent_map *em)
+{
+ u64 start_diff;
+ u64 new_end;
+ int ret = 0;
+ int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
+
+ if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
+ goto invalid;
+
+ if (!real_blocks && em->block_start != existing->block_start)
+ goto invalid;
+
+ new_end = max(existing->start + existing->len, em->start + em->len);
+
+ if (existing->start >= em->start) {
+ if (em->start + em->len < existing->start)
+ goto invalid;
+
+ start_diff = existing->start - em->start;
+ if (real_blocks && em->block_start + start_diff !=
+ existing->block_start)
+ goto invalid;
+
+ em->len = new_end - em->start;
+
+ remove_extent_mapping(em_tree, existing);
+		/* drop the reference the tree was holding on 'existing' */
+ free_extent_map(existing);
+ ret = add_extent_mapping(em_tree, em);
+
+ } else if (em->start > existing->start) {
+
+ if (existing->start + existing->len < em->start)
+ goto invalid;
+
+ start_diff = em->start - existing->start;
+ if (real_blocks && existing->block_start + start_diff !=
+ em->block_start)
+ goto invalid;
+
+ remove_extent_mapping(em_tree, existing);
+ em->block_start = existing->block_start;
+ em->start = existing->start;
+ em->len = new_end - existing->start;
+ free_extent_map(existing);
+
+ ret = add_extent_mapping(em_tree, em);
+ } else {
+ goto invalid;
+ }
+ return ret;
+
+invalid:
+ printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
+ existing->start, existing->len, existing->block_start,
+ em->start, em->len, em->block_start);
+ return -EIO;
+}
+
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
size_t pg_offset, u64 start, u64 len,
int create)
spin_unlock(&em_tree->lock);
if (em) {
- if (em->start > start) {
- printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
- start, len, em->start, em->len);
- WARN_ON(1);
- }
- if (em->block_start == EXTENT_MAP_INLINE && page)
+ if (em->start > start || em->start + em->len <= start)
+ free_extent_map(em);
+ else if (em->block_start == EXTENT_MAP_INLINE && page)
free_extent_map(em);
else
goto out;
err = 0;
spin_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ /* it is possible that someone inserted the extent into the tree
+ * while we had the lock dropped. It is also possible that
+ * an overlapping map exists in the tree
+ */
if (ret == -EEXIST) {
- free_extent_map(em);
- em = lookup_extent_mapping(em_tree, start, len);
- if (!em) {
- err = -EIO;
- printk("failing to insert %Lu %Lu\n", start, len);
+ struct extent_map *existing;
+ existing = lookup_extent_mapping(em_tree, start, len);
+ if (existing && (existing->start > start ||
+ existing->start + existing->len <= start)) {
+ free_extent_map(existing);
+ existing = NULL;
+ }
+ if (!existing) {
+ existing = lookup_extent_mapping(em_tree, em->start,
+ em->len);
+ if (existing) {
+ err = merge_extent_mapping(em_tree, existing,
+ em);
+ free_extent_map(existing);
+ if (err) {
+ free_extent_map(em);
+ em = NULL;
+ }
+ } else {
+ err = -EIO;
+ printk("failing to insert %Lu %Lu\n",
+ start, len);
+ free_extent_map(em);
+ em = NULL;
+ }
+ } else {
+ free_extent_map(em);
+ em = existing;
}
}
spin_unlock(&em_tree->lock);
return em;
}
+#if 0 /* waiting for O_DIRECT reads */
+static int btrfs_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct extent_map *em;
+ u64 start = (u64)iblock << inode->i_blkbits;
+ struct btrfs_multi_bio *multi = NULL;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len;
+ u64 logical;
+ u64 map_length;
+ int ret = 0;
+
+ em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);
+
+ if (!em || IS_ERR(em))
+ goto out;
+
+ if (em->start > start || em->start + em->len <= start) {
+ goto out;
+ }
+
+ if (em->block_start == EXTENT_MAP_INLINE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ len = em->start + em->len - start;
+ len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));
+
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ em->block_start == EXTENT_MAP_DELALLOC) {
+ bh_result->b_size = len;
+ goto out;
+ }
+
+ logical = start - em->start;
+ logical = em->block_start + logical;
+
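+	/* ask the chunk layer where these logical bytes actually sit;
+	 * the first stripe of the mapping is enough for a read
+	 */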
+ map_length = len;
+ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+ logical, &map_length, &multi, 0);
+ BUG_ON(ret);
+ bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
+ bh_result->b_size = min(map_length, len);
+
+ bh_result->b_bdev = multi->stripes[0].dev->bdev;
+ set_buffer_mapped(bh_result);
+ kfree(multi);
+out:
+ free_extent_map(em);
+ return ret;
+}
+#endif
+
+static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs)
+{
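+	/* O_DIRECT stays disabled until the read path below is finished */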
+ return -EINVAL;
+#if 0
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+
+ if (rw == WRITE)
+ return -EINVAL;
+
+ return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+ offset, nr_segs, btrfs_get_block, NULL);
+#endif
+}
+
static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
{
return extent_bmap(mapping, iblock, btrfs_get_extent);
map = &BTRFS_I(page->mapping->host)->extent_tree;
ret = try_release_extent_mapping(map, tree, page, gfp_flags);
if (ret == 1) {
+ invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
tree = &BTRFS_I(page->mapping->host)->io_tree;
extent_invalidatepage(tree, page, offset);
btrfs_releasepage(page, GFP_NOFS);
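+	/* releasepage may refuse to drop the page's private state, but
+	 * invalidatepage has to clean it up unconditionally
+	 */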
+ if (PagePrivate(page)) {
+ invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ page_cache_release(page);
+ }
}
/*
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
+ inode->i_mapping, GFP_NOFS);
BTRFS_I(inode)->delalloc_bytes = 0;
+ atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
dir->i_sb->s_dirt = 1;
btrfs_throttle(root);
return err;
}
+
static int btrfs_permission(struct inode *inode, int mask,
struct nameidata *nd)
{
.merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_hook = btrfs_readpage_io_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
+ .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
.set_bit_hook = btrfs_set_bit_hook,
.clear_bit_hook = btrfs_clear_bit_hook,
};
.readpages = btrfs_readpages,
.sync_page = block_sync_page,
.bmap = btrfs_bmap,
+ .direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
.set_page_dirty = __set_page_dirty_nobuffers,