btrfs_set_trans_block_group(trans, inode);
ret = btrfs_update_inode(trans, root, inode);
- if (ret)
- printk(KERN_ERR"btrfs: fail to dirty inode %lu error %d\n",
- inode->i_ino, ret);
+ if (ret && ret == -ENOSPC) {
+ /* whoops, lets try again with the full transaction */
+ btrfs_end_transaction(trans, root);
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ if (printk_ratelimit()) {
+ printk(KERN_ERR "btrfs: fail to "
+ "dirty inode %lu error %ld\n",
+ inode->i_ino, PTR_ERR(trans));
+ }
+ return;
+ }
+ btrfs_set_trans_block_group(trans, inode);
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret) {
+ if (printk_ratelimit()) {
+ printk(KERN_ERR "btrfs: fail to "
+ "dirty inode %lu error %d\n",
+ inode->i_ino, ret);
+ }
+ }
+ }
btrfs_end_transaction(trans, root);
}
if (ret != 0)
goto fail;
- inode->i_uid = current_fsuid();
-
- if (dir && (dir->i_mode & S_ISGID)) {
- inode->i_gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else
- inode->i_gid = current_fsgid();
-
- inode->i_mode = mode;
+ inode_init_owner(inode, dir, mode);
inode->i_ino = objectid;
inode_set_bytes(inode, 0);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return em;
}
+/*
+ * returns 1 when the nocow is safe, < 1 on error, 0 if the
+ * block must be cow'd
+ */
+static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 offset, u64 len)
+{
+ struct btrfs_path *path;
+ int ret;
+ struct extent_buffer *leaf;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+ u64 disk_bytenr;
+ u64 backref_offset;
+ u64 extent_end;
+ u64 num_bytes;
+ int slot;
+ int found_type;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+ offset, 0);
+ if (ret < 0)
+ goto out;
+
+ slot = path->slots[0];
+ if (ret == 1) {
+ if (slot == 0) {
+ /* can't find the item, must cow */
+ ret = 0;
+ goto out;
+ }
+ slot--;
+ }
+ ret = 0;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != inode->i_ino ||
+ key.type != BTRFS_EXTENT_DATA_KEY) {
+ /* not our file or wrong item type, must cow */
+ goto out;
+ }
+
+ if (key.offset > offset) {
+ /* Wrong offset, must cow */
+ goto out;
+ }
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+ found_type = btrfs_file_extent_type(leaf, fi);
+ if (found_type != BTRFS_FILE_EXTENT_REG &&
+ found_type != BTRFS_FILE_EXTENT_PREALLOC) {
+ /* not a regular extent, must cow */
+ goto out;
+ }
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ backref_offset = btrfs_file_extent_offset(leaf, fi);
+
+ extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+ if (extent_end < offset + len) {
+ /* extent doesn't include our full range, must cow */
+ goto out;
+ }
+
+ if (btrfs_extent_readonly(root, disk_bytenr))
+ goto out;
+
+ /*
+ * look for other files referencing this extent, if we
+ * find any we must cow
+ */
+ if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
+ key.offset - backref_offset, disk_bytenr))
+ goto out;
+
+ /*
+ * adjust disk_bytenr and num_bytes to cover just the bytes
+ * in this extent we are about to write. If there
+ * are any csums in that range we have to cow in order
+ * to keep the csums correct
+ */
+ disk_bytenr += backref_offset;
+ disk_bytenr += offset - key.offset;
+ num_bytes = min(offset + len, extent_end) - offset;
+ if (csum_exist_in_range(root, disk_bytenr, num_bytes))
+ goto out;
+ /*
+ * all of the above have passed, it is safe to overwrite this extent
+ * without cow
+ */
+ ret = 1;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start = iblock << inode->i_blkbits;
u64 len = bh_result->b_size;
+ struct btrfs_trans_handle *trans;
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
if (IS_ERR(em))
* just use the extent.
*
*/
- if (!create)
+ if (!create) {
+ len = em->len - (start - em->start);
goto map;
+ }
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
em->block_start != EXTENT_MAP_HOLE)) {
- u64 block_start;
int type;
int ret;
+ u64 block_start;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
type = BTRFS_ORDERED_PREALLOC;
else
type = BTRFS_ORDERED_NOCOW;
- len = min(len, em->block_len - (start - em->start));
+ len = min(len, em->len - (start - em->start));
block_start = em->block_start + (start - em->start);
- ret = btrfs_add_ordered_extent_dio(inode, start,
- start, len, len, type);
- if (ret) {
- free_extent_map(em);
- return ret;
+
+ /*
+ * we're not going to log anything, but we do need
+ * to make sure the current transaction stays open
+ * while we look for nocow cross refs
+ */
+ trans = btrfs_join_transaction(root, 0);
+ if (!trans)
+ goto must_cow;
+
+ if (can_nocow_odirect(trans, inode, start, len) == 1) {
+ ret = btrfs_add_ordered_extent_dio(inode, start,
+ block_start, len, len, type);
+ btrfs_end_transaction(trans, root);
+ if (ret) {
+ free_extent_map(em);
+ return ret;
+ }
+ goto unlock;
}
- } else {
- free_extent_map(em);
- em = btrfs_new_extent_direct(inode, start, len);
- if (IS_ERR(em))
- return PTR_ERR(em);
- len = min(len, em->block_len);
+ btrfs_end_transaction(trans, root);
}
+must_cow:
+ /*
+ * this will cow the extent, reset the len in case we changed
+ * it above
+ */
+ len = bh_result->b_size;
+ free_extent_map(em);
+ em = btrfs_new_extent_direct(inode, start, len);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+ len = min(len, em->len - (start - em->start));
+unlock:
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
0, NULL, GFP_NOFS);
map:
bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
inode->i_blkbits;
- bh_result->b_size = em->len - (start - em->start);
+ bh_result->b_size = len;
bh_result->b_bdev = em->bdev;
set_buffer_mapped(bh_result);
if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
bvec++;
} while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
- dip->disk_bytenr = bio->bi_sector << 9;
+ dip->disk_bytenr = (u64)bio->bi_sector << 9;
bio->bi_private = dip;
if (write)
bio_endio(bio, ret);
}
+static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs)
+{
+ int seg;
+ size_t size;
+ unsigned long addr;
+ unsigned blocksize_mask = root->sectorsize - 1;
+ ssize_t retval = -EINVAL;
+ loff_t end = offset;
+
+ if (offset & blocksize_mask)
+ goto out;
+
+ /* Check the memory alignment. Blocks cannot straddle pages */
+ for (seg = 0; seg < nr_segs; seg++) {
+ addr = (unsigned long)iov[seg].iov_base;
+ size = iov[seg].iov_len;
+ end += size;
+ if ((addr & blocksize_mask) || (size & blocksize_mask))
+ goto out;
+ }
+ retval = 0;
+out:
+ return retval;
+}
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
int write_bits = 0;
size_t count = iov_length(iov, nr_segs);
+ if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
+ offset, nr_segs)) {
+ return 0;
+ }
+
lockstart = offset;
lockend = offset + count - 1;
free_extent_state(cached_state);
cached_state = NULL;
- ret = __blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs,
- btrfs_get_blocks_direct, NULL,
- btrfs_submit_direct, 0);
+ ret = __blockdev_direct_IO(rw, iocb, inode,
+ BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
+ iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+ btrfs_submit_direct, 0);
if (ret < 0 && ret != -EIOCBQUEUED) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,