Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 29 Aug 2012 18:36:22 +0000 (11:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 29 Aug 2012 18:36:22 +0000 (11:36 -0700)
Pull btrfs fixes from Chris Mason:
 "I've split out the big send/receive update from my last pull request
  and now have just the fixes in my for-linus branch.  The send/recv
  branch will wander over to linux-next shortly though.

  The largest patches in this pull are Josef's patches to fix DIO
  locking problems and his patch to fix a crash during balance.  They
  are both well tested.

  The rest are smaller fixes that we've had queued.  The last rc came
  out while I was hacking new and exciting ways to recover from a
  misplaced rm -rf on my dev box, so these missed rc3."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
  Btrfs: fix that repair code is spuriously executed for transid failures
  Btrfs: fix ordered extent leak when failing to start a transaction
  Btrfs: fix a dio write regression
  Btrfs: fix deadlock with freeze and sync V2
  Btrfs: revert checksum error statistic which can cause a BUG()
  Btrfs: remove superblock writing after fatal error
  Btrfs: allow delayed refs to be merged
  Btrfs: fix enospc problems when deleting a subvol
  Btrfs: fix wrong mtime and ctime when creating snapshots
  Btrfs: fix race in run_clustered_refs
  Btrfs: don't run __tree_mod_log_free_eb on leaves
  Btrfs: increase the size of the free space cache
  Btrfs: barrier before waitqueue_active
  Btrfs: fix deadlock in wait_for_more_refs
  btrfs: fix second lock in btrfs_delete_delayed_items()
  Btrfs: don't allocate a seperate csums array for direct reads
  Btrfs: do not strdup non existent strings
  Btrfs: do not use missing devices when showing devname
  Btrfs: fix that error value is changed by mistake
  Btrfs: lock extents as we map them in DIO
  ...

21 files changed:
fs/btrfs/backref.c
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file-item.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/locking.c
fs/btrfs/qgroup.c
fs/btrfs/root-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b2a845f22ef1c0c8dd1310364a96b651f5..ff6475f409d64aaade5499f9d0cee64c2989a80c 100644 (file)
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
        ret = extent_from_logical(fs_info, logical, path,
                                        &found_key);
        btrfs_release_path(path);
-       if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
-               ret = -EINVAL;
        if (ret < 0)
                return ret;
+       if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+               return -EINVAL;
 
        extent_item_pos = logical - found_key.objectid;
        ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48dab786d525d3395d31dcf3f45a11dafc1..43d1c5a3a030888544d8dd8f7b36239386552d08 100644 (file)
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
        btrfs_compress_op[idx]->free_workspace(workspace);
        atomic_dec(alloc_workspace);
 wake:
+       smp_mb();
        if (waitqueue_active(workspace_wait))
                wake_up(workspace_wait);
 }
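
The smp_mb() added here (commit "Btrfs: barrier before waitqueue_active" in the list above) closes a missed-wakeup window: waitqueue_active() peeks at the wait queue without taking its lock, so without a full barrier the decrement of alloc_workspace can be reordered past the emptiness check while a waiter concurrently tests the condition and goes to sleep. Below is a minimal userspace sketch of the waker-side ordering using C11 atomics; the names are invented for illustration, not btrfs symbols.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int alloc_count;         /* analogue of alloc_workspace */
static atomic_bool waiter_present;     /* analogue of waitqueue_active() */

static void wake_waiter(void) {}       /* stands in for wake_up() */

static void free_workspace_sketch(void)
{
	atomic_fetch_sub_explicit(&alloc_count, 1, memory_order_relaxed);
	/* Full fence, like smp_mb(): the decrement must be globally
	 * visible before we peek at the wait queue.  Without it the
	 * check below can read a stale "no waiter" while the waiter
	 * concurrently sees the old count and goes to sleep. */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load_explicit(&waiter_present, memory_order_relaxed))
		wake_waiter();
}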
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271ff1e5b3f2b57931d96dad265678d7d..6d183f60d63a0521e8c4461e4d5fd705163d17d1 100644 (file)
@@ -420,12 +420,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);
 
-       /*
-        * we removed the lowest blocker from the blocker list, so there may be
-        * more processible delayed refs.
-        */
-       wake_up(&fs_info->tree_mod_seq_wait);
-
        /*
         * anything that's lower than the lowest existing (read: blocked)
         * sequence number can be removed from the tree.
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
        u32 nritems;
        int ret;
 
+       if (btrfs_header_level(eb) == 0)
+               return;
+
        nritems = btrfs_header_nritems(eb);
        for (i = nritems - 1; i >= 0; i--) {
                ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807227ad938c87039ea2aa1ef8f94fe46146..0d195b5076604b4350f88de9a58e9f4836fd785a 100644 (file)
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
        atomic_t tree_mod_seq;
        struct list_head tree_mod_seq_list;
        struct seq_list tree_mod_seq_elem;
-       wait_queue_head_t tree_mod_seq_wait;
 
        /* this protects tree_mod_log */
        rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                          struct bio *bio, u32 *dst);
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                             struct bio *bio, u64 logical_offset, u32 *dst);
+                             struct bio *bio, u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8ceab730394d45d9975adaa7025203dd7..07d5eeb1e6f1df1f8ae2ddf94218b1f45298aeda 100644 (file)
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 
        rb_erase(&delayed_item->rb_node, root);
        delayed_item->delayed_node->count--;
-       atomic_dec(&delayed_root->items);
-       if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+       if (atomic_dec_return(&delayed_root->items) <
+           BTRFS_DELAYED_BACKGROUND &&
            waitqueue_active(&delayed_root->wait))
                wake_up(&delayed_root->wait);
 }
@@ -1028,9 +1028,10 @@ do_again:
                btrfs_release_delayed_item(prev);
                ret = 0;
                btrfs_release_path(path);
-               if (curr)
+               if (curr) {
+                       mutex_unlock(&node->mutex);
                        goto do_again;
-               else
+               } else
                        goto delete_fail;
        }
 
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
                delayed_node->count--;
 
                delayed_root = delayed_node->root->fs_info->delayed_root;
-               atomic_dec(&delayed_root->items);
-               if (atomic_read(&delayed_root->items) <
+               if (atomic_dec_return(&delayed_root->items) <
                    BTRFS_DELAYED_BACKGROUND &&
                    waitqueue_active(&delayed_root->wait))
                        wake_up(&delayed_root->wait);
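
Both hunks in this file replace a separate atomic_dec()/atomic_read() pair with atomic_dec_return(). The two-step form is racy: another CPU's decrement can land between the two operations, so the value tested is not the one this thread produced, and the below-threshold wakeup can fire twice or be skipped entirely. A hedged C11 sketch of the difference:

#include <stdatomic.h>

static atomic_int items;

/* Racy two-step form: another thread's decrement can slip in between,
 * so the tested value is not the result of our own decrement. */
static int dec_then_read(void)
{
	atomic_fetch_sub(&items, 1);
	return atomic_load(&items);
}

/* Single atomic step, like atomic_dec_return(): old value minus one
 * is exactly what this thread's decrement produced. */
static int dec_return(void)
{
	return atomic_fetch_sub(&items, 1) - 1;
}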
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01bb7e520cfbcac0fbef44ef607a92e5..ae94117733973e2d7aa8ea590c171fcf2e6bd26a 100644 (file)
 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
                          struct btrfs_delayed_tree_ref *ref1)
 {
-       if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
-               if (ref1->root < ref2->root)
-                       return -1;
-               if (ref1->root > ref2->root)
-                       return 1;
-       } else {
-               if (ref1->parent < ref2->parent)
-                       return -1;
-               if (ref1->parent > ref2->parent)
-                       return 1;
-       }
+       if (ref1->root < ref2->root)
+               return -1;
+       if (ref1->root > ref2->root)
+               return 1;
+       if (ref1->parent < ref2->parent)
+               return -1;
+       if (ref1->parent > ref2->parent)
+               return 1;
        return 0;
 }
 
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
  * type of the delayed backrefs and content of delayed backrefs.
  */
 static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-                     struct btrfs_delayed_ref_node *ref1)
+                     struct btrfs_delayed_ref_node *ref1,
+                     bool compare_seq)
 {
        if (ref1->bytenr < ref2->bytenr)
                return -1;
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
        if (ref1->type > ref2->type)
                return 1;
        /* merging of sequenced refs is not allowed */
-       if (ref1->seq < ref2->seq)
-               return -1;
-       if (ref1->seq > ref2->seq)
-               return 1;
+       if (compare_seq) {
+               if (ref1->seq < ref2->seq)
+                       return -1;
+               if (ref1->seq > ref2->seq)
+                       return 1;
+       }
        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
                return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 rb_node);
 
-               cmp = comp_entry(entry, ins);
+               cmp = comp_entry(entry, ins, 1);
                if (cmp < 0)
                        p = &(*p)->rb_left;
                else if (cmp > 0)
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+                                   struct btrfs_delayed_ref_root *delayed_refs,
+                                   struct btrfs_delayed_ref_node *ref)
+{
+       rb_erase(&ref->rb_node, &delayed_refs->root);
+       ref->in_tree = 0;
+       btrfs_put_delayed_ref(ref);
+       delayed_refs->num_entries--;
+       if (trans->delayed_ref_updates)
+               trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+                    struct btrfs_delayed_ref_root *delayed_refs,
+                    struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+       struct rb_node *node;
+       int merged = 0;
+       int mod = 0;
+       int done = 0;
+
+       node = rb_prev(&ref->rb_node);
+       while (node) {
+               struct btrfs_delayed_ref_node *next;
+
+               next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+               node = rb_prev(node);
+               if (next->bytenr != ref->bytenr)
+                       break;
+               if (seq && next->seq >= seq)
+                       break;
+               if (comp_entry(ref, next, 0))
+                       continue;
+
+               if (ref->action == next->action) {
+                       mod = next->ref_mod;
+               } else {
+                       if (ref->ref_mod < next->ref_mod) {
+                               struct btrfs_delayed_ref_node *tmp;
+
+                               tmp = ref;
+                               ref = next;
+                               next = tmp;
+                               done = 1;
+                       }
+                       mod = -next->ref_mod;
+               }
+
+               merged++;
+               drop_delayed_ref(trans, delayed_refs, next);
+               ref->ref_mod += mod;
+               if (ref->ref_mod == 0) {
+                       drop_delayed_ref(trans, delayed_refs, ref);
+                       break;
+               } else {
+                       /*
+                        * You can't have multiples of the same ref on a tree
+                        * block.
+                        */
+                       WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+                               ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+               }
+
+               if (done)
+                       break;
+               node = rb_prev(&ref->rb_node);
+       }
+
+       return merged;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+                             struct btrfs_fs_info *fs_info,
+                             struct btrfs_delayed_ref_root *delayed_refs,
+                             struct btrfs_delayed_ref_head *head)
+{
+       struct rb_node *node;
+       u64 seq = 0;
+
+       spin_lock(&fs_info->tree_mod_seq_lock);
+       if (!list_empty(&fs_info->tree_mod_seq_list)) {
+               struct seq_list *elem;
+
+               elem = list_first_entry(&fs_info->tree_mod_seq_list,
+                                       struct seq_list, list);
+               seq = elem->seq;
+       }
+       spin_unlock(&fs_info->tree_mod_seq_lock);
+
+       node = rb_prev(&head->node.rb_node);
+       while (node) {
+               struct btrfs_delayed_ref_node *ref;
+
+               ref = rb_entry(node, struct btrfs_delayed_ref_node,
+                              rb_node);
+               if (ref->bytenr != head->node.bytenr)
+                       break;
+
+               /* We can't merge refs that are outside of our seq count */
+               if (seq && ref->seq >= seq)
+                       break;
+               if (merge_ref(trans, delayed_refs, ref, seq))
+                       node = rb_prev(&head->node.rb_node);
+               else
+                       node = rb_prev(node);
+       }
+}
+
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
                 * every changing the extent allocation tree.
                 */
                existing->ref_mod--;
-               if (existing->ref_mod == 0) {
-                       rb_erase(&existing->rb_node,
-                                &delayed_refs->root);
-                       existing->in_tree = 0;
-                       btrfs_put_delayed_ref(existing);
-                       delayed_refs->num_entries--;
-                       if (trans->delayed_ref_updates)
-                               trans->delayed_ref_updates--;
-               } else {
+               if (existing->ref_mod == 0)
+                       drop_delayed_ref(trans, delayed_refs, existing);
+               else
                        WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-               }
        } else {
                WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                        existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, level, action,
                                   for_cow);
-       if (!need_ref_seq(for_cow, ref_root) &&
-           waitqueue_active(&fs_info->tree_mod_seq_wait))
-               wake_up(&fs_info->tree_mod_seq_wait);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, owner, offset,
                                   action, for_cow);
-       if (!need_ref_seq(for_cow, ref_root) &&
-           waitqueue_active(&fs_info->tree_mod_seq_wait))
-               wake_up(&fs_info->tree_mod_seq_wait);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
                                   extent_op->is_data);
 
-       if (waitqueue_active(&fs_info->tree_mod_seq_wait))
-               wake_up(&fs_info->tree_mod_seq_wait);
        spin_unlock(&delayed_refs->lock);
        return 0;
 }
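
btrfs_merge_delayed_refs() above cancels paired add/drop refs on the same extent before they are run, which is what lets relocation's drop-then-re-add pattern net out (see the run_clustered_refs comment further down). A toy, self-contained sketch of the ref_mod arithmetic in merge_ref(); the types here are invented for illustration, not btrfs structures:

#include <stdio.h>

enum action { ADD, DROP };

struct toy_ref { enum action action; int ref_mod; };

/* Fold 'next' into 'ref' the way merge_ref() does: same action sums
 * the modifiers, opposite actions subtract; a resulting ref_mod of 0
 * means both entries can simply be dropped from the tree. */
static int merge(struct toy_ref *ref, const struct toy_ref *next)
{
	int mod = (ref->action == next->action) ? next->ref_mod
						: -next->ref_mod;
	ref->ref_mod += mod;
	return ref->ref_mod;   /* 0 => both refs disappear */
}

int main(void)
{
	struct toy_ref add  = { ADD, 1 };
	struct toy_ref drop = { DROP, 1 };
	printf("net ref_mod: %d\n", merge(&add, &drop)); /* prints 0 */
	return 0;
}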
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c366b629152796c0c717c0e4cd5f50e9b1..ab5300595847e20602c307620a3a5059ca258b06 100644 (file)
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+                             struct btrfs_fs_info *fs_info,
+                             struct btrfs_delayed_ref_root *delayed_refs,
+                             struct btrfs_delayed_ref_head *head);
 
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0cafd6e250d5d1717656d3d5821e2d1bfec42..22e98e04c2eabbc0b4baeb2618033657a9344fb9 100644 (file)
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                ret = read_extent_buffer_pages(io_tree, eb, start,
                                               WAIT_COMPLETE,
                                               btree_get_extent, mirror_num);
-               if (!ret && !verify_parent_transid(io_tree, eb,
+               if (!ret) {
+                       if (!verify_parent_transid(io_tree, eb,
                                                   parent_transid, 0))
-                       break;
+                               break;
+                       else
+                               ret = -EIO;
+               }
 
                /*
                 * This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;
 
-       atomic_dec(&fs_info->nr_async_submits);
-
-       if (atomic_read(&fs_info->nr_async_submits) < limit &&
+       if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
            waitqueue_active(&fs_info->async_submit_wait))
                wake_up(&fs_info->async_submit_wait);
 
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
        fs_info->free_chunk_space = 0;
        fs_info->tree_mod_log = RB_ROOT;
 
-       init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
        /* readahead state */
        INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
        spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@ retry_root_backup:
                goto fail_trans_kthread;
 
        /* do not make disk changes in broken FS */
-       if (btrfs_super_log_root(disk_super) != 0 &&
-           !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+       if (btrfs_super_log_root(disk_super) != 0) {
                u64 bytenr = btrfs_super_log_root(disk_super);
 
                if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
        /* clear out the rbtree of defraggable inodes */
        btrfs_run_defrag_inodes(fs_info);
 
-       /*
-        * Here come 2 situations when btrfs is broken to flip readonly:
-        *
-        * 1. when btrfs flips readonly somewhere else before
-        * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
-        * and btrfs will skip to write sb directly to keep
-        * ERROR state on disk.
-        *
-        * 2. when btrfs flips readonly just in btrfs_commit_super,
-        * and in such case, btrfs cannot write sb via btrfs_commit_super,
-        * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
-        * btrfs will cleanup all FS resources first and write sb then.
-        */
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret = btrfs_commit_super(root);
                if (ret)
                        printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
        }
 
-       if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-               ret = btrfs_error_commit_super(root);
-               if (ret)
-                       printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
-       }
+       if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+               btrfs_error_commit_super(root);
 
        btrfs_put_block_group_cache(fs_info);
 
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
        if (read_only)
                return 0;
 
-       if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-               printk(KERN_WARNING "warning: mount fs with errors, "
-                      "running btrfsck is recommended\n");
-       }
-
        return 0;
 }
 
-int btrfs_error_commit_super(struct btrfs_root *root)
+void btrfs_error_commit_super(struct btrfs_root *root)
 {
-       int ret;
-
        mutex_lock(&root->fs_info->cleaner_mutex);
        btrfs_run_delayed_iputs(root);
        mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
 
        /* cleanup FS via transaction */
        btrfs_cleanup_transaction(root);
-
-       ret = write_ctree_super(NULL, root, 0);
-
-       return ret;
 }
 
 static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
                /* FIXME: cleanup wait for commit */
                t->in_commit = 1;
                t->blocked = 1;
+               smp_mb();
                if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
                        wake_up(&root->fs_info->transaction_blocked_wait);
 
                t->blocked = 0;
+               smp_mb();
                if (waitqueue_active(&root->fs_info->transaction_wait))
                        wake_up(&root->fs_info->transaction_wait);
 
                t->commit_done = 1;
+               smp_mb();
                if (waitqueue_active(&t->commit_wait))
                        wake_up(&t->commit_wait);
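
The first hunk in this file ("Btrfs: fix that error value is changed by mistake") matters because the retry loop's exit value doubles as the return code: the read itself can succeed (ret == 0) while verify_parent_transid() rejects the buffer, and without the explicit ret = -EIO a caller would treat corrupt metadata as a clean read. A minimal sketch of the fixed shape, with hypothetical stubs in place of the btrfs helpers:

#include <errno.h>

/* Hypothetical stubs for the real read/verify helpers. */
static int read_mirror(int mirror) { (void)mirror; return 0; }   /* I/O ok */
static int content_valid(int mirror) { (void)mirror; return 0; } /* bad transid */

/* Shape of the fixed retry loop: a successful read of bad data must
 * still leave an error in ret, otherwise running out of mirrors
 * reports success to the caller. */
static int read_block(int num_mirrors)
{
	int ret = -EIO, mirror;

	for (mirror = 0; mirror < num_mirrors; mirror++) {
		ret = read_mirror(mirror);
		if (!ret) {
			if (content_valid(mirror))
				break;   /* good copy found */
			ret = -EIO;      /* the line the patch adds */
		}
	}
	return ret;
}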
 
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147eea23952c689ee9e87cfd39e7d61fde96b..c5b00a735fefac258b7914223139d902efacbf99 100644 (file)
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
+void btrfs_error_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize);
 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c47c99e00adffd8ee6d24e2db29093d..ba58024d40d3eaf486c50d96cd116c3b828b9c75 100644 (file)
@@ -2251,6 +2251,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                        }
                }
 
+               /*
+                * We need to try and merge add/drops of the same ref since we
+                * can run into issues with relocate dropping the implicit ref
+                * and then it being added back again before the drop can
+                * finish.  If we merged anything we need to re-loop so we can
+                * get a good ref.
+                */
+               btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+                                        locked_ref);
+
                /*
                 * locked_ref is the head node, so we have to go one
                 * node back for any delayed ref updates
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                ref->in_tree = 0;
                rb_erase(&ref->rb_node, &delayed_refs->root);
                delayed_refs->num_entries--;
-               /*
-                * we modified num_entries, but as we're currently running
-                * delayed refs, skip
-                *     wake_up(&delayed_refs->seq_wait);
-                * here.
-                */
+               if (locked_ref) {
+                       /*
+                        * when we play the delayed ref, also correct the
+                        * ref_mod on head
+                        */
+                       switch (ref->action) {
+                       case BTRFS_ADD_DELAYED_REF:
+                       case BTRFS_ADD_DELAYED_EXTENT:
+                               locked_ref->node.ref_mod -= ref->ref_mod;
+                               break;
+                       case BTRFS_DROP_DELAYED_REF:
+                               locked_ref->node.ref_mod += ref->ref_mod;
+                               break;
+                       default:
+                               WARN_ON(1);
+                       }
+               }
                spin_unlock(&delayed_refs->lock);
 
                ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@ next:
        return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-                              struct btrfs_delayed_ref_root *delayed_refs,
-                              unsigned long num_refs,
-                              struct list_head *first_seq)
-{
-       spin_unlock(&delayed_refs->lock);
-       pr_debug("waiting for more refs (num %ld, first %p)\n",
-                num_refs, first_seq);
-       wait_event(fs_info->tree_mod_seq_wait,
-                  num_refs != delayed_refs->num_entries ||
-                  fs_info->tree_mod_seq_list.next != first_seq);
-       pr_debug("done waiting for more refs (num %ld, first %p)\n",
-                delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-       spin_lock(&delayed_refs->lock);
-}
-
 #ifdef SCRAMBLE_DELAYED_REFS
 /*
  * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct list_head cluster;
-       struct list_head *first_seq = NULL;
        int ret;
        u64 delayed_start;
        int run_all = count == (unsigned long)-1;
        int run_most = 0;
-       unsigned long num_refs = 0;
-       int consider_waiting;
+       int loops;
 
        /* We'll clean this up in btrfs_cleanup_transaction */
        if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        delayed_refs = &trans->transaction->delayed_refs;
        INIT_LIST_HEAD(&cluster);
 again:
-       consider_waiting = 0;
+       loops = 0;
        spin_lock(&delayed_refs->lock);
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
                if (ret)
                        break;
 
-               if (delayed_start >= delayed_refs->run_delayed_start) {
-                       if (consider_waiting == 0) {
-                               /*
-                                * btrfs_find_ref_cluster looped. let's do one
-                                * more cycle. if we don't run any delayed ref
-                                * during that cycle (because we can't because
-                                * all of them are blocked) and if the number of
-                                * refs doesn't change, we avoid busy waiting.
-                                */
-                               consider_waiting = 1;
-                               num_refs = delayed_refs->num_entries;
-                               first_seq = root->fs_info->tree_mod_seq_list.next;
-                       } else {
-                               wait_for_more_refs(root->fs_info, delayed_refs,
-                                                  num_refs, first_seq);
-                               /*
-                                * after waiting, things have changed. we
-                                * dropped the lock and someone else might have
-                                * run some refs, built new clusters and so on.
-                                * therefore, we restart staleness detection.
-                                */
-                               consider_waiting = 0;
-                       }
-               }
-
                ret = run_clustered_refs(trans, root, &cluster);
                if (ret < 0) {
                        spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
                if (count == 0)
                        break;
 
-               if (ret || delayed_refs->run_delayed_start == 0) {
+               if (delayed_start >= delayed_refs->run_delayed_start) {
+                       if (loops == 0) {
+                               /*
+                                * btrfs_find_ref_cluster looped. let's do one
+                                * more cycle. if we don't run any delayed ref
+                                * during that cycle (because we can't because
+                                * all of them are blocked), bail out.
+                                */
+                               loops = 1;
+                       } else {
+                               /*
+                                * no runnable refs left, stop trying
+                                */
+                               BUG_ON(run_all);
+                               break;
+                       }
+               }
+               if (ret) {
                        /* refs were run, let's reset staleness detection */
-                       consider_waiting = 0;
+                       loops = 0;
                }
        }
 
@@ -3007,17 +3002,16 @@ again:
        }
        spin_unlock(&block_group->lock);
 
-       num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+       /*
+        * Try to preallocate enough space based on how big the block group is.
+        * Keep in mind this has to include any pinned space which could end up
+        * taking up quite a bit since it's not folded into the other space
+        * cache.
+        */
+       num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
        if (!num_pages)
                num_pages = 1;
 
-       /*
-        * Just to make absolutely sure we have enough space, we're going to
-        * preallocate 12 pages worth of space for each block group.  In
-        * practice we ought to use at most 8, but we need extra space so we can
-        * add our header and have a terminator between the extents and the
-        * bitmaps.
-        */
        num_pages *= 16;
        num_pages *= PAGE_CACHE_SIZE;
 
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        if (root->fs_info->quota_enabled) {
                ret = btrfs_qgroup_reserve(root, num_bytes +
                                           nr_extents * root->leafsize);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
                        return ret;
+               }
        }
 
        ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
        rb_erase(&head->node.rb_node, &delayed_refs->root);
 
        delayed_refs->num_entries--;
-       smp_mb();
-       if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-               wake_up(&root->fs_info->tree_mod_seq_wait);
 
        /*
         * we don't take a ref on the node because we're removing it from the
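
The free-space-cache hunk above quadruples the preallocation by dividing the block-group size by 256 MiB instead of 1 GiB before scaling by 16 pages, so pinned space has room as well. For a 1 GiB block group that is 4 * 16 = 64 pages, i.e. 256 KiB with 4 KiB pages, where the old divisor yielded only 64 KiB. A small sketch of the computation, assuming a 4 KiB page size:

#include <stdint.h>
#include <stdio.h>

#define ASSUMED_PAGE_SIZE 4096ULL  /* PAGE_CACHE_SIZE on common configs */

static uint64_t cache_prealloc_bytes(uint64_t block_group_bytes)
{
	uint64_t num_pages = block_group_bytes / (256ULL * 1024 * 1024);

	if (!num_pages)
		num_pages = 1;
	num_pages *= 16;   /* headroom for the header, pinned space and
			      the terminator between extents and bitmaps */
	return num_pages * ASSUMED_PAGE_SIZE;
}

int main(void)
{
	/* 1 GiB block group -> 64 pages -> 262144 bytes (256 KiB) */
	printf("%llu\n",
	       (unsigned long long)cache_prealloc_bytes(1ULL << 30));
	return 0;
}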
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb4ac820323c7fd4282a0cf71f7cc2d190d..4c878476bb91ce0985dabc25464622442aaca54a 100644 (file)
@@ -2330,23 +2330,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
                                                              state, mirror);
-                       if (ret) {
-                               /* no IO indicated but software detected errors
-                                * in the block, either checksum errors or
-                                * issues with the contents */
-                               struct btrfs_root *root =
-                                       BTRFS_I(page->mapping->host)->root;
-                               struct btrfs_device *device;
-
+                       if (ret)
                                uptodate = 0;
-                               device = btrfs_find_device_for_logical(
-                                               root, start, mirror);
-                               if (device)
-                                       btrfs_dev_stat_inc_and_print(device,
-                                               BTRFS_DEV_STAT_CORRUPTION_ERRS);
-                       } else {
+                       else
                                clean_io_failure(start, page);
-                       }
                }
 
                if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de0c21d0f773f19f45ee10ae9803fa36f3a..857d93cd01dc579eb46838624349442fef07cf00 100644 (file)
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                             struct bio *bio, u64 offset, u32 *dst)
+                             struct bio *bio, u64 offset)
 {
-       return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+       return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6e8f416773d4b221713c30079eb1de1f732e5ded..ec154f95464696cfa7df3cd6ab87ac4a0185ed03 100644 (file)
@@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
        nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
                PAGE_CACHE_SHIFT;
 
-       atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
-       if (atomic_read(&root->fs_info->async_delalloc_pages) <
+       if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
            5 * 1024 * 1024 &&
            waitqueue_active(&root->fs_info->async_submit_wait))
                wake_up(&root->fs_info->async_submit_wait);
@@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                                trans = btrfs_join_transaction_nolock(root);
                        else
                                trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans))
-                               return PTR_ERR(trans);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               trans = NULL;
+                               goto out;
+                       }
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                        ret = btrfs_update_inode_fallback(trans, root, inode);
                        if (ret) /* -ENOMEM or corruption */
@@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
        btrfs_i_size_write(dir, dir->i_size - name_len * 2);
        inode_inc_iversion(dir);
        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-       ret = btrfs_update_inode(trans, root, dir);
+       ret = btrfs_update_inode_fallback(trans, root, dir);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
 out:
@@ -5774,18 +5775,112 @@ out:
        return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+                             struct extent_state **cached_state, int writing)
+{
+       struct btrfs_ordered_extent *ordered;
+       int ret = 0;
+
+       while (1) {
+               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                0, cached_state);
+               /*
+                * We're concerned with the entire range that we're going to be
+                * doing DIO to, so we need to make sure theres no ordered
+                * extents in this range.
+                */
+               ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                    lockend - lockstart + 1);
+
+               /*
+                * We need to make sure there are no buffered pages in this
+                * range either, we could have raced between the invalidate in
+                * generic_file_direct_write and locking the extent.  The
+                * invalidate needs to happen so that reads after a write do not
+                * get stale data.
+                */
+               if (!ordered && (!writing ||
+                   !test_range_bit(&BTRFS_I(inode)->io_tree,
+                                   lockstart, lockend, EXTENT_UPTODATE, 0,
+                                   *cached_state)))
+                       break;
+
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                    cached_state, GFP_NOFS);
+
+               if (ordered) {
+                       btrfs_start_ordered_extent(inode, ordered, 1);
+                       btrfs_put_ordered_extent(ordered);
+               } else {
+                       /* Screw you mmap */
+                       ret = filemap_write_and_wait_range(inode->i_mapping,
+                                                          lockstart,
+                                                          lockend);
+                       if (ret)
+                               break;
+
+                       /*
+                        * If we found a page that couldn't be invalidated just
+                        * fall back to buffered.
+                        */
+                       ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                       lockstart >> PAGE_CACHE_SHIFT,
+                                       lockend >> PAGE_CACHE_SHIFT);
+                       if (ret)
+                               break;
+               }
+
+               cond_resched();
+       }
+
+       return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
 {
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_state *cached_state = NULL;
        u64 start = iblock << inode->i_blkbits;
+       u64 lockstart, lockend;
        u64 len = bh_result->b_size;
        struct btrfs_trans_handle *trans;
+       int unlock_bits = EXTENT_LOCKED;
+       int ret;
+
+       if (create) {
+               ret = btrfs_delalloc_reserve_space(inode, len);
+               if (ret)
+                       return ret;
+               unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+       } else {
+               len = min_t(u64, len, root->sectorsize);
+       }
+
+       lockstart = start;
+       lockend = start + len - 1;
+
+       /*
+        * If this errors out it's because we couldn't invalidate pagecache for
+        * this range and we need to fallback to buffered.
+        */
+       if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+               return -ENOTBLK;
+
+       if (create) {
+               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                    lockend, EXTENT_DELALLOC, NULL,
+                                    &cached_state, GFP_NOFS);
+               if (ret)
+                       goto unlock_err;
+       }
 
        em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-       if (IS_ERR(em))
-               return PTR_ERR(em);
+       if (IS_ERR(em)) {
+               ret = PTR_ERR(em);
+               goto unlock_err;
+       }
 
        /*
         * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
            em->block_start == EXTENT_MAP_INLINE) {
                free_extent_map(em);
-               return -ENOTBLK;
+               ret = -ENOTBLK;
+               goto unlock_err;
        }
 
        /* Just a good old fashioned hole, return */
        if (!create && (em->block_start == EXTENT_MAP_HOLE ||
                        test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
                free_extent_map(em);
-               /* DIO will do one hole at a time, so just unlock a sector */
-               unlock_extent(&BTRFS_I(inode)->io_tree, start,
-                             start + root->sectorsize - 1);
-               return 0;
+               ret = 0;
+               goto unlock_err;
        }
 
        /*
@@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
         *
         */
        if (!create) {
-               len = em->len - (start - em->start);
-               goto map;
+               len = min(len, em->len - (start - em->start));
+               lockstart = start + len;
+               goto unlock;
        }
 
        if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                        btrfs_end_transaction(trans, root);
                        if (ret) {
                                free_extent_map(em);
-                               return ret;
+                               goto unlock_err;
                        }
                        goto unlock;
                }
@@ -5873,14 +5968,12 @@ must_cow:
         */
        len = bh_result->b_size;
        em = btrfs_new_extent_direct(inode, em, start, len);
-       if (IS_ERR(em))
-               return PTR_ERR(em);
+       if (IS_ERR(em)) {
+               ret = PTR_ERR(em);
+               goto unlock_err;
+       }
        len = min(len, em->len - (start - em->start));
 unlock:
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-                         EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-                         0, NULL, GFP_NOFS);
-map:
        bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
                inode->i_blkbits;
        bh_result->b_size = len;
@@ -5898,9 +5991,44 @@ map:
                        i_size_write(inode, start + len);
        }
 
+       /*
+        * In the case of write we need to clear and unlock the entire range,
+        * in the case of read we need to unlock only the end area that we
+        * aren't using if there is any left over space.
+        */
+       if (lockstart < lockend) {
+               if (create && len < lockend - lockstart) {
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                        lockstart + len - 1, unlock_bits, 1, 0,
+                                        &cached_state, GFP_NOFS);
+                       /*
+                        * Beside unlock, we also need to cleanup reserved space
+                        * for the left range by attaching EXTENT_DO_ACCOUNTING.
+                        */
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                                        lockstart + len, lockend,
+                                        unlock_bits | EXTENT_DO_ACCOUNTING,
+                                        1, 0, NULL, GFP_NOFS);
+               } else {
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                        lockend, unlock_bits, 1, 0,
+                                        &cached_state, GFP_NOFS);
+               }
+       } else {
+               free_extent_state(cached_state);
+       }
+
        free_extent_map(em);
 
        return 0;
+
+unlock_err:
+       if (create)
+               unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                        unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+       return ret;
 }
 
 struct btrfs_dio_private {
@@ -5908,7 +6036,6 @@ struct btrfs_dio_private {
        u64 logical_offset;
        u64 disk_bytenr;
        u64 bytes;
-       u32 *csums;
        void *private;
 
        /* number of bios pending for this dio */
@@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 start;
-       u32 *private = dip->csums;
 
        start = dip->logical_offset;
        do {
@@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                        struct page *page = bvec->bv_page;
                        char *kaddr;
                        u32 csum = ~(u32)0;
+                       u64 private = ~(u32)0;
                        unsigned long flags;
 
+                       if (get_state_private(&BTRFS_I(inode)->io_tree,
+                                             start, &private))
+                               goto failed;
                        local_irq_save(flags);
                        kaddr = kmap_atomic(page);
                        csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                        local_irq_restore(flags);
 
                        flush_dcache_page(bvec->bv_page);
-                       if (csum != *private) {
+                       if (csum != private) {
+failed:
                                printk(KERN_ERR "btrfs csum failed ino %llu off"
                                      " %llu csum %u private %u\n",
                                      (unsigned long long)btrfs_ino(inode),
                                      (unsigned long long)start,
-                                     csum, *private);
+                                     csum, (unsigned)private);
                                err = -EIO;
                        }
                }
 
                start += bvec->bv_len;
-               private++;
                bvec++;
        } while (bvec <= bvec_end);
 
@@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                      dip->logical_offset + dip->bytes - 1);
        bio->bi_private = dip->private;
 
-       kfree(dip->csums);
        kfree(dip);
 
        /* If we had a csum failure make sure to clear the uptodate flag */
@@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                         int rw, u64 file_offset, int skip_sum,
-                                        u32 *csums, int async_submit)
+                                        int async_submit)
 {
        int write = rw & REQ_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                if (ret)
                        goto err;
        } else if (!skip_sum) {
-               ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-                                         file_offset, csums);
+               ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
                if (ret)
                        goto err;
        }
@@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        u64 submit_len = 0;
        u64 map_length;
        int nr_pages = 0;
-       u32 *csums = dip->csums;
        int ret = 0;
        int async_submit = 0;
-       int write = rw & REQ_WRITE;
 
        map_length = orig_bio->bi_size;
        ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                        atomic_inc(&dip->pending_bios);
                        ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                     file_offset, skip_sum,
-                                                    csums, async_submit);
+                                                    async_submit);
                        if (ret) {
                                bio_put(bio);
                                atomic_dec(&dip->pending_bios);
                                goto out_err;
                        }
 
-                       /* Write's use the ordered csums */
-                       if (!write && !skip_sum)
-                               csums = csums + nr_pages;
                        start_sector += submit_len >> 9;
                        file_offset += submit_len;
 
@@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 
 submit:
        ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                    csums, async_submit);
+                                    async_submit);
        if (!ret)
                return 0;
 
@@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
                ret = -ENOMEM;
                goto free_ordered;
        }
-       dip->csums = NULL;
-
-       /* Write's use the ordered csum stuff, so we don't need dip->csums */
-       if (!write && !skip_sum) {
-               dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-               if (!dip->csums) {
-                       kfree(dip);
-                       ret = -ENOMEM;
-                       goto free_ordered;
-               }
-       }
 
        dip->private = bio->bi_private;
        dip->inode = inode;
@@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 out:
        return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                        const struct iovec *iov, loff_t offset,
                        unsigned long nr_segs)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
-       struct btrfs_ordered_extent *ordered;
-       struct extent_state *cached_state = NULL;
-       u64 lockstart, lockend;
-       ssize_t ret;
-       int writing = rw & WRITE;
-       int write_bits = 0;
-       size_t count = iov_length(iov, nr_segs);
 
        if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-                           offset, nr_segs)) {
+                           offset, nr_segs))
                return 0;
-       }
-
-       lockstart = offset;
-       lockend = offset + count - 1;
-
-       if (writing) {
-               ret = btrfs_delalloc_reserve_space(inode, count);
-               if (ret)
-                       goto out;
-       }
-
-       while (1) {
-               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                0, &cached_state);
-               /*
-                * We're concerned with the entire range that we're going to be
-                * doing DIO to, so we need to make sure theres no ordered
-                * extents in this range.
-                */
-               ordered = btrfs_lookup_ordered_range(inode, lockstart,
-                                                    lockend - lockstart + 1);
-
-               /*
-                * We need to make sure there are no buffered pages in this
-                * range either, we could have raced between the invalidate in
-                * generic_file_direct_write and locking the extent.  The
-                * invalidate needs to happen so that reads after a write do not
-                * get stale data.
-                */
-               if (!ordered && (!writing ||
-                   !test_range_bit(&BTRFS_I(inode)->io_tree,
-                                   lockstart, lockend, EXTENT_UPTODATE, 0,
-                                   cached_state)))
-                       break;
-
-               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                    &cached_state, GFP_NOFS);
-
-               if (ordered) {
-                       btrfs_start_ordered_extent(inode, ordered, 1);
-                       btrfs_put_ordered_extent(ordered);
-               } else {
-                       /* Screw you mmap */
-                       ret = filemap_write_and_wait_range(file->f_mapping,
-                                                          lockstart,
-                                                          lockend);
-                       if (ret)
-                               goto out;
-
-                       /*
-                        * If we found a page that couldn't be invalidated just
-                        * fall back to buffered.
-                        */
-                       ret = invalidate_inode_pages2_range(file->f_mapping,
-                                       lockstart >> PAGE_CACHE_SHIFT,
-                                       lockend >> PAGE_CACHE_SHIFT);
-                       if (ret) {
-                               if (ret == -EBUSY)
-                                       ret = 0;
-                               goto out;
-                       }
-               }
-
-               cond_resched();
-       }
 
-       /*
-        * we don't use btrfs_set_extent_delalloc because we don't want
-        * the dirty or uptodate bits
-        */
-       if (writing) {
-               write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                    EXTENT_DELALLOC, NULL, &cached_state,
-                                    GFP_NOFS);
-               if (ret) {
-                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                        lockend, EXTENT_LOCKED | write_bits,
-                                        1, 0, &cached_state, GFP_NOFS);
-                       goto out;
-               }
-       }
-
-       free_extent_state(cached_state);
-       cached_state = NULL;
-
-       ret = __blockdev_direct_IO(rw, iocb, inode,
+       return __blockdev_direct_IO(rw, iocb, inode,
                   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
                   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
                   btrfs_submit_direct, 0);
-
-       if (ret < 0 && ret != -EIOCBQUEUED) {
-               clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-                             offset + iov_length(iov, nr_segs) - 1,
-                             EXTENT_LOCKED | write_bits, 1, 0,
-                             &cached_state, GFP_NOFS);
-       } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-               /*
-                * We're falling back to buffered, unlock the section we didn't
-                * do IO on.
-                */
-               clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-                             offset + iov_length(iov, nr_segs) - 1,
-                             EXTENT_LOCKED | write_bits, 1, 0,
-                             &cached_state, GFP_NOFS);
-       }
-out:
-       free_extent_state(cached_state);
-       return ret;
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
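
The bulk of this inode.c change ("Btrfs: lock extents as we map them in DIO") moves the extent-locking dance out of btrfs_direct_IO() and into btrfs_get_blocks_direct(), so only the range actually being mapped is locked. lock_extent_direct() reduces to a lock/validate/back-off loop; below is a hedged, self-contained skeleton of that control flow with every helper stubbed out — none of these are real btrfs calls:

#include <errno.h>
#include <stdbool.h>

/* Hypothetical stubs standing in for the btrfs helpers. */
static void lock_range(void) {}
static void unlock_range(void) {}
static bool range_has_ordered_extent(void) { return false; }
static bool range_has_buffered_pages(bool writing)
{
	(void)writing;           /* only checked for writes */
	return false;
}
static void wait_for_ordered_extent(void) {}
static int flush_and_invalidate_pages(void) { return 0; }

/* Shape of lock_extent_direct(): take the extent lock, test both
 * hazards (a pending ordered extent, or buffered pages that raced in
 * via mmap), and if either is present drop the lock, resolve it, and
 * retry. */
static int lock_extent_direct_sketch(bool writing)
{
	for (;;) {
		bool ordered;

		lock_range();
		ordered = range_has_ordered_extent();
		if (!ordered && !range_has_buffered_pages(writing))
			return 0;            /* safe to DIO, lock held */

		unlock_range();
		if (ordered)
			wait_for_ordered_extent();
		else if (flush_and_invalidate_pages())
			return -ENOTBLK;     /* fall back to buffered */
	}
}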
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7bb755677a220f71fd0540932c0c19565aec6a23..9df50fa8a0781ba387553297fbe442ed964e671e 100644 (file)
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
        uuid_le_gen(&new_uuid);
        memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
        root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
-       root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+       root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
        root_item.ctime = root_item.otime;
        btrfs_set_root_ctransid(&root_item, trans->transid);
        btrfs_set_root_otransid(&root_item, trans->transid);
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff0748051b1627448e54f281ca04cd77c719..2a1762c660416c662d32f95060046ba2557ab903 100644 (file)
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
        if (eb->lock_nested) {
                read_lock(&eb->lock);
-               if (&eb->lock_nested && current->pid == eb->lock_owner) {
+               if (eb->lock_nested && current->pid == eb->lock_owner) {
                        read_unlock(&eb->lock);
                        return;
                }
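The locking.c fix above is a classic pointer-vs-value slip: &eb->lock_nested is the address of the field, which is never NULL, so the old test was unconditionally true and the nested-unlock path could be taken even when lock_nested was clear. Compilers typically warn about this ("address of member ... will always evaluate to true"); a standalone reproduction:

#include <stdio.h>

struct extent_buffer { int lock_nested; };

int main(void)
{
        struct extent_buffer eb = { .lock_nested = 0 };

        if (&eb.lock_nested)    /* address of a member: always true */
                printf("buggy test fires with lock_nested == 0\n");

        if (eb.lock_nested)     /* the value itself: what was meant */
                printf("never printed here\n");

        return 0;
}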
index bc424ae5a81a49ef15cadb0159812f80acf0221f..38b42e7bc91d0e258a7fd086d793aae3d43ac4c1 100644 (file)
@@ -1364,13 +1364,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
        spin_lock(&fs_info->qgroup_lock);
 
        dstgroup = add_qgroup_rb(fs_info, objectid);
-       if (!dstgroup)
+       if (IS_ERR(dstgroup)) {
+               ret = PTR_ERR(dstgroup);
                goto unlock;
+       }
 
        if (srcid) {
                srcgroup = find_qgroup_rb(fs_info, srcid);
-               if (!srcgroup)
+               if (!srcgroup) {
+                       ret = -EINVAL;
                        goto unlock;
+               }
                dstgroup->rfer = srcgroup->rfer - level_size;
                dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
                srcgroup->excl = level_size;
@@ -1379,8 +1383,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                qgroup_dirty(fs_info, srcgroup);
        }
 
-       if (!inherit)
+       if (!inherit) {
+               ret = -EINVAL;
                goto unlock;
+       }
 
        i_qgroups = (u64 *)(inherit + 1);
        for (i = 0; i < inherit->num_qgroups; ++i) {
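Two things happen in the qgroup.c hunks: add_qgroup_rb reports failure through an ERR_PTR-encoded pointer rather than NULL, so its result must be checked with IS_ERR and decoded with PTR_ERR; and each early goto unlock now sets ret explicitly, so the caller never sees a stale success value. A userspace re-creation of the kernel's pointer-encoded errno convention (the helpers follow the kernel definitions, but this is a sketch, not the kernel header):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *add_group(int fail)        /* stand-in for add_qgroup_rb() */
{
        return fail ? ERR_PTR(-ENOMEM) : (void *)"group";
}

int main(void)
{
        int ret = 0;
        void *g = add_group(1);

        if (IS_ERR(g)) {                /* a NULL check would miss this */
                ret = PTR_ERR(g);
                goto unlock;
        }
unlock:
        printf("ret = %d\n", ret);      /* -12 (ENOMEM), not a stale 0 */
        return 0;
}

Without the explicit assignments, ret held whatever an earlier call left in it, so a failed inherit could be reported to userspace as success.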
index 6bb465cca20f5c68804ac3c4f78d4aa650fee96c..10d8e4d88071747651afd3a875eae0fb407c22e4 100644 (file)
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
        struct timespec ct = CURRENT_TIME;
 
        spin_lock(&root->root_times_lock);
-       item->ctransid = trans->transid;
+       item->ctransid = cpu_to_le64(trans->transid);
        item->ctime.sec = cpu_to_le64(ct.tv_sec);
-       item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
+       item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
        spin_unlock(&root->root_times_lock);
 }
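Same on-disk endianness theme as the create_subvol fix above, with one addition: ctransid is a little-endian 64-bit field on disk, and the old code stored the raw CPU-endian transid with no conversion at all, while the nsec line repeats the 64-vs-32-bit truncation fixed in ioctl.c. A small demonstration of what the missing cpu_to_le64 means on a big-endian host, with the byte swap written out explicitly:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t transid = 0x1122334455667788ULL;

        /* raw store: what item->ctransid = trans->transid did */
        uint64_t raw = transid;
        /* le64 store: cpu_to_le64() on a big-endian CPU is a byte swap */
        uint64_t le  = __builtin_bswap64(transid);

        printf("raw store : %016llx\n", (unsigned long long)raw);
        printf("le64 store: %016llx\n", (unsigned long long)le);
        return 0;
}

A big-endian writer and a little-endian reader would disagree about the stored value by a full byte swap.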
index f2eb24c477a3ca1c60ee95b51040354dd5a869ba..83d6f9f9c2209861efdec86dec9ad54d629deeb9 100644 (file)
@@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
        struct btrfs_trans_handle *trans;
        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
        struct btrfs_root *root = fs_info->tree_root;
-       int ret;
 
        trace_btrfs_sync_fs(wait);
 
@@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 
        btrfs_wait_ordered_extents(root, 0, 0);
 
-       trans = btrfs_start_transaction(root, 0);
+       spin_lock(&fs_info->trans_lock);
+       if (!fs_info->running_transaction) {
+               spin_unlock(&fs_info->trans_lock);
+               return 0;
+       }
+       spin_unlock(&fs_info->trans_lock);
+
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
-       ret = btrfs_commit_transaction(trans, root);
-       return ret;
+       return btrfs_commit_transaction(trans, root);
 }
 
 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
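The btrfs_sync_fs change above addresses the freeze/sync deadlock: with the filesystem frozen, btrfs_start_transaction blocks waiting for thaw while sync holds locks the thaw path needs. The fix never starts a transaction from sync; it returns early when no transaction is running, and otherwise joins the one that already exists. A toy userspace model of that shape (the names mirror the kernel fields, but the code is illustrative only):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
static int running_transaction;   /* stands in for fs_info->running_transaction */

static int sync_fs(void)
{
        pthread_mutex_lock(&trans_lock);
        if (!running_transaction) {             /* nothing to commit */
                pthread_mutex_unlock(&trans_lock);
                return 0;
        }
        pthread_mutex_unlock(&trans_lock);

        /* join the transaction that is already running instead of
         * starting a new one that freeze could block forever */
        printf("joining and committing existing transaction\n");
        return 0;
}

int main(void)
{
        printf("sync with no transaction -> %d\n", sync_fs());
        running_transaction = 1;
        printf("sync with a transaction  -> %d\n", sync_fs());
        return 0;
}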
@@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
        while (cur_devices) {
                head = &cur_devices->devices;
                list_for_each_entry(dev, head, dev_list) {
+                       if (dev->missing)
+                               continue;
                        if (!first_dev || dev->devid < first_dev->devid)
                                first_dev = dev;
                }
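btrfs_show_devname picks the device with the lowest devid to display, but a missing device has no name to show (its name pointer can be NULL, as the volumes.c hunk further down makes explicit), so it must be skipped rather than selected. The loop shape, reduced to a standalone example:

#include <stdio.h>

struct dev { int devid; int missing; };

int main(void)
{
        struct dev devs[] = { { 3, 1 }, { 7, 0 }, { 5, 0 } };
        const struct dev *first = NULL;

        for (int i = 0; i < 3; i++) {
                if (devs[i].missing)    /* a missing device has no name */
                        continue;
                if (!first || devs[i].devid < first->devid)
                        first = &devs[i];
        }
        printf("devname from devid %d\n", first ? first->devid : -1);
        return 0;
}

Without the check, devid 3 would win here and the caller would go on to print a name that does not exist.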
index 17be3dedacbab1c47084270153ed059719984470..27c26004e050a33211674363cfd80c02c98d1063 100644 (file)
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
        btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                         dentry->d_name.len * 2);
+       parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        if (ret)
                goto abort_trans_dput;
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        memcpy(new_root_item->parent_uuid, root->root_item.uuid,
                        BTRFS_UUID_SIZE);
        new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
-       new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+       new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
        btrfs_set_root_otransid(new_root_item, trans->transid);
        memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
        memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
index e86ae04abe6a78e72dd86b3a813bce3107a8802e..88b969aeeb71a53128ae941e569ad19f7b1038c3 100644 (file)
@@ -227,9 +227,8 @@ loop_lock:
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;
-               atomic_dec(&fs_info->nr_async_bios);
 
-               if (atomic_read(&fs_info->nr_async_bios) < limit &&
+               if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
                    waitqueue_active(&fs_info->async_submit_wait))
                        wake_up(&fs_info->async_submit_wait);
 
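The first volumes.c hunk closes a wakeup race: atomic_dec followed by atomic_read is two separate operations, so the value tested may already reflect other threads' activity rather than this thread's own decrement, and the below-the-limit wakeup can be missed or fired spuriously. atomic_dec_return folds the decrement and the observation into a single atomic step. The difference, expressed in C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
        atomic_int nr_async_bios = 5;

        /* racy shape: another thread can run between these two lines,
         * so 'seen' is not necessarily the result of our decrement */
        atomic_fetch_sub(&nr_async_bios, 1);
        int seen = atomic_load(&nr_async_bios);

        /* fixed shape: decrement and observe in one atomic operation,
         * the C11 analogue of the kernel's atomic_dec_return() */
        int mine = atomic_fetch_sub(&nr_async_bios, 1) - 1;

        printf("seen=%d mine=%d\n", seen, mine);
        return 0;
}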
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                memcpy(new_device, device, sizeof(*new_device));
 
                /* Safe because we are under uuid_mutex */
-               name = rcu_string_strdup(device->name->str, GFP_NOFS);
-               BUG_ON(device->name && !name); /* -ENOMEM */
-               rcu_assign_pointer(new_device->name, name);
+               if (device->name) {
+                       name = rcu_string_strdup(device->name->str, GFP_NOFS);
+                       BUG_ON(device->name && !name); /* -ENOMEM */
+                       rcu_assign_pointer(new_device->name, name);
+               }
                new_device->bdev = NULL;
                new_device->writeable = 0;
                new_device->in_fs_metadata = 0;
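The second volumes.c hunk stops duplicating a name that may not exist: a device that went missing has device->name == NULL, and the unconditional rcu_string_strdup(device->name->str, ...) dereferenced it. With the copy guarded, new_device->name simply stays NULL for missing devices; note that the retained device->name inside the BUG_ON is now redundant, since the branch is only entered when the name exists. The guarded-copy shape in plain C:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct device { const char *name; };

int main(void)
{
        struct device missing = { .name = NULL };
        struct device present = { .name = "sda" };

        /* an unconditional strdup(missing.name) would crash; copying
         * only when a name exists mirrors the fix */
        char *copy = missing.name ? strdup(missing.name) : NULL;
        printf("missing -> %s\n", copy ? copy : "(no name)");

        copy = present.name ? strdup(present.name) : NULL;
        printf("present -> %s\n", copy ? copy : "(no name)");
        free(copy);
        return 0;
}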
@@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
        return ret;
 }
 
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-                                                  u64 logical, int mirror_num)
-{
-       struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-       int ret;
-       u64 map_length = 0;
-       struct btrfs_bio *bbio = NULL;
-       struct btrfs_device *device;
-
-       BUG_ON(mirror_num == 0);
-       ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
-                             mirror_num);
-       if (ret) {
-               BUG_ON(bbio != NULL);
-               return NULL;
-       }
-       BUG_ON(mirror_num != bbio->mirror_num);
-       device = bbio->stripes[mirror_num - 1].dev;
-       kfree(bbio);
-       return device;
-}
-
 int btrfs_read_chunk_tree(struct btrfs_root *root)
 {
        struct btrfs_path *path;
index 5479325987b3c8af40e0760790d5365fda0efc8d..53c06af92e8da94270dab4f24906b937441d8a8f 100644 (file)
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
                         u64 *start, u64 *max_avail);
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-                                                  u64 logical, int mirror_num);
 void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_root *root,