]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 18 May 2013 18:35:28 +0000 (11:35 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 18 May 2013 18:35:28 +0000 (11:35 -0700)
Pull btrfs fixes from Chris Mason:
 "Miao Xie has been very busy, fixing races and enospc problems and many
  other small but important pieces.

  Alexandre Oliva discovered some problems with how our error handling
  was interacting with the block layer and for now has disabled our
  partial handling of sub-page writes.  The real sub-page work is in a
  series of patches from IBM that we still need to integrate and test.
  The code Alexandre has turned off was really incomplete.

  Josef has more error handling fixes and an important fix for the new
  skinny extent format.

  This also has my fix for the tracepoint crash from late in 3.9.  It's
  the first stage in a larger clean up to get rid of btrfs_bio and make
  a proper bioset for all the items we need to tack into the bio.  For
  now the bioset only holds our mirror_num and stripe_index, but for the
  next merge window I'll shuffle more in."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
  Btrfs: use a btrfs bioset instead of abusing bio internals
  Btrfs: make sure roots are assigned before freeing their nodes
  Btrfs: explicitly use global_block_rsv for quota_tree
  btrfs: do away with non-whole_page extent I/O
  Btrfs: don't invoke btrfs_invalidate_inodes() in the spin lock context
  Btrfs: remove BUG_ON() in btrfs_read_fs_tree_no_radix()
  Btrfs: pause the space balance when remounting to R/O
  Btrfs: fix unprotected root node of the subvolume's inode rb-tree
  Btrfs: fix accessing a freed tree root
  Btrfs: return errno if possible when we fail to allocate memory
  Btrfs: update the global reserve if it is empty
  Btrfs: don't steal the reserved space from the global reserve if their space type is different
  Btrfs: optimize the error handle of use_block_rsv()
  Btrfs: don't use global block reservation for inode cache truncation
  Btrfs: don't abort the current transaction if there is no enough space for inode cache
  Correct allowed raid levels on balance.
  Btrfs: fix possible memory leak in replace_path()
  Btrfs: fix possible memory leak in the find_parent_nodes()
  Btrfs: don't allow device replace on RAID5/RAID6
  Btrfs: handle running extent ops with skinny metadata
  ...

21 files changed:
fs/btrfs/backref.c
fs/btrfs/check-integrity.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index b4fb41558111b75d461096d8acdfca34690b8728..290e347b6db3f925f414fd9be4e6ea394da6f887 100644 (file)
@@ -918,7 +918,8 @@ again:
                                                           ref->parent, bsz, 0);
                                if (!eb || !extent_buffer_uptodate(eb)) {
                                        free_extent_buffer(eb);
-                                       return -EIO;
+                                       ret = -EIO;
+                                       goto out;
                                }
                                ret = find_extent_in_eb(eb, bytenr,
                                                        *extent_item_pos, &eie);
index 18af6f48781a1f31e1d41c23bb08a2e1b6ea12a7..1431a696501704d3f9e0901c64de537b2b20183a 100644 (file)
@@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
                unsigned int j;
                DECLARE_COMPLETION_ONSTACK(complete);
 
-               bio = bio_alloc(GFP_NOFS, num_pages - i);
+               bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
                if (!bio) {
                        printk(KERN_INFO
                               "btrfsic: bio_alloc() for %u pages failed!\n",
index de6de8e60b46019f528b93505d3848adb8699191..02fae7f7e42cb417a14fe0243805bcad4270b592 100644 (file)
@@ -951,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        BUG_ON(ret); /* -ENOMEM */
                }
                if (new_flags != 0) {
+                       int level = btrfs_header_level(buf);
+
                        ret = btrfs_set_disk_extent_flags(trans, root,
                                                          buf->start,
                                                          buf->len,
-                                                         new_flags, 0);
+                                                         new_flags, level, 0);
                        if (ret)
                                return ret;
                }
index 63c328a9ce956c716c8ed6e1a0131b5260bb022b..d6dd49b51ba8dfe27069a139ab42cfd88e58c117 100644 (file)
@@ -88,12 +88,12 @@ struct btrfs_ordered_sum;
 /* holds checksums of all the data extents */
 #define BTRFS_CSUM_TREE_OBJECTID 7ULL
 
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
-
 /* holds quota configuration and tracking */
 #define BTRFS_QUOTA_TREE_OBJECTID 8ULL
 
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
 /* orhpan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
@@ -3075,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 flags,
-                               int is_data);
+                               int level, int is_data);
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
index f75fcaf79aebd11d54ebcc6df94d6f5b67c55dfe..70b962cc177d973688b0d2deb56965bffbc423ae 100644 (file)
@@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node {
 struct btrfs_delayed_extent_op {
        struct btrfs_disk_key key;
        u64 flags_to_set;
+       int level;
        unsigned int update_key:1;
        unsigned int update_flags:1;
        unsigned int is_data:1;
index 7ba7b3900cb8eb749b82241cc4533b4fc978ef8a..65241f32d3f8aec282a80d443c7acc677f68726a 100644 (file)
@@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        struct btrfs_device *tgt_device = NULL;
        struct btrfs_device *src_device = NULL;
 
+       if (btrfs_fs_incompat(fs_info, RAID56)) {
+               pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
+               return -EINVAL;
+       }
+
        switch (args->start.cont_reading_from_srcdev_mode) {
        case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
        case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
index 4e9ebe1f1827895712afdea8e0e8016bd20088ca..e7b3cb5286a5a699c4716a8a5dc2ae6a0e54f622 100644 (file)
@@ -152,7 +152,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_DEV_TREE_OBJECTID,        .name_stem = "dev"      },
        { .id = BTRFS_FS_TREE_OBJECTID,         .name_stem = "fs"       },
        { .id = BTRFS_CSUM_TREE_OBJECTID,       .name_stem = "csum"     },
-       { .id = BTRFS_ORPHAN_OBJECTID,          .name_stem = "orphan"   },
+       { .id = BTRFS_QUOTA_TREE_OBJECTID,      .name_stem = "quota"    },
        { .id = BTRFS_TREE_LOG_OBJECTID,        .name_stem = "log"      },
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
@@ -1513,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        }
 
        root->commit_root = btrfs_root_node(root);
-       BUG_ON(!root->node); /* -ENOMEM */
 out:
        if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
                root->ref_cows = 1;
@@ -1988,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
 {
        free_extent_buffer(info->tree_root->node);
        free_extent_buffer(info->tree_root->commit_root);
-       free_extent_buffer(info->dev_root->node);
-       free_extent_buffer(info->dev_root->commit_root);
-       free_extent_buffer(info->extent_root->node);
-       free_extent_buffer(info->extent_root->commit_root);
-       free_extent_buffer(info->csum_root->node);
-       free_extent_buffer(info->csum_root->commit_root);
-       if (info->quota_root) {
-               free_extent_buffer(info->quota_root->node);
-               free_extent_buffer(info->quota_root->commit_root);
-       }
-
        info->tree_root->node = NULL;
        info->tree_root->commit_root = NULL;
-       info->dev_root->node = NULL;
-       info->dev_root->commit_root = NULL;
-       info->extent_root->node = NULL;
-       info->extent_root->commit_root = NULL;
-       info->csum_root->node = NULL;
-       info->csum_root->commit_root = NULL;
+
+       if (info->dev_root) {
+               free_extent_buffer(info->dev_root->node);
+               free_extent_buffer(info->dev_root->commit_root);
+               info->dev_root->node = NULL;
+               info->dev_root->commit_root = NULL;
+       }
+       if (info->extent_root) {
+               free_extent_buffer(info->extent_root->node);
+               free_extent_buffer(info->extent_root->commit_root);
+               info->extent_root->node = NULL;
+               info->extent_root->commit_root = NULL;
+       }
+       if (info->csum_root) {
+               free_extent_buffer(info->csum_root->node);
+               free_extent_buffer(info->csum_root->commit_root);
+               info->csum_root->node = NULL;
+               info->csum_root->commit_root = NULL;
+       }
        if (info->quota_root) {
+               free_extent_buffer(info->quota_root->node);
+               free_extent_buffer(info->quota_root->commit_root);
                info->quota_root->node = NULL;
                info->quota_root->commit_root = NULL;
        }
-
        if (chunk_root) {
                free_extent_buffer(info->chunk_root->node);
                free_extent_buffer(info->chunk_root->commit_root);
@@ -3128,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
         * caller
         */
        device->flush_bio = NULL;
-       bio = bio_alloc(GFP_NOFS, 0);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
        if (!bio)
                return -ENOMEM;
 
@@ -3659,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
                                         ordered_operations);
 
                list_del_init(&btrfs_inode->ordered_operations);
+               spin_unlock(&root->fs_info->ordered_extent_lock);
 
                btrfs_invalidate_inodes(btrfs_inode->root);
+
+               spin_lock(&root->fs_info->ordered_extent_lock);
        }
 
        spin_unlock(&root->fs_info->ordered_extent_lock);
@@ -3782,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
                list_del_init(&btrfs_inode->delalloc_inodes);
                clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
                          &btrfs_inode->runtime_flags);
+               spin_unlock(&root->fs_info->delalloc_lock);
 
                btrfs_invalidate_inodes(btrfs_inode->root);
+
+               spin_lock(&root->fs_info->delalloc_lock);
        }
 
        spin_unlock(&root->fs_info->delalloc_lock);
@@ -3808,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                while (start <= end) {
                        eb = btrfs_find_tree_block(root, start,
                                                   root->leafsize);
-                       start += eb->len;
+                       start += root->leafsize;
                        if (!eb)
                                continue;
                        wait_on_extent_buffer_writeback(eb);
index 2305b5c5cf0012d77de5dde14721ddc1cc0ccc5c..df472ab1b5acca7b411b05bcc414913bcebcc244 100644 (file)
@@ -2070,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        u32 item_size;
        int ret;
        int err = 0;
-       int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                       node->type == BTRFS_SHARED_BLOCK_REF_KEY);
+       int metadata = !extent_op->is_data;
 
        if (trans->aborted)
                return 0;
@@ -2086,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        key.objectid = node->bytenr;
 
        if (metadata) {
-               struct btrfs_delayed_tree_ref *tree_ref;
-
-               tree_ref = btrfs_delayed_node_to_tree_ref(node);
                key.type = BTRFS_METADATA_ITEM_KEY;
-               key.offset = tree_ref->level;
+               key.offset = extent_op->level;
        } else {
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = node->num_bytes;
@@ -2719,7 +2715,7 @@ out:
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 flags,
-                               int is_data)
+                               int level, int is_data)
 {
        struct btrfs_delayed_extent_op *extent_op;
        int ret;
@@ -2732,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
        extent_op->update_flags = 1;
        extent_op->update_key = 0;
        extent_op->is_data = is_data ? 1 : 0;
+       extent_op->level = level;
 
        ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
                                          num_bytes, extent_op);
@@ -3109,6 +3106,11 @@ again:
        WARN_ON(ret);
 
        if (i_size_read(inode) > 0) {
+               ret = btrfs_check_trunc_cache_free_space(root,
+                                       &root->fs_info->global_block_rsv);
+               if (ret)
+                       goto out_put;
+
                ret = btrfs_truncate_free_space_cache(root, trans, path,
                                                      inode);
                if (ret)
@@ -4562,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
        fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
+       if (fs_info->quota_root)
+               fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
 
        update_global_block_rsv(fs_info);
@@ -6651,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
        int ret;
+       bool global_updated = false;
 
        block_rsv = get_block_rsv(trans, root);
 
-       if (block_rsv->size == 0) {
-               ret = reserve_metadata_bytes(root, block_rsv, blocksize,
-                                            BTRFS_RESERVE_NO_FLUSH);
-               /*
-                * If we couldn't reserve metadata bytes try and use some from
-                * the global reserve.
-                */
-               if (ret && block_rsv != global_rsv) {
-                       ret = block_rsv_use_bytes(global_rsv, blocksize);
-                       if (!ret)
-                               return global_rsv;
-                       return ERR_PTR(ret);
-               } else if (ret) {
-                       return ERR_PTR(ret);
-               }
+       if (unlikely(block_rsv->size == 0))
+               goto try_reserve;
+again:
+       ret = block_rsv_use_bytes(block_rsv, blocksize);
+       if (!ret)
                return block_rsv;
+
+       if (block_rsv->failfast)
+               return ERR_PTR(ret);
+
+       if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
+               global_updated = true;
+               update_global_block_rsv(root->fs_info);
+               goto again;
        }
 
-       ret = block_rsv_use_bytes(block_rsv, blocksize);
+       if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+               static DEFINE_RATELIMIT_STATE(_rs,
+                               DEFAULT_RATELIMIT_INTERVAL * 10,
+                               /*DEFAULT_RATELIMIT_BURST*/ 1);
+               if (__ratelimit(&_rs))
+                       WARN(1, KERN_DEBUG
+                               "btrfs: block rsv returned %d\n", ret);
+       }
+try_reserve:
+       ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+                                    BTRFS_RESERVE_NO_FLUSH);
        if (!ret)
                return block_rsv;
-       if (ret && !block_rsv->failfast) {
-               if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
-                       static DEFINE_RATELIMIT_STATE(_rs,
-                                       DEFAULT_RATELIMIT_INTERVAL * 10,
-                                       /*DEFAULT_RATELIMIT_BURST*/ 1);
-                       if (__ratelimit(&_rs))
-                               WARN(1, KERN_DEBUG
-                                       "btrfs: block rsv returned %d\n", ret);
-               }
-               ret = reserve_metadata_bytes(root, block_rsv, blocksize,
-                                            BTRFS_RESERVE_NO_FLUSH);
-               if (!ret) {
-                       return block_rsv;
-               } else if (ret && block_rsv != global_rsv) {
-                       ret = block_rsv_use_bytes(global_rsv, blocksize);
-                       if (!ret)
-                               return global_rsv;
-               }
+       /*
+        * If we couldn't reserve metadata bytes try and use some from
+        * the global reserve if its space type is the same as the global
+        * reservation.
+        */
+       if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
+           block_rsv->space_info == global_rsv->space_info) {
+               ret = block_rsv_use_bytes(global_rsv, blocksize);
+               if (!ret)
+                       return global_rsv;
        }
-
-       return ERR_PTR(-ENOSPC);
+       return ERR_PTR(ret);
 }
 
 static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
@@ -6763,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                        extent_op->update_key = 1;
                extent_op->update_flags = 1;
                extent_op->is_data = 0;
+               extent_op->level = level;
 
                ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
                                        ins.objectid,
@@ -6934,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
                BUG_ON(ret); /* -ENOMEM */
                ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
-                                                 eb->len, flag, 0);
+                                                 eb->len, flag,
+                                                 btrfs_header_level(eb), 0);
                BUG_ON(ret); /* -ENOMEM */
                wc->flags[level] |= flag;
        }
index 32d67a822e93c0cf53f5c0409bd73ff863bf90bc..e7e7afb4a87268211e8b0ef881a6eeac0068eefd 100644 (file)
@@ -23,6 +23,7 @@
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
 
 #ifdef CONFIG_BTRFS_DEBUG
 static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!extent_buffer_cache)
                goto free_state_cache;
+
+       btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+                                    offsetof(struct btrfs_io_bio, bio));
+       if (!btrfs_bioset)
+               goto free_buffer_cache;
        return 0;
 
+free_buffer_cache:
+       kmem_cache_destroy(extent_buffer_cache);
+       extent_buffer_cache = NULL;
+
 free_state_cache:
        kmem_cache_destroy(extent_state_cache);
+       extent_state_cache = NULL;
        return -ENOMEM;
 }
 
@@ -145,6 +156,8 @@ void extent_io_exit(void)
                kmem_cache_destroy(extent_state_cache);
        if (extent_buffer_cache)
                kmem_cache_destroy(extent_buffer_cache);
+       if (btrfs_bioset)
+               bioset_free(btrfs_bioset);
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
@@ -1947,28 +1960,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
                SetPageUptodate(page);
 }
 
-/*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
-       u64 start = page_offset(page);
-       u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
-               unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
-                                struct page *page)
-{
-       end_page_writeback(page);
-}
-
 /*
  * When IO fails, either with EIO or csum verification fails, we
  * try other mirrors that might have a good copy of the data.  This
@@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
                return 0;
 
-       bio = bio_alloc(GFP_NOFS, 1);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio)
                return -EIO;
        bio->bi_private = &compl;
@@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                return -EIO;
        }
 
-       bio = bio_alloc(GFP_NOFS, 1);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio) {
                free_io_failure(inode, failrec, 0);
                return -EIO;
@@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
-       int whole_page;
 
        do {
                struct page *page = bvec->bv_page;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-               start = page_offset(page) + bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
+               /* We always issue full-page reads, but if some block
+                * in a page fails to read, blk_update_request() will
+                * advance bv_offset and adjust bv_len to compensate.
+                * Print a warning for nonzero offsets, and an error
+                * if they don't add up to a full page.  */
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+                       printk("%s page write in btrfs with offset %u and length %u\n",
+                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
+                              bvec->bv_offset, bvec->bv_len);
 
-               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-                       whole_page = 1;
-               else
-                       whole_page = 0;
+               start = page_offset(page);
+               end = start + bvec->bv_offset + bvec->bv_len - 1;
 
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
                if (end_extent_writepage(page, err, start, end))
                        continue;
 
-               if (whole_page)
-                       end_page_writeback(page);
-               else
-                       check_page_writeback(tree, page);
+               end_page_writeback(page);
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
@@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
-       int whole_page;
        int mirror;
        int ret;
 
@@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                struct page *page = bvec->bv_page;
                struct extent_state *cached = NULL;
                struct extent_state *state;
+               struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 
                pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-                        "mirror=%ld\n", (u64)bio->bi_sector, err,
-                        (long int)bio->bi_bdev);
+                        "mirror=%lu\n", (u64)bio->bi_sector, err,
+                        io_bio->mirror_num);
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-               start = page_offset(page) + bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
+               /* We always issue full-page reads, but if some block
+                * in a page fails to read, blk_update_request() will
+                * advance bv_offset and adjust bv_len to compensate.
+                * Print a warning for nonzero offsets, and an error
+                * if they don't add up to a full page.  */
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+                       printk("%s page read in btrfs with offset %u and length %u\n",
+                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
+                              bvec->bv_offset, bvec->bv_len);
 
-               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-                       whole_page = 1;
-               else
-                       whole_page = 0;
+               start = page_offset(page);
+               end = start + bvec->bv_offset + bvec->bv_len - 1;
 
                if (++bvec <= bvec_end)
                        prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                }
                spin_unlock(&tree->lock);
 
-               mirror = (int)(unsigned long)bio->bi_bdev;
+               mirror = io_bio->mirror_num;
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
                                                              state, mirror);
@@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                }
                unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
-               if (whole_page) {
-                       if (uptodate) {
-                               SetPageUptodate(page);
-                       } else {
-                               ClearPageUptodate(page);
-                               SetPageError(page);
-                       }
-                       unlock_page(page);
+               if (uptodate) {
+                       SetPageUptodate(page);
                } else {
-                       if (uptodate) {
-                               check_page_uptodate(tree, page);
-                       } else {
-                               ClearPageUptodate(page);
-                               SetPageError(page);
-                       }
-                       check_page_locked(tree, page);
+                       ClearPageUptodate(page);
+                       SetPageError(page);
                }
+               unlock_page(page);
        } while (bvec <= bvec_end);
 
        bio_put(bio);
 }
 
+/*
+ * this allocates from the btrfs_bioset.  We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags)
 {
        struct bio *bio;
 
-       bio = bio_alloc(gfp_flags, nr_vecs);
+       bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
 
        if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-               while (!bio && (nr_vecs /= 2))
-                       bio = bio_alloc(gfp_flags, nr_vecs);
+               while (!bio && (nr_vecs /= 2)) {
+                       bio = bio_alloc_bioset(gfp_flags,
+                                              nr_vecs, btrfs_bioset);
+               }
        }
 
        if (bio) {
@@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
        return bio;
 }
 
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+       return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+       return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
 static int __must_check submit_one_bio(int rw, struct bio *bio,
                                       int mirror_num, unsigned long bio_flags)
 {
@@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                last_for_get_extent = isize;
        }
 
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
                         &cached_state);
 
        em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 out_free:
        free_extent_map(em);
 out:
-       unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
                             &cached_state, GFP_NOFS);
        return ret;
 }
index a2c03a17500971f131b8a43051b1269e86bfc0af..41fb81e7ec53c3fda80caf28a039af7b4c1a0682 100644 (file)
@@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags);
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
 
 struct btrfs_fs_info;
 
index ecca6c7375a610dbee385a71781cece9082935e0..e53009657f0e5b91b638f3d31d5e4d2cbf919f62 100644 (file)
@@ -197,30 +197,32 @@ int create_free_space_inode(struct btrfs_root *root,
                                         block_group->key.objectid);
 }
 
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
-                                   struct btrfs_trans_handle *trans,
-                                   struct btrfs_path *path,
-                                   struct inode *inode)
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
+                                      struct btrfs_block_rsv *rsv)
 {
-       struct btrfs_block_rsv *rsv;
        u64 needed_bytes;
-       loff_t oldsize;
-       int ret = 0;
-
-       rsv = trans->block_rsv;
-       trans->block_rsv = &root->fs_info->global_block_rsv;
+       int ret;
 
        /* 1 for slack space, 1 for updating the inode */
        needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
                btrfs_calc_trans_metadata_size(root, 1);
 
-       spin_lock(&trans->block_rsv->lock);
-       if (trans->block_rsv->reserved < needed_bytes) {
-               spin_unlock(&trans->block_rsv->lock);
-               trans->block_rsv = rsv;
-               return -ENOSPC;
-       }
-       spin_unlock(&trans->block_rsv->lock);
+       spin_lock(&rsv->lock);
+       if (rsv->reserved < needed_bytes)
+               ret = -ENOSPC;
+       else
+               ret = 0;
+       spin_unlock(&rsv->lock);
+       return 0;
+}
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+                                   struct btrfs_trans_handle *trans,
+                                   struct btrfs_path *path,
+                                   struct inode *inode)
+{
+       loff_t oldsize;
+       int ret = 0;
 
        oldsize = i_size_read(inode);
        btrfs_i_size_write(inode, 0);
@@ -232,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
         */
        ret = btrfs_truncate_inode_items(trans, root, inode,
                                         0, BTRFS_EXTENT_DATA_KEY);
-
        if (ret) {
-               trans->block_rsv = rsv;
                btrfs_abort_transaction(trans, root, ret);
                return ret;
        }
@@ -242,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
        ret = btrfs_update_inode(trans, root, inode);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
-       trans->block_rsv = rsv;
 
        return ret;
 }
@@ -920,10 +919,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
        /* Make sure we can fit our crcs into the first page */
        if (io_ctl.check_crcs &&
-           (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
-               WARN_ON(1);
+           (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
                goto out_nospc;
-       }
 
        io_ctl_set_generation(&io_ctl, trans->transid);
 
index 4dc17d8809c76d25cddb635e29bd8309b340f464..8b7f19f4496153886975547923cdcdc815940e96 100644 (file)
@@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root,
                            struct btrfs_block_group_cache *block_group,
                            struct btrfs_path *path);
 
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
+                                      struct btrfs_block_rsv *rsv);
 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                    struct btrfs_trans_handle *trans,
                                    struct btrfs_path *path,
index d26f67a59e36f9caf3e91832fa1d4c943c8c9040..2c66ddbbe670e0ab80753dc41eb4054aacd00388 100644 (file)
@@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
        num_bytes = trans->bytes_reserved;
        /*
         * 1 item for inode item insertion if need
-        * 3 items for inode item update (in the worst case)
+        * 4 items for inode item update (in the worst case)
+        * 1 items for slack space if we need do truncation
         * 1 item for free space object
         * 3 items for pre-allocation
         */
-       trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
+       trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
        ret = btrfs_block_rsv_add(root, trans->block_rsv,
                                  trans->bytes_reserved,
                                  BTRFS_RESERVE_NO_FLUSH);
@@ -468,7 +469,8 @@ again:
        if (i_size_read(inode) > 0) {
                ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
                if (ret) {
-                       btrfs_abort_transaction(trans, root, ret);
+                       if (ret != -ENOSPC)
+                               btrfs_abort_transaction(trans, root, ret);
                        goto out_put;
                }
        }
index 9b31b3b091fceb6536612b30e64f2fd191d47e89..af978f7682b3441282b0f54e62a60b7534d51203 100644 (file)
@@ -715,8 +715,10 @@ retry:
                                        async_extent->ram_size - 1, 0);
 
                em = alloc_extent_map();
-               if (!em)
+               if (!em) {
+                       ret = -ENOMEM;
                        goto out_free_reserve;
+               }
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
                em->orig_start = em->start;
@@ -923,8 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                }
 
                em = alloc_extent_map();
-               if (!em)
+               if (!em) {
+                       ret = -ENOMEM;
                        goto out_reserve;
+               }
                em->start = start;
                em->orig_start = em->start;
                ram_size = ins.offset;
@@ -4724,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode)
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root);
 no_delete:
+       btrfs_remove_delayed_node(inode);
        clear_inode(inode);
        return;
 }
@@ -4839,14 +4844,13 @@ static void inode_tree_add(struct inode *inode)
        struct rb_node **p;
        struct rb_node *parent;
        u64 ino = btrfs_ino(inode);
-again:
-       p = &root->inode_tree.rb_node;
-       parent = NULL;
 
        if (inode_unhashed(inode))
                return;
-
+again:
+       parent = NULL;
        spin_lock(&root->inode_lock);
+       p = &root->inode_tree.rb_node;
        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct btrfs_inode, rb_node);
@@ -6928,7 +6932,11 @@ struct btrfs_dio_private {
        /* IO errors */
        int errors;
 
+       /* orig_bio is our btrfs_io_bio */
        struct bio *orig_bio;
+
+       /* dio_bio came from fs/direct-io.c */
+       struct bio *dio_bio;
 };
 
 static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6938,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
        struct bio_vec *bvec = bio->bi_io_vec;
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct bio *dio_bio;
        u64 start;
 
        start = dip->logical_offset;
@@ -6977,14 +6986,15 @@ failed:
 
        unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
                      dip->logical_offset + dip->bytes - 1);
-       bio->bi_private = dip->private;
+       dio_bio = dip->dio_bio;
 
        kfree(dip);
 
        /* If we had a csum failure make sure to clear the uptodate flag */
        if (err)
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
-       dio_end_io(bio, err);
+               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+       dio_end_io(dio_bio, err);
+       bio_put(bio);
 }
 
 static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6995,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
        struct btrfs_ordered_extent *ordered = NULL;
        u64 ordered_offset = dip->logical_offset;
        u64 ordered_bytes = dip->bytes;
+       struct bio *dio_bio;
        int ret;
 
        if (err)
@@ -7022,14 +7033,15 @@ out_test:
                goto again;
        }
 out_done:
-       bio->bi_private = dip->private;
+       dio_bio = dip->dio_bio;
 
        kfree(dip);
 
        /* If we had an error make sure to clear the uptodate flag */
        if (err)
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
-       dio_end_io(bio, err);
+               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+       dio_end_io(dio_bio, err);
+       bio_put(bio);
 }
 
 static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7065,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
        if (!atomic_dec_and_test(&dip->pending_bios))
                goto out;
 
-       if (dip->errors)
+       if (dip->errors) {
                bio_io_error(dip->orig_bio);
-       else {
-               set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+       else {
+               set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
                bio_endio(dip->orig_bio, 0);
        }
 out:
@@ -7243,25 +7255,34 @@ out_err:
        return 0;
 }
 
-static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
-                               loff_t file_offset)
+static void btrfs_submit_direct(int rw, struct bio *dio_bio,
+                               struct inode *inode, loff_t file_offset)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_dio_private *dip;
-       struct bio_vec *bvec = bio->bi_io_vec;
+       struct bio_vec *bvec = dio_bio->bi_io_vec;
+       struct bio *io_bio;
        int skip_sum;
        int write = rw & REQ_WRITE;
        int ret = 0;
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
+       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
+
+       if (!io_bio) {
+               ret = -ENOMEM;
+               goto free_ordered;
+       }
+
        dip = kmalloc(sizeof(*dip), GFP_NOFS);
        if (!dip) {
                ret = -ENOMEM;
-               goto free_ordered;
+               goto free_io_bio;
        }
 
-       dip->private = bio->bi_private;
+       dip->private = dio_bio->bi_private;
+       io_bio->bi_private = dio_bio->bi_private;
        dip->inode = inode;
        dip->logical_offset = file_offset;
 
@@ -7269,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        do {
                dip->bytes += bvec->bv_len;
                bvec++;
-       } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
+       } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
 
-       dip->disk_bytenr = (u64)bio->bi_sector << 9;
-       bio->bi_private = dip;
+       dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
+       io_bio->bi_private = dip;
        dip->errors = 0;
-       dip->orig_bio = bio;
+       dip->orig_bio = io_bio;
+       dip->dio_bio = dio_bio;
        atomic_set(&dip->pending_bios, 0);
 
        if (write)
-               bio->bi_end_io = btrfs_endio_direct_write;
+               io_bio->bi_end_io = btrfs_endio_direct_write;
        else
-               bio->bi_end_io = btrfs_endio_direct_read;
+               io_bio->bi_end_io = btrfs_endio_direct_read;
 
        ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
        if (!ret)
                return;
+
+free_io_bio:
+       bio_put(io_bio);
+
 free_ordered:
        /*
         * If this is a write, we need to clean up the reserved space and kill
@@ -7300,7 +7326,7 @@ free_ordered:
                btrfs_put_ordered_extent(ordered);
                btrfs_put_ordered_extent(ordered);
        }
-       bio_endio(bio, ret);
+       bio_endio(dio_bio, ret);
 }
 
 static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
@@ -7979,7 +8005,6 @@ void btrfs_destroy_inode(struct inode *inode)
        inode_tree_del(inode);
        btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 free:
-       btrfs_remove_delayed_node(inode);
        call_rcu(&inode->i_rcu, btrfs_i_callback);
 }
 
index 0de4a2fcfb24f1cdd2e8ae9574405bd4ce247526..0f81d67cdc8da651890df1a89c01df9188667d97 100644 (file)
@@ -1801,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,
                item_off = btrfs_item_ptr_offset(leaf, i);
                item_len = btrfs_item_size_nr(leaf, i);
 
-               if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+               btrfs_item_key_to_cpu(leaf, key, i);
+               if (!key_in_sk(key, sk))
+                       continue;
+
+               if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
                        item_len = 0;
 
                if (sizeof(sh) + item_len + *sk_offset >
@@ -1810,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
                        goto overflow;
                }
 
-               btrfs_item_key_to_cpu(leaf, key, i);
-               if (!key_in_sk(key, sk))
-                       continue;
-
                sh.objectid = key->objectid;
                sh.offset = key->offset;
                sh.type = key->type;
index 0740621daf6ca370498e78688922347c54e172df..0525e1389f5b16658ccea028da6408da812c974b 100644 (file)
@@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
        }
 
        /* put a new bio on the list */
-       bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
        if (!bio)
                return -ENOMEM;
 
index 704a1b8d2a2bae870fcf678d27aa4601f6745011..395b82031a4286f2a8a1481bc87a684172468b96 100644 (file)
@@ -1773,7 +1773,7 @@ again:
                        if (!eb || !extent_buffer_uptodate(eb)) {
                                ret = (!eb) ? -ENOMEM : -EIO;
                                free_extent_buffer(eb);
-                               return ret;
+                               break;
                        }
                        btrfs_tree_lock(eb);
                        if (cow) {
@@ -3350,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
        }
 
 truncate:
+       ret = btrfs_check_trunc_cache_free_space(root,
+                                                &fs_info->global_block_rsv);
+       if (ret)
+               goto out;
+
        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
index f489e24659a434fa2b67984a5a8a7cf9df2b06b6..79bd479317cb53cbea30a7021d81a891ed9ae20c 100644 (file)
@@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                }
 
                WARN_ON(!page->page);
-               bio = bio_alloc(GFP_NOFS, 1);
+               bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
                if (!bio) {
                        page->io_error = 1;
                        sblock->no_io_error_seen = 0;
@@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
                        return -EIO;
                }
 
-               bio = bio_alloc(GFP_NOFS, 1);
+               bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
                if (!bio)
                        return -EIO;
                bio->bi_bdev = page_bad->dev->bdev;
@@ -1522,7 +1522,7 @@ again:
                sbio->dev = wr_ctx->tgtdev;
                bio = sbio->bio;
                if (!bio) {
-                       bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+                       bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
                        if (!bio) {
                                mutex_unlock(&wr_ctx->wr_lock);
                                return -ENOMEM;
@@ -1930,7 +1930,7 @@ again:
                sbio->dev = spage->dev;
                bio = sbio->bio;
                if (!bio) {
-                       bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
+                       bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
                        if (!bio)
                                return -ENOMEM;
                        sbio->bio = bio;
@@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
                        "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
                return -EIO;
        }
-       bio = bio_alloc(GFP_NOFS, 1);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.malloc_errors++;
index a4807ced23cc5791d500d28ee9f91fdc915f5df0..f0857e092a3cb1af485604850052440579a1bbf1 100644 (file)
@@ -1263,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 
                btrfs_dev_replace_suspend_for_unmount(fs_info);
                btrfs_scrub_cancel(fs_info);
+               btrfs_pause_balance(fs_info);
 
                ret = btrfs_commit_super(root);
                if (ret)
index 0e925ced971ba87bc0e356c6a2c7e3dbf1b5e91e..8bffb9174afba04d8375b96f754256b68ff9b4ef 100644 (file)
@@ -3120,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
        if (num_devices == 1)
                allowed |= BTRFS_BLOCK_GROUP_DUP;
-       else if (num_devices < 4)
+       else if (num_devices > 1)
                allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
-       else
-               allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
-                               BTRFS_BLOCK_GROUP_RAID10 |
-                               BTRFS_BLOCK_GROUP_RAID5 |
-                               BTRFS_BLOCK_GROUP_RAID6);
-
+       if (num_devices > 2)
+               allowed |= BTRFS_BLOCK_GROUP_RAID5;
+       if (num_devices > 3)
+               allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
+                           BTRFS_BLOCK_GROUP_RAID6);
        if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->data.target, 1) ||
             (bctl->data.target & ~allowed))) {
@@ -5019,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        return 0;
 }
 
-static void *merge_stripe_index_into_bio_private(void *bi_private,
-                                                unsigned int stripe_index)
-{
-       /*
-        * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
-        * at most 1.
-        * The alternative solution (instead of stealing bits from the
-        * pointer) would be to allocate an intermediate structure
-        * that contains the old private pointer plus the stripe_index.
-        */
-       BUG_ON((((uintptr_t)bi_private) & 3) != 0);
-       BUG_ON(stripe_index > 3);
-       return (void *)(((uintptr_t)bi_private) | stripe_index);
-}
-
-static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
-{
-       return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
-}
-
-static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
-{
-       return (unsigned int)((uintptr_t)bi_private) & 3;
-}
-
 static void btrfs_end_bio(struct bio *bio, int err)
 {
-       struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
+       struct btrfs_bio *bbio = bio->bi_private;
        int is_orig_bio = 0;
 
        if (err) {
                atomic_inc(&bbio->error);
                if (err == -EIO || err == -EREMOTEIO) {
                        unsigned int stripe_index =
-                               extract_stripe_index_from_bio_private(
-                                       bio->bi_private);
+                               btrfs_io_bio(bio)->stripe_index;
                        struct btrfs_device *dev;
 
                        BUG_ON(stripe_index >= bbio->num_stripes);
@@ -5084,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
                }
                bio->bi_private = bbio->private;
                bio->bi_end_io = bbio->end_io;
-               bio->bi_bdev = (struct block_device *)
-                                       (unsigned long)bbio->mirror_num;
+               btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                /* only send an error to the higher layers if it is
                 * beyond the tolerance of the btrfs bio
                 */
@@ -5211,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
        struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
 
        bio->bi_private = bbio;
-       bio->bi_private = merge_stripe_index_into_bio_private(
-                       bio->bi_private, (unsigned int)dev_nr);
+       btrfs_io_bio(bio)->stripe_index = dev_nr;
        bio->bi_end_io = btrfs_end_bio;
        bio->bi_sector = physical >> 9;
 #ifdef DEBUG
@@ -5273,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
        if (atomic_dec_and_test(&bbio->stripes_pending)) {
                bio->bi_private = bbio->private;
                bio->bi_end_io = bbio->end_io;
-               bio->bi_bdev = (struct block_device *)
-                       (unsigned long)bbio->mirror_num;
+               btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                bio->bi_sector = logical >> 9;
                kfree(bbio);
                bio_endio(bio, -EIO);
@@ -5352,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                }
 
                if (dev_nr < total_devs - 1) {
-                       bio = bio_clone(first_bio, GFP_NOFS);
+                       bio = btrfs_bio_clone(first_bio, GFP_NOFS);
                        BUG_ON(!bio); /* -ENOMEM */
                } else {
                        bio = first_bio;
index 845ccbb0d2e35e9ad5b92101102b7e853d800ab0..f6247e2a47f7b643d8c88f2ee4c5b53f5c79f043 100644 (file)
@@ -152,6 +152,26 @@ struct btrfs_fs_devices {
        int rotating;
 };
 
+/*
+ * we need the mirror number and stripe index to be passed around
+ * the call chain while we are processing end_io (especially errors).
+ * Really, what we need is a btrfs_bio structure that has this info
+ * and is properly sized with its stripe array, but we're not there
+ * quite yet.  We have our own btrfs bioset, and all of the bios
+ * we allocate are actually btrfs_io_bios.  We'll cram as much of
+ * struct btrfs_bio as we can into this over time.
+ */
+struct btrfs_io_bio {
+       unsigned long mirror_num;
+       unsigned long stripe_index;
+       struct bio bio;
+};
+
+static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
+{
+       return container_of(bio, struct btrfs_io_bio, bio);
+}
+
 struct btrfs_bio_stripe {
        struct btrfs_device *dev;
        u64 physical;