Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Sep 2013 16:58:51 +0000 (09:58 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Sep 2013 16:58:51 +0000 (09:58 -0700)
diff --combined fs/btrfs/dev-replace.c

index 5f8f3341c099ecd088226726ac8981908868ac00,af800ef677a0d334b0ec84fbae4e0c669e91780d..a64435359385e86a483f30696c932da2c8d5bdb0
--- 1/fs/btrfs/dev-replace.c
--- 2/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@@ -148,13 -148,13 +148,13 @@@ no_valid_dev_replace_entry_found
                     !btrfs_test_opt(dev_root, DEGRADED)) {
                         ret = -EIO;
                         pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
-                               (unsigned long long)src_devid);
+                               src_devid);
                 }
                 if (!dev_replace->tgtdev &&
                     !btrfs_test_opt(dev_root, DEGRADED)) {
                         ret = -EIO;
                         pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
-                               (unsigned long long)BTRFS_DEV_REPLACE_DEVID);
+                               BTRFS_DEV_REPLACE_DEVID);
                 }
                 if (dev_replace->tgtdev) {
                         if (dev_replace->srcdev) {
@@@ -747,7 -747,7 +747,7 @@@ int btrfs_resume_dev_replace_async(stru
         WARN_ON(atomic_xchg(
                 &fs_info->mutually_exclusive_operation_running, 1));
         task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
- -      return PTR_RET(task);
+ +      return PTR_ERR_OR_ZERO(task);
   }
   
   static int btrfs_dev_replace_kthread(void *data)
diff --combined fs/btrfs/file.c

index 4d2eb6417145964c8731bf5e56673a0e816d59aa,5ba87b0d2ef86d1563705df7ae62f48e59b7cbae..bc5072b2db537f0f27af1851532b7417b41a8489
--- 1/fs/btrfs/file.c
--- 2/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@@ -1334,7 -1334,6 +1334,6 @@@ fail
   static noinline int check_can_nocow(struct inode *inode, loff_t pos,
                                     size_t *write_bytes)
   {
-       struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_ordered_extent *ordered;
         u64 lockstart, lockend;
@@@ -1356,16 -1355,8 +1355,8 @@@
                 btrfs_put_ordered_extent(ordered);
         }
   
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
-               return PTR_ERR(trans);
-       }
- 
         num_bytes = lockend - lockstart + 1;
-       ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
-                              NULL);
-       btrfs_end_transaction(trans, root);
+       ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
         if (ret <= 0) {
                 ret = 0;
         } else {
@@@ -1727,7 -1718,7 +1718,7 @@@ static ssize_t btrfs_file_aio_write(str
          */
         BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
         BTRFS_I(inode)->last_sub_trans = root->log_transid;
- -      if (num_written > 0 || num_written == -EIOCBQUEUED) {
+ +      if (num_written > 0) {
                 err = generic_write_sync(file, pos, num_written);
                 if (err < 0 && num_written > 0)
                         num_written = err;
diff --combined fs/btrfs/inode.c

index 7bdc83d04d54ca36006ae54e221092f2d789ef0c,6091ba9d249419765da7cc1ea692c4e9fab21de0..db1e4394857906e55000e9b0f85714a67e59260a
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -230,12 -230,13 +230,13 @@@ fail
    * does the checks required to make sure the data is small enough
    * to fit as an inline extent.
    */
- static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct inode *inode, u64 start, u64 end,
-                                size_t compressed_size, int compress_type,
-                                struct page **compressed_pages)
+ static noinline int cow_file_range_inline(struct btrfs_root *root,
+                                         struct inode *inode, u64 start,
+                                         u64 end, size_t compressed_size,
+                                         int compress_type,
+                                         struct page **compressed_pages)
   {
+       struct btrfs_trans_handle *trans;
         u64 isize = i_size_read(inode);
         u64 actual_end = min(end + 1, isize);
         u64 inline_len = actual_end - start;
@@@ -256,9 -257,16 +257,16 @@@
                 return 1;
         }
   
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+ 
         ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
-       if (ret)
-               return ret;
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto out;
+       }
   
         if (isize > actual_end)
                 inline_len = min_t(u64, isize, actual_end);
@@@ -267,15 -275,18 +275,18 @@@
                                    compress_type, compressed_pages);
         if (ret && ret != -ENOSPC) {
                 btrfs_abort_transaction(trans, root, ret);
-               return ret;
+               goto out;
         } else if (ret == -ENOSPC) {
-               return 1;
+               ret = 1;
+               goto out;
         }
   
         set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
         btrfs_delalloc_release_metadata(inode, end + 1 - start);
         btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
-       return 0;
+ out:
+       btrfs_end_transaction(trans, root);
+       return ret;
   }
   
   struct async_extent {
@@@ -343,7 -354,6 +354,6 @@@ static noinline int compress_file_range
                                         int *num_added)
   {
         struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
         u64 num_bytes;
         u64 blocksize = root->sectorsize;
         u64 actual_end;
@@@ -461,45 -471,36 +471,36 @@@ again
         }
   cont:
         if (start == 0) {
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-                       trans = NULL;
-                       goto cleanup_and_out;
-               }
-               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- 
                 /* lets try to make an inline extent */
                 if (ret || total_in < (actual_end - start)) {
                         /* we didn't compress the entire range, try
                          * to make an uncompressed inline extent.
                          */
-                       ret = cow_file_range_inline(trans, root, inode,
-                                                   start, end, 0, 0, NULL);
+                       ret = cow_file_range_inline(root, inode, start, end,
+                                                   0, 0, NULL);
                 } else {
                         /* try making a compressed inline extent */
-                       ret = cow_file_range_inline(trans, root, inode,
-                                                   start, end,
+                       ret = cow_file_range_inline(root, inode, start, end,
                                                     total_compressed,
                                                     compress_type, pages);
                 }
                 if (ret <= 0) {
+                       unsigned long clear_flags = EXTENT_DELALLOC |
+                               EXTENT_DEFRAG;
+                       clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
+ 
                         /*
                          * inline extent creation worked or returned error,
                          * we don't need to create any more async work items.
                          * Unlock and free up our temp pages.
                          */
-                       extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, NULL,
-                            EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
- 
-                       btrfs_end_transaction(trans, root);
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                                    clear_flags, PAGE_UNLOCK |
+                                                    PAGE_CLEAR_DIRTY |
+                                                    PAGE_SET_WRITEBACK |
+                                                    PAGE_END_WRITEBACK);
                         goto free_pages_out;
                 }
-               btrfs_end_transaction(trans, root);
         }
   
         if (will_compress) {
@@@ -590,20 -591,6 +591,6 @@@ free_pages_out
         kfree(pages);
   
         goto out;
- 
- cleanup_and_out:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
-       if (!trans || IS_ERR(trans))
-               btrfs_error(root->fs_info, ret, "Failed to join transaction");
-       else
-               btrfs_abort_transaction(trans, root, ret);
-       goto free_pages_out;
   }
   
   /*
@@@ -617,7 -604,6 +604,6 @@@ static noinline int submit_compressed_e
   {
         struct async_extent *async_extent;
         u64 alloc_hint = 0;
-       struct btrfs_trans_handle *trans;
         struct btrfs_key ins;
         struct extent_map *em;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@@ -678,20 -664,10 +664,10 @@@ retry
                 lock_extent(io_tree, async_extent->start,
                             async_extent->start + async_extent->ram_size - 1);
   
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-               } else {
-                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_reserve_extent(trans, root,
+               ret = btrfs_reserve_extent(root,
                                            async_extent->compressed_size,
                                            async_extent->compressed_size,
                                            0, alloc_hint, &ins, 1);
-                       if (ret && ret != -ENOSPC)
-                               btrfs_abort_transaction(trans, root, ret);
-                       btrfs_end_transaction(trans, root);
-               }
- 
                 if (ret) {
                         int i;
   
@@@ -770,16 -746,12 +746,12 @@@
                 /*
                  * clear dirty, set writeback and unlock the pages.
                  */
-               extent_clear_unlock_delalloc(inode,
-                               &BTRFS_I(inode)->io_tree,
-                               async_extent->start,
+               extent_clear_unlock_delalloc(inode, async_extent->start,
                                 async_extent->start +
                                 async_extent->ram_size - 1,
-                               NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK |
-                               EXTENT_CLEAR_DELALLOC |
-                               EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
- 
+                               NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
+                               PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                               PAGE_SET_WRITEBACK);
                 ret = btrfs_submit_compressed_write(inode,
                                     async_extent->start,
                                     async_extent->ram_size,
@@@ -798,16 -770,13 +770,13 @@@ out
   out_free_reserve:
         btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
   out_free:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    async_extent->start,
+       extent_clear_unlock_delalloc(inode, async_extent->start,
                                      async_extent->start +
                                      async_extent->ram_size - 1,
-                                    NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                                    NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
         kfree(async_extent);
         goto again;
   }
@@@ -857,14 -826,13 +826,13 @@@ static u64 get_extent_allocation_hint(s
    * required to start IO on it.  It may be clean and already done with
    * IO when we return.
    */
- static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
-                                    struct inode *inode,
-                                    struct btrfs_root *root,
-                                    struct page *locked_page,
-                                    u64 start, u64 end, int *page_started,
-                                    unsigned long *nr_written,
-                                    int unlock)
+ static noinline int cow_file_range(struct inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+                                  unsigned long *nr_written,
+                                  int unlock)
   {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 alloc_hint = 0;
         u64 num_bytes;
         unsigned long ram_size;
@@@ -885,29 -853,24 +853,24 @@@
         /* if this is a small write inside eof, kick off defrag */
         if (num_bytes < 64 * 1024 &&
             (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
-               btrfs_add_inode_defrag(trans, inode);
+               btrfs_add_inode_defrag(NULL, inode);
   
         if (start == 0) {
                 /* lets try to make an inline extent */
-               ret = cow_file_range_inline(trans, root, inode,
-                                           start, end, 0, 0, NULL);
+               ret = cow_file_range_inline(root, inode, start, end, 0, 0,
+                                           NULL);
                 if (ret == 0) {
-                       extent_clear_unlock_delalloc(inode,
-                                    &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                    EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG, PAGE_UNLOCK |
+                                    PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+                                    PAGE_END_WRITEBACK);
   
                         *nr_written = *nr_written +
                              (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                         *page_started = 1;
                         goto out;
                 } else if (ret < 0) {
-                       btrfs_abort_transaction(trans, root, ret);
                         goto out_unlock;
                 }
         }
@@@ -922,13 -885,11 +885,11 @@@
                 unsigned long op;
   
                 cur_alloc_size = disk_num_bytes;
-               ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
+               ret = btrfs_reserve_extent(root, cur_alloc_size,
                                            root->sectorsize, 0, alloc_hint,
                                            &ins, 1);
-               if (ret < 0) {
-                       btrfs_abort_transaction(trans, root, ret);
+               if (ret < 0)
                         goto out_unlock;
-               }
   
                 em = alloc_extent_map();
                 if (!em) {
@@@ -974,10 -935,8 +935,8 @@@
                     BTRFS_DATA_RELOC_TREE_OBJECTID) {
                         ret = btrfs_reloc_clone_csums(inode, start,
                                                       cur_alloc_size);
-                       if (ret) {
-                               btrfs_abort_transaction(trans, root, ret);
+                       if (ret)
                                 goto out_reserve;
-                       }
                 }
   
                 if (disk_num_bytes < cur_alloc_size)
@@@ -990,13 -949,13 +949,13 @@@
                  * Do set the Private2 bit so we know this page was properly
                  * setup for writepage
                  */
-               op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
-               op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                       EXTENT_SET_PRIVATE2;
+               op = unlock ? PAGE_UNLOCK : 0;
+               op |= PAGE_SET_PRIVATE2;
   
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                            start, start + ram_size - 1,
-                                            locked_page, op);
+               extent_clear_unlock_delalloc(inode, start,
+                                            start + ram_size - 1, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC,
+                                            op);
                 disk_num_bytes -= cur_alloc_size;
                 num_bytes -= cur_alloc_size;
                 alloc_hint = ins.objectid + ins.offset;
@@@ -1008,52 -967,14 +967,14 @@@ out
   out_reserve:
         btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
   out_unlock:
-       extent_clear_unlock_delalloc(inode,
-                    &BTRFS_I(inode)->io_tree,
-                    start, end, locked_page,
-                    EXTENT_CLEAR_UNLOCK_PAGE |
-                    EXTENT_CLEAR_UNLOCK |
-                    EXTENT_CLEAR_DELALLOC |
-                    EXTENT_CLEAR_DIRTY |
-                    EXTENT_SET_WRITEBACK |
-                    EXTENT_END_WRITEBACK);
- 
+       extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                    EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                    EXTENT_DELALLOC | EXTENT_DEFRAG,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
         goto out;
   }
   
- static noinline int cow_file_range(struct inode *inode,
-                                  struct page *locked_page,
-                                  u64 start, u64 end, int *page_started,
-                                  unsigned long *nr_written,
-                                  int unlock)
- {
-       struct btrfs_trans_handle *trans;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret;
- 
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
-               return PTR_ERR(trans);
-       }
-       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- 
-       ret = __cow_file_range(trans, inode, root, locked_page, start, end,
-                              page_started, nr_written, unlock);
- 
-       btrfs_end_transaction(trans, root);
- 
-       return ret;
- }
- 
   /*
    * work queue call back to started compression on a file and pages
    */
@@@ -1221,15 -1142,13 +1142,13 @@@ static noinline int run_delalloc_nocow(
   
         path = btrfs_alloc_path();
         if (!path) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                 return -ENOMEM;
         }
   
@@@ -1241,15 -1160,13 +1160,13 @@@
                 trans = btrfs_join_transaction(root);
   
         if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                 btrfs_free_path(path);
                 return PTR_ERR(trans);
         }
@@@ -1369,9 -1286,9 +1286,9 @@@ out_check
   
                 btrfs_release_path(path);
                 if (cow_start != (u64)-1) {
-                       ret = __cow_file_range(trans, inode, root, locked_page,
-                                              cow_start, found_key.offset - 1,
-                                              page_started, nr_written, 1);
+                       ret = cow_file_range(inode, locked_page,
+                                            cow_start, found_key.offset - 1,
+                                            page_started, nr_written, 1);
                         if (ret) {
                                 btrfs_abort_transaction(trans, root, ret);
                                 goto error;
@@@ -1428,11 -1345,11 +1345,11 @@@
                         }
                 }
   
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                               cur_offset, cur_offset + num_bytes - 1,
-                               locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                               EXTENT_SET_PRIVATE2);
+               extent_clear_unlock_delalloc(inode, cur_offset,
+                                            cur_offset + num_bytes - 1,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC, PAGE_UNLOCK |
+                                            PAGE_SET_PRIVATE2);
                 cur_offset = extent_end;
                 if (cur_offset > end)
                         break;
@@@ -1445,9 -1362,8 +1362,8 @@@
         }
   
         if (cow_start != (u64)-1) {
-               ret = __cow_file_range(trans, inode, root, locked_page,
-                                      cow_start, end,
-                                      page_started, nr_written, 1);
+               ret = cow_file_range(inode, locked_page, cow_start, end,
+                                    page_started, nr_written, 1);
                 if (ret) {
                         btrfs_abort_transaction(trans, root, ret);
                         goto error;
@@@ -1460,16 -1376,13 +1376,13 @@@ error
                 ret = err;
   
         if (ret && cur_offset < end)
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            cur_offset, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
- 
+               extent_clear_unlock_delalloc(inode, cur_offset, end,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC | EXTENT_DEFRAG |
+                                            EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
         btrfs_free_path(path);
         return ret;
   }
@@@ -2132,6 -2045,7 +2045,7 @@@ static noinline int record_one_backref(
                 WARN_ON(1);
                 return ret;
         }
+       ret = 0;
   
         while (1) {
                 cond_resched();
@@@ -2181,8 -2095,6 +2095,6 @@@
                     old->len || extent_offset + num_bytes <=
                     old->extent_offset + old->offset)
                         continue;
- 
-               ret = 0;
                 break;
         }
   
@@@ -2238,16 -2150,18 +2150,18 @@@ static noinline bool record_extent_back
   
   static int relink_is_mergable(struct extent_buffer *leaf,
                               struct btrfs_file_extent_item *fi,
-                             u64 disk_bytenr)
+                             struct new_sa_defrag_extent *new)
   {
-       if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
+       if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
                 return 0;
   
         if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
                 return 0;
   
-       if (btrfs_file_extent_compression(leaf, fi) ||
-           btrfs_file_extent_encryption(leaf, fi) ||
+       if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
+               return 0;
+ 
+       if (btrfs_file_extent_encryption(leaf, fi) ||
             btrfs_file_extent_other_encoding(leaf, fi))
                 return 0;
   
@@@ -2391,8 -2305,8 +2305,8 @@@ again
                                     struct btrfs_file_extent_item);
                 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
   
-               if (relink_is_mergable(leaf, fi, new->bytenr) &&
-                   extent_len + found_key.offset == start) {
+               if (extent_len + found_key.offset == start &&
+                   relink_is_mergable(leaf, fi, new)) {
                         btrfs_set_file_extent_num_bytes(leaf, fi,
                                                         extent_len + len);
                         btrfs_mark_buffer_dirty(leaf);
@@@ -2648,8 -2562,10 +2562,10 @@@ static int btrfs_finish_ordered_io(stru
         struct extent_state *cached_state = NULL;
         struct new_sa_defrag_extent *new = NULL;
         int compress_type = 0;
-       int ret;
+       int ret = 0;
+       u64 logical_len = ordered_extent->len;
         bool nolock;
+       bool truncated = false;
   
         nolock = btrfs_is_free_space_inode(inode);
   
@@@ -2658,6 -2574,14 +2574,14 @@@
                 goto out;
         }
   
+       if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+               truncated = true;
+               logical_len = ordered_extent->truncated_len;
+               /* Truncated the entire extent, don't bother adding */
+               if (!logical_len)
+                       goto out;
+       }
+ 
         if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
                 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@@ -2713,15 -2637,14 +2637,14 @@@
                 ret = btrfs_mark_extent_written(trans, inode,
                                                 ordered_extent->file_offset,
                                                 ordered_extent->file_offset +
-                                               ordered_extent->len);
+                                               logical_len);
         } else {
                 BUG_ON(root == root->fs_info->tree_root);
                 ret = insert_reserved_file_extent(trans, inode,
                                                 ordered_extent->file_offset,
                                                 ordered_extent->start,
                                                 ordered_extent->disk_len,
-                                               ordered_extent->len,
-                                               ordered_extent->len,
+                                               logical_len, logical_len,
                                                 compress_type, 0, 0,
                                                 BTRFS_FILE_EXTENT_REG);
         }
@@@ -2753,17 -2676,27 +2676,27 @@@ out
         if (trans)
                 btrfs_end_transaction(trans, root);
   
-       if (ret) {
-               clear_extent_uptodate(io_tree, ordered_extent->file_offset,
-                                     ordered_extent->file_offset +
-                                     ordered_extent->len - 1, NULL, GFP_NOFS);
+       if (ret || truncated) {
+               u64 start, end;
+ 
+               if (truncated)
+                       start = ordered_extent->file_offset + logical_len;
+               else
+                       start = ordered_extent->file_offset;
+               end = ordered_extent->file_offset + ordered_extent->len - 1;
+               clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
+ 
+               /* Drop the cache for the part of the extent we didn't write. */
+               btrfs_drop_extent_cache(inode, start, end, 0);
   
                 /*
                  * If the ordered extent had an IOERR or something else went
                  * wrong we need to return the space for this ordered extent
-                * back to the allocator.
+                * back to the allocator.  We only free the extent in the
+                * truncated case if we didn't write out the extent at all.
                  */
-               if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+               if ((ret || !logical_len) &&
+                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
                     !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
                         btrfs_free_reserved_extent(root, ordered_extent->start,
                                                    ordered_extent->disk_len);
@@@ -2827,16 -2760,16 +2760,16 @@@ static int btrfs_writepage_end_io_hook(
    * if there's a match, we allow the bio to finish.  If not, the code in
    * extent_io.c will try to find good copies for us.
    */
- static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-                              struct extent_state *state, int mirror)
+ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+                                     u64 phy_offset, struct page *page,
+                                     u64 start, u64 end, int mirror)
   {
         size_t offset = start - page_offset(page);
         struct inode *inode = page->mapping->host;
         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
         char *kaddr;
-       u64 private = ~(u32)0;
-       int ret;
         struct btrfs_root *root = BTRFS_I(inode)->root;
+       u32 csum_expected;
         u32 csum = ~(u32)0;
         static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                       DEFAULT_RATELIMIT_BURST);
@@@ -2856,19 -2789,13 +2789,13 @@@
                 return 0;
         }
   
-       if (state && state->start == start) {
-               private = state->private;
-               ret = 0;
-       } else {
-               ret = get_state_private(io_tree, start, &private);
-       }
-       kaddr = kmap_atomic(page);
-       if (ret)
-               goto zeroit;
+       phy_offset >>= inode->i_sb->s_blocksize_bits;
+       csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
   
+       kaddr = kmap_atomic(page);
         csum = btrfs_csum_data(kaddr + offset, csum,  end - start + 1);
         btrfs_csum_final(csum, (char *)&csum);
-       if (csum != private)
+       if (csum != csum_expected)
                 goto zeroit;
   
         kunmap_atomic(kaddr);
@@@ -2877,14 -2804,12 +2804,12 @@@ good
   
   zeroit:
         if (__ratelimit(&_rs))
-               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
-                       (unsigned long long)btrfs_ino(page->mapping->host),
-                       (unsigned long long)start, csum,
-                       (unsigned long long)private);
+               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+                       btrfs_ino(page->mapping->host), start, csum, csum_expected);
         memset(kaddr + offset, 1, end - start + 1);
         flush_dcache_page(page);
         kunmap_atomic(kaddr);
-       if (private == 0)
+       if (csum_expected == 0)
                 return 0;
         return -EIO;
   }
@@@ -2971,8 -2896,10 +2896,10 @@@ void btrfs_orphan_commit_root(struct bt
             btrfs_root_refs(&root->root_item) > 0) {
                 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
                                             root->root_key.objectid);
-               BUG_ON(ret);
-               root->orphan_item_inserted = 0;
+               if (ret)
+                       btrfs_abort_transaction(trans, root, ret);
+               else
+                       root->orphan_item_inserted = 0;
         }
   
         if (block_rsv) {
@@@ -3041,11 -2968,18 +2968,18 @@@ int btrfs_orphan_add(struct btrfs_trans
         /* insert an orphan item to track this unlinked/truncated file */
         if (insert >= 1) {
                 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
-               if (ret && ret != -EEXIST) {
-                       clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-                                 &BTRFS_I(inode)->runtime_flags);
-                       btrfs_abort_transaction(trans, root, ret);
-                       return ret;
+               if (ret) {
+                       if (reserve) {
+                               clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               btrfs_orphan_release_metadata(inode);
+                       }
+                       if (ret != -EEXIST) {
+                               clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               btrfs_abort_transaction(trans, root, ret);
+                               return ret;
+                       }
                 }
                 ret = 0;
         }
@@@ -3084,17 -3018,15 +3018,15 @@@ static int btrfs_orphan_del(struct btrf
                 release_rsv = 1;
         spin_unlock(&root->orphan_lock);
   
-       if (trans && delete_item) {
+       if (trans && delete_item)
                 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
-               BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
-       }
   
         if (release_rsv) {
                 btrfs_orphan_release_metadata(inode);
                 atomic_dec(&root->orphan_inodes);
         }
   
-       return 0;
+       return ret;
   }
   
   /*
@@@ -3174,7 -3106,7 +3106,7 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                 found_key.type = BTRFS_INODE_ITEM_KEY;
                 found_key.offset = 0;
                 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
- -              ret = PTR_RET(inode);
+ +              ret = PTR_ERR_OR_ZERO(inode);
                 if (ret && ret != -ESTALE)
                         goto out;
   
@@@ -3224,8 -3156,9 +3156,9 @@@
                                 found_key.objectid);
                         ret = btrfs_del_orphan_item(trans, root,
                                                     found_key.objectid);
-                       BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
                         btrfs_end_transaction(trans, root);
+                       if (ret)
+                               goto out;
                         continue;
                 }
   
@@@ -3657,8 -3590,7 +3590,7 @@@ static int __btrfs_unlink_inode(struct 
         if (ret) {
                 btrfs_info(root->fs_info,
                         "failed to delete reference to %.*s, inode %llu parent %llu",
-                       name_len, name,
-                       (unsigned long long)ino, (unsigned long long)dir_ino);
+                       name_len, name, ino, dir_ino);
                 btrfs_abort_transaction(trans, root, ret);
                 goto err;
         }
@@@ -3929,6 -3861,7 +3861,7 @@@ int btrfs_truncate_inode_items(struct b
         u64 extent_num_bytes = 0;
         u64 extent_offset = 0;
         u64 item_end = 0;
+       u64 last_size = (u64)-1;
         u32 found_type = (u8)-1;
         int found_extent;
         int del_item;
@@@ -4026,6 -3959,11 +3959,11 @@@ search_again
                 if (found_type != BTRFS_EXTENT_DATA_KEY)
                         goto delete;
   
+               if (del_item)
+                       last_size = found_key.offset;
+               else
+                       last_size = new_size;
+ 
                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
                         u64 num_dec;
                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@@@ -4137,6 -4075,8 +4075,8 @@@ out
                         btrfs_abort_transaction(trans, root, ret);
         }
   error:
+       if (last_size != (u64)-1)
+               btrfs_ordered_update_i_size(inode, last_size, NULL);
         btrfs_free_path(path);
         return err;
   }
@@@ -4465,8 -4405,26 +4405,26 @@@ static int btrfs_setsize(struct inode *
                 btrfs_inode_resume_unlocked_dio(inode);
   
                 ret = btrfs_truncate(inode);
-               if (ret && inode->i_nlink)
-                       btrfs_orphan_del(NULL, inode);
+               if (ret && inode->i_nlink) {
+                       int err;
+ 
+                       /*
+                        * failed to truncate, disk_i_size is only adjusted down
+                        * as we remove extents, so it should represent the true
+                        * size of the inode, so reset the in memory size and
+                        * delete our orphan entry.
+                        */
+                       trans = btrfs_join_transaction(root);
+                       if (IS_ERR(trans)) {
+                               btrfs_orphan_del(NULL, inode);
+                               return ret;
+                       }
+                       i_size_write(inode, BTRFS_I(inode)->disk_i_size);
+                       err = btrfs_orphan_del(trans, inode);
+                       if (err)
+                               btrfs_abort_transaction(trans, root, err);
+                       btrfs_end_transaction(trans, root);
+               }
         }
   
         return ret;
@@@ -4601,10 -4559,15 +4559,15 @@@ void btrfs_evict_inode(struct inode *in
   
         btrfs_free_block_rsv(root, rsv);
   
+       /*
+        * Errors here aren't a big deal, it just means we leave orphan items
+        * in the tree.  They will be cleaned up on the next mount.
+        */
         if (ret == 0) {
                 trans->block_rsv = root->orphan_block_rsv;
-               ret = btrfs_orphan_del(trans, inode);
-               BUG_ON(ret);
+               btrfs_orphan_del(trans, inode);
+       } else {
+               btrfs_orphan_del(NULL, inode);
         }
   
         trans->block_rsv = &root->fs_info->trans_block_rsv;
@@@ -6161,10 -6124,7 +6124,7 @@@ insert
         btrfs_release_path(path);
         if (em->start > start || extent_map_end(em) <= start) {
                 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
-                       (unsigned long long)em->start,
-                       (unsigned long long)em->len,
-                       (unsigned long long)start,
-                       (unsigned long long)len);
+                       em->start, em->len, start, len);
                 err = -EIO;
                 goto out;
         }
@@@ -6362,39 -6322,32 +6322,32 @@@ static struct extent_map *btrfs_new_ext
                                                   u64 start, u64 len)
   {
         struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
         struct extent_map *em;
         struct btrfs_key ins;
         u64 alloc_hint;
         int ret;
   
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
-               return ERR_CAST(trans);
- 
-       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- 
         alloc_hint = get_extent_allocation_hint(inode, start, len);
-       ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
+       ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
                                    alloc_hint, &ins, 1);
-       if (ret) {
-               em = ERR_PTR(ret);
-               goto out;
-       }
+       if (ret)
+               return ERR_PTR(ret);
   
         em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
                               ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em))
-               goto out;
+       if (IS_ERR(em)) {
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+               return em;
+       }
   
         ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                            ins.offset, ins.offset, 0);
         if (ret) {
                 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
-               em = ERR_PTR(ret);
+               free_extent_map(em);
+               return ERR_PTR(ret);
         }
- out:
-       btrfs_end_transaction(trans, root);
+ 
         return em;
   }
   
@@@ -6402,11 -6355,11 +6355,11 @@@
    * returns 1 when the nocow is safe, < 1 on error, 0 if the
    * block must be cow'd
    */
- noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
-                             struct inode *inode, u64 offset, u64 *len,
+ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                               u64 *orig_start, u64 *orig_block_len,
                               u64 *ram_bytes)
   {
+       struct btrfs_trans_handle *trans;
         struct btrfs_path *path;
         int ret;
         struct extent_buffer *leaf;
@@@ -6424,7 -6377,7 +6377,7 @@@
         if (!path)
                 return -ENOMEM;
   
-       ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
+       ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
                                        offset, 0);
         if (ret < 0)
                 goto out;
@@@ -6489,9 -6442,19 +6442,19 @@@
          * look for other files referencing this extent, if we
          * find any we must cow
          */
-       if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
-                                 key.offset - backref_offset, disk_bytenr))
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans)) {
+               ret = 0;
                 goto out;
+       }
+ 
+       ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+                                   key.offset - backref_offset, disk_bytenr);
+       btrfs_end_transaction(trans, root);
+       if (ret) {
+               ret = 0;
+               goto out;
+       }
   
         /*
          * adjust disk_bytenr and num_bytes to cover just the bytes
@@@ -6633,7 -6596,6 +6596,6 @@@ static int btrfs_get_blocks_direct(stru
         u64 start = iblock << inode->i_blkbits;
         u64 lockstart, lockend;
         u64 len = bh_result->b_size;
-       struct btrfs_trans_handle *trans;
         int unlock_bits = EXTENT_LOCKED;
         int ret = 0;
   
@@@ -6715,16 -6677,7 +6677,7 @@@
                 len = min(len, em->len - (start - em->start));
                 block_start = em->block_start + (start - em->start);
   
-               /*
-                * we're not going to log anything, but we do need
-                * to make sure the current transaction stays open
-                * while we look for nocow cross refs
-                */
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans))
-                       goto must_cow;
- 
-               if (can_nocow_extent(trans, inode, start, &len, &orig_start,
+               if (can_nocow_extent(inode, start, &len, &orig_start,
                                      &orig_block_len, &ram_bytes) == 1) {
                         if (type == BTRFS_ORDERED_PREALLOC) {
                                 free_extent_map(em);
@@@ -6733,24 -6686,20 +6686,20 @@@
                                                        block_start, len,
                                                        orig_block_len,
                                                        ram_bytes, type);
-                               if (IS_ERR(em)) {
-                                       btrfs_end_transaction(trans, root);
+                               if (IS_ERR(em))
                                         goto unlock_err;
-                               }
                         }
   
                         ret = btrfs_add_ordered_extent_dio(inode, start,
                                            block_start, len, len, type);
-                       btrfs_end_transaction(trans, root);
                         if (ret) {
                                 free_extent_map(em);
                                 goto unlock_err;
                         }
                         goto unlock;
                 }
-               btrfs_end_transaction(trans, root);
         }
- must_cow:
+ 
         /*
          * this will cow the extent, reset the len in case we changed
          * it above
@@@ -6813,26 -6762,6 +6762,6 @@@ unlock_err
         return ret;
   }
   
- struct btrfs_dio_private {
-       struct inode *inode;
-       u64 logical_offset;
-       u64 disk_bytenr;
-       u64 bytes;
-       void *private;
- 
-       /* number of bios pending for this dio */
-       atomic_t pending_bios;
- 
-       /* IO errors */
-       int errors;
- 
-       /* orig_bio is our btrfs_io_bio */
-       struct bio *orig_bio;
- 
-       /* dio_bio came from fs/direct-io.c */
-       struct bio *dio_bio;
- };
- 
   static void btrfs_endio_direct_read(struct bio *bio, int err)
   {
         struct btrfs_dio_private *dip = bio->bi_private;
@@@ -6841,6 -6770,8 +6770,8 @@@
         struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct bio *dio_bio;
+       u32 *csums = (u32 *)dip->csum;
+       int index = 0;
         u64 start;
   
         start = dip->logical_offset;
@@@ -6849,12 -6780,8 +6780,8 @@@
                         struct page *page = bvec->bv_page;
                         char *kaddr;
                         u32 csum = ~(u32)0;
-                       u64 private = ~(u32)0;
                         unsigned long flags;
   
-                       if (get_state_private(&BTRFS_I(inode)->io_tree,
-                                             start, &private))
-                               goto failed;
                         local_irq_save(flags);
                         kaddr = kmap_atomic(page);
                         csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@@ -6864,18 -6791,17 +6791,17 @@@
                         local_irq_restore(flags);
   
                         flush_dcache_page(bvec->bv_page);
-                       if (csum != private) {
- failed:
-                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
-                                       (unsigned long long)btrfs_ino(inode),
-                                       (unsigned long long)start,
-                                       csum, (unsigned)private);
+                       if (csum != csums[index]) {
+                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+                                         btrfs_ino(inode), start, csum,
+                                         csums[index]);
                                 err = -EIO;
                         }
                 }
   
                 start += bvec->bv_len;
                 bvec++;
+               index++;
         } while (bvec <= bvec_end);
   
         unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@@ -6956,7 -6882,7 +6882,7 @@@ static void btrfs_end_dio_bio(struct bi
         if (err) {
                 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
                       "sector %#Lx len %u err no %d\n",
-                     (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
+                     btrfs_ino(dip->inode), bio->bi_rw,
                       (unsigned long long)bio->bi_sector, bio->bi_size, err);
                 dip->errors = 1;
   
@@@ -6992,6 -6918,7 +6918,7 @@@ static inline int __btrfs_submit_dio_bi
                                          int rw, u64 file_offset, int skip_sum,
                                          int async_submit)
   {
+       struct btrfs_dio_private *dip = bio->bi_private;
         int write = rw & REQ_WRITE;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         int ret;
@@@ -7026,7 -6953,8 +6953,8 @@@
                 if (ret)
                         goto err;
         } else if (!skip_sum) {
-               ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
+               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
+                                               file_offset);
                 if (ret)
                         goto err;
         }
@@@ -7061,6 -6989,7 +6989,7 @@@ static int btrfs_submit_direct_hook(in
                 bio_put(orig_bio);
                 return -EIO;
         }
+ 
         if (map_length >= orig_bio->bi_size) {
                 bio = orig_bio;
                 goto submit;
@@@ -7156,19 -7085,28 +7085,28 @@@ static void btrfs_submit_direct(int rw
         struct btrfs_dio_private *dip;
         struct bio *io_bio;
         int skip_sum;
+       int sum_len;
         int write = rw & REQ_WRITE;
         int ret = 0;
+       u16 csum_size;
   
         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
   
         io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
- 
         if (!io_bio) {
                 ret = -ENOMEM;
                 goto free_ordered;
         }
   
-       dip = kmalloc(sizeof(*dip), GFP_NOFS);
+       if (!skip_sum && !write) {
+               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+               sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+               sum_len *= csum_size;
+       } else {
+               sum_len = 0;
+       }
+ 
+       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
         if (!dip) {
                 ret = -ENOMEM;
                 goto free_io_bio;
@@@ -7443,10 -7381,23 +7381,23 @@@ static void btrfs_invalidatepage(struc
                  * whoever cleared the private bit is responsible
                  * for the finish_ordered_io
                  */
-               if (TestClearPagePrivate2(page) &&
-                   btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
-                                                  PAGE_CACHE_SIZE, 1)) {
-                       btrfs_finish_ordered_io(ordered);
+               if (TestClearPagePrivate2(page)) {
+                       struct btrfs_ordered_inode_tree *tree;
+                       u64 new_len;
+ 
+                       tree = &BTRFS_I(inode)->ordered_tree;
+ 
+                       spin_lock_irq(&tree->lock);
+                       set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
+                       new_len = page_start - ordered->file_offset;
+                       if (new_len < ordered->truncated_len)
+                               ordered->truncated_len = new_len;
+                       spin_unlock_irq(&tree->lock);
+ 
+                       if (btrfs_dec_test_ordered_pending(inode, &ordered,
+                                                          page_start,
+                                                          PAGE_CACHE_SIZE, 1))
+                               btrfs_finish_ordered_io(ordered);
                 }
                 btrfs_put_ordered_extent(ordered);
                 cached_state = NULL;
@@@ -7612,7 -7563,6 +7563,6 @@@ static int btrfs_truncate(struct inode 
         u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
   
         btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
-       btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
   
         /*
          * Yes ladies and gentelment, this is indeed ugly.  The fact is we have
@@@ -7876,7 -7826,7 +7826,7 @@@ void btrfs_destroy_inode(struct inode *
         if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                      &BTRFS_I(inode)->runtime_flags)) {
                 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
-                       (unsigned long long)btrfs_ino(inode));
+                       btrfs_ino(inode));
                 atomic_dec(&root->orphan_inodes);
         }
   
@@@ -7886,8 -7836,7 +7836,7 @@@
                         break;
                 else {
                         btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
-                               (unsigned long long)ordered->file_offset,
-                               (unsigned long long)ordered->len);
+                               ordered->file_offset, ordered->len);
                         btrfs_remove_ordered_extent(inode, ordered);
                         btrfs_put_ordered_extent(ordered);
                         btrfs_put_ordered_extent(ordered);
@@@ -8161,10 -8110,8 +8110,8 @@@ static int btrfs_rename(struct inode *o
                                                  new_dentry->d_name.name,
                                                  new_dentry->d_name.len);
                 }
-               if (!ret && new_inode->i_nlink == 0) {
+               if (!ret && new_inode->i_nlink == 0)
                         ret = btrfs_orphan_add(trans, new_dentry->d_inode);
-                       BUG_ON(ret);
-               }
                 if (ret) {
                         btrfs_abort_transaction(trans, root, ret);
                         goto out_fail;
@@@ -8525,8 -8472,8 +8472,8 @@@ static int __btrfs_prealloc_file_range(
   
                 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
                 cur_bytes = max(cur_bytes, min_size);
-               ret = btrfs_reserve_extent(trans, root, cur_bytes,
-                                          min_size, 0, *alloc_hint, &ins, 1);
+               ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
+                                          *alloc_hint, &ins, 1);
                 if (ret) {
                         if (own_trans)
                                 btrfs_end_transaction(trans, root);
diff --combined fs/btrfs/send.c

index 2e14fd89a8b46e80622168ef30b9ac206b4b6a04,b4b15467426b0cfab6033c449a043e216991c464..e46e0ed7492555646e58659f4cbb4c94ddf4c4d1
--- 1/fs/btrfs/send.c
--- 2/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@@ -26,6 -26,7 +26,7 @@@
   #include <linux/radix-tree.h>
   #include <linux/crc32c.h>
   #include <linux/vmalloc.h>
+ #include <linux/string.h>
   
   #include "send.h"
   #include "backref.h"
@@@ -54,8 -55,8 +55,8 @@@ struct fs_path 
   
                         char *buf;
                         int buf_len;
-                       int reversed:1;
-                       int virtual_mem:1;
+                       unsigned int reversed:1;
+                       unsigned int virtual_mem:1;
                         char inline_buf[];
                 };
                 char pad[PAGE_SIZE];
@@@ -219,7 -220,7 +220,7 @@@ static int fs_path_ensure_buf(struct fs
         len = PAGE_ALIGN(len);
   
         if (p->buf == p->inline_buf) {
- -              tmp_buf = kmalloc(len, GFP_NOFS);
+ +              tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN);
                 if (!tmp_buf) {
                         tmp_buf = vmalloc(len);
                         if (!tmp_buf)
@@@ -1668,6 -1669,7 +1669,7 @@@ static int will_overwrite_ref(struct se
                               u64 *who_ino, u64 *who_gen)
   {
         int ret = 0;
+       u64 gen;
         u64 other_inode = 0;
         u8 other_type = 0;
   
@@@ -1678,6 -1680,24 +1680,24 @@@
         if (ret <= 0)
                 goto out;
   
+       /*
+        * If we have a parent root we need to verify that the parent dir was
+        * not deleted and then re-created, if it was then we have no overwrite
+        * and we can just unlink this entry.
+        */
+       if (sctx->parent_root) {
+               ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
+                                    NULL, NULL, NULL);
+               if (ret < 0 && ret != -ENOENT)
+                       goto out;
+               if (ret) {
+                       ret = 0;
+                       goto out;
+               }
+               if (gen != dir_gen)
+                       goto out;
+       }
+ 
         ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
                         &other_inode, &other_type);
         if (ret < 0 && ret != -ENOENT)
@@@ -2519,7 -2539,8 +2539,8 @@@ static int did_create_dir(struct send_c
                 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
                 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
   
-               if (di_key.objectid < sctx->send_progress) {
+               if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
+                   di_key.objectid < sctx->send_progress) {
                         ret = 1;
                         goto out;
                 }
@@@ -2581,7 -2602,6 +2602,6 @@@ static int record_ref(struct list_head 
                       u64 dir_gen, struct fs_path *path)
   {
         struct recorded_ref *ref;
-       char *tmp;
   
         ref = kmalloc(sizeof(*ref), GFP_NOFS);
         if (!ref)
@@@ -2591,25 -2611,35 +2611,35 @@@
         ref->dir_gen = dir_gen;
         ref->full_path = path;
   
-       tmp = strrchr(ref->full_path->start, '/');
-       if (!tmp) {
-               ref->name_len = ref->full_path->end - ref->full_path->start;
-               ref->name = ref->full_path->start;
+       ref->name = (char *)kbasename(ref->full_path->start);
+       ref->name_len = ref->full_path->end - ref->name;
+       ref->dir_path = ref->full_path->start;
+       if (ref->name == ref->full_path->start)
                 ref->dir_path_len = 0;
-               ref->dir_path = ref->full_path->start;
-       } else {
-               tmp++;
-               ref->name_len = ref->full_path->end - tmp;
-               ref->name = tmp;
-               ref->dir_path = ref->full_path->start;
+       else
                 ref->dir_path_len = ref->full_path->end -
                                 ref->full_path->start - 1 - ref->name_len;
-       }
   
         list_add_tail(&ref->list, head);
         return 0;
   }
   
+ static int dup_ref(struct recorded_ref *ref, struct list_head *list)
+ {
+       struct recorded_ref *new;
+       /* Queue a copy of ref's dir/dir_gen at the tail of list; 0 or -ENOMEM. */
+       new = kmalloc(sizeof(*ref), GFP_NOFS);
+       if (!new)
+               return -ENOMEM;
+       /* Only dir, dir_gen, full_path and list are set; the copy has no path. */
+       new->dir = ref->dir;
+       new->dir_gen = ref->dir_gen;
+       new->full_path = NULL;
+       INIT_LIST_HEAD(&new->list);
+       list_add_tail(&new->list, list);
+       return 0;
+ }
+ 
   static void __free_recorded_refs(struct list_head *head)
   {
         struct recorded_ref *cur;
@@@ -2724,9 -2754,7 +2754,7 @@@ static int process_recorded_refs(struc
         int ret = 0;
         struct recorded_ref *cur;
         struct recorded_ref *cur2;
-       struct ulist *check_dirs = NULL;
-       struct ulist_iterator uit;
-       struct ulist_node *un;
+       struct list_head check_dirs;
         struct fs_path *valid_path = NULL;
         u64 ow_inode = 0;
         u64 ow_gen;
@@@ -2740,6 -2768,7 +2768,7 @@@ verbose_printk("btrfs: process_recorded
          * which is always '..'
          */
         BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
+       INIT_LIST_HEAD(&check_dirs);
   
         valid_path = fs_path_alloc();
         if (!valid_path) {
@@@ -2747,12 -2776,6 +2776,6 @@@
                 goto out;
         }
   
-       check_dirs = ulist_alloc(GFP_NOFS);
-       if (!check_dirs) {
-               ret = -ENOMEM;
-               goto out;
-       }
- 
         /*
          * First, check if the first ref of the current inode was overwritten
          * before. If yes, we know that the current inode was already orphanized
@@@ -2889,8 -2912,7 +2912,7 @@@
                                         goto out;
                         }
                 }
-               ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                               GFP_NOFS);
+               ret = dup_ref(cur, &check_dirs);
                 if (ret < 0)
                         goto out;
         }
@@@ -2918,8 -2940,7 +2940,7 @@@
                 }
   
                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
-                       ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                                       GFP_NOFS);
+                       ret = dup_ref(cur, &check_dirs);
                         if (ret < 0)
                                 goto out;
                 }
@@@ -2930,8 -2951,7 +2951,7 @@@
                  */
                 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
                                 list);
-               ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                               GFP_NOFS);
+               ret = dup_ref(cur, &check_dirs);
                 if (ret < 0)
                         goto out;
         } else if (!S_ISDIR(sctx->cur_inode_mode)) {
@@@ -2951,12 -2971,10 +2971,10 @@@
                                 if (ret < 0)
                                         goto out;
                         }
-                       ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                                       GFP_NOFS);
+                       ret = dup_ref(cur, &check_dirs);
                         if (ret < 0)
                                 goto out;
                 }
- 
                 /*
                  * If the inode is still orphan, unlink the orphan. This may
                  * happen when a previous inode did overwrite the first ref
@@@ -2978,33 -2996,32 +2996,32 @@@
          * deletion and if it's finally possible to perform the rmdir now.
          * We also update the inode stats of the parent dirs here.
          */
-       ULIST_ITER_INIT(&uit);
-       while ((un = ulist_next(check_dirs, &uit))) {
+       list_for_each_entry(cur, &check_dirs, list) {
                 /*
                  * In case we had refs into dirs that were not processed yet,
                  * we don't need to do the utime and rmdir logic for these dirs.
                  * The dir will be processed later.
                  */
-               if (un->val > sctx->cur_ino)
+               if (cur->dir > sctx->cur_ino)
                         continue;
   
-               ret = get_cur_inode_state(sctx, un->val, un->aux);
+               ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
                 if (ret < 0)
                         goto out;
   
                 if (ret == inode_state_did_create ||
                     ret == inode_state_no_change) {
                         /* TODO delayed utimes */
-                       ret = send_utimes(sctx, un->val, un->aux);
+                       ret = send_utimes(sctx, cur->dir, cur->dir_gen);
                         if (ret < 0)
                                 goto out;
                 } else if (ret == inode_state_did_delete) {
-                       ret = can_rmdir(sctx, un->val, sctx->cur_ino);
+                       ret = can_rmdir(sctx, cur->dir, sctx->cur_ino);
                         if (ret < 0)
                                 goto out;
                         if (ret) {
-                               ret = get_cur_path(sctx, un->val, un->aux,
-                                               valid_path);
+                               ret = get_cur_path(sctx, cur->dir,
+                                                  cur->dir_gen, valid_path);
                                 if (ret < 0)
                                         goto out;
                                 ret = send_rmdir(sctx, valid_path);
@@@ -3017,8 -3034,8 +3034,8 @@@
         ret = 0;
   
   out:
+       __free_recorded_refs(&check_dirs);
         free_recorded_refs(sctx);
-       ulist_free(check_dirs);
         fs_path_free(valid_path);
         return ret;
   }
@@@ -3119,6 -3136,8 +3136,8 @@@ out
   
   struct find_ref_ctx { /* search state handed to __find_iref() by find_iref() */
         u64 dir; /* directory a candidate ref must point to */
+       u64 dir_gen; /* value the directory's looked-up dir_gen must equal */
+       struct btrfs_root *root; /* root used for the get_inode_info() lookup */
         struct fs_path *name; /* name a candidate ref must match exactly */
         int found_idx; /* set to the matching ref's num; find_iref() starts it at -1 */
   };
@@@ -3128,9 -3147,21 +3147,21 @@@ static int __find_iref(int num, u64 dir
                        void *ctx_)
   {
         struct find_ref_ctx *ctx = ctx_;
+       u64 dir_gen;
+       int ret;
   
         if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
             strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
+               /*
+                * To avoid doing extra lookups we'll only do this if everything
+                * else matches.
+                */
+               ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
+                                    NULL, NULL, NULL);
+               if (ret)
+                       return ret;
+               if (dir_gen != ctx->dir_gen)
+                       return 0;
                 ctx->found_idx = num;
                 return 1;
         }
@@@ -3140,14 -3171,16 +3171,16 @@@
   static int find_iref(struct btrfs_root *root,
                      struct btrfs_path *path,
                      struct btrfs_key *key,
-                    u64 dir, struct fs_path *name)
+                    u64 dir, u64 dir_gen, struct fs_path *name)
   {
         int ret;
         struct find_ref_ctx ctx;
   
         ctx.dir = dir;
         ctx.name = name;
+       ctx.dir_gen = dir_gen;
         ctx.found_idx = -1;
+       ctx.root = root;
   
         ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
         if (ret < 0)
@@@ -3163,11 -3196,17 +3196,17 @@@ static int __record_changed_new_ref(in
                                     struct fs_path *name,
                                     void *ctx)
   {
+       u64 dir_gen;
         int ret;
         struct send_ctx *sctx = ctx;
   
+       ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
+ 
         ret = find_iref(sctx->parent_root, sctx->right_path,
-                       sctx->cmp_key, dir, name);
+                       sctx->cmp_key, dir, dir_gen, name);
         if (ret == -ENOENT)
                 ret = __record_new_ref(num, dir, index, name, sctx);
         else if (ret > 0)
@@@ -3180,11 -3219,17 +3219,17 @@@ static int __record_changed_deleted_ref
                                         struct fs_path *name,
                                         void *ctx)
   {
+       u64 dir_gen;
         int ret;
         struct send_ctx *sctx = ctx;
   
+       ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
+ 
         ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
-                       dir, name);
+                       dir, dir_gen, name);
         if (ret == -ENOENT)
                 ret = __record_deleted_ref(num, dir, index, name, sctx);
         else if (ret > 0)
@@@ -3869,7 -3914,8 +3914,8 @@@ static int is_extent_unchanged(struct s
         btrfs_item_key_to_cpu(eb, &found_key, slot);
         if (found_key.objectid != key.objectid ||
             found_key.type != key.type) {
-               ret = 0;
+               /* If we're a hole then just pretend nothing changed */
+               ret = (left_disknr) ? 0 : 1;
                 goto out;
         }
   
@@@ -3895,7 -3941,8 +3941,8 @@@
                  * This may only happen on the first iteration.
                  */
                 if (found_key.offset + right_len <= ekey->offset) {
-                       ret = 0;
+                       /* If we're a hole just pretend nothing changed */
+                       ret = (left_disknr) ? 0 : 1;
                         goto out;
                 }
   
@@@ -3960,8 -4007,8 +4007,8 @@@ static int process_extent(struct send_c
                           struct btrfs_path *path,
                           struct btrfs_key *key)
   {
-       int ret = 0;
         struct clone_root *found_clone = NULL;
+       int ret = 0;
   
         if (S_ISLNK(sctx->cur_inode_mode))
                 return 0;
@@@ -3974,6 -4021,32 +4021,32 @@@
                         ret = 0;
                         goto out;
                 }
+       } else {
+               struct btrfs_file_extent_item *ei;
+               u8 type;
+ 
+               ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                   struct btrfs_file_extent_item);
+               type = btrfs_file_extent_type(path->nodes[0], ei);
+               if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+                   type == BTRFS_FILE_EXTENT_REG) {
+                       /*
+                        * The send spec does not have a prealloc command yet,
+                        * so just leave a hole for prealloc'ed extents until
+                        * we have enough commands queued up to justify rev'ing
+                        * the send spec.
+                        */
+                       if (type == BTRFS_FILE_EXTENT_PREALLOC) {
+                               ret = 0;
+                               goto out;
+                       }
+ 
+                       /* Have a hole, just skip it. */
+                       if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
+                               ret = 0;
+                               goto out;
+                       }
+               }
         }
   
         ret = find_extent_clone(sctx, path, key->objectid, key->offset,
@@@ -4361,6 -4434,64 +4434,64 @@@ static int changed_extent(struct send_c
         return ret;
   }
   
+ /*
+  * Check whether inode @dir differs between the send root and the parent
+  * root by comparing the value get_inode_info() reports for it in each
+  * (held in new_gen / orig_gen here).
+  *
+  * Returns 1 if the two values differ, 0 if they are equal, or the non-zero
+  * result of get_inode_info() if either lookup fails.
+  */
+ static int dir_changed(struct send_ctx *sctx, u64 dir)
+ {
+       u64 orig_gen, new_gen;
+       int ret;
+ 
+       /* value for @dir as seen in the send root */
+       ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
+                            NULL, NULL);
+       if (ret)
+               return ret;
+ 
+       /* value for the same inode number in the parent root */
+       ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
+ 
+       return (orig_gen != new_gen) ? 1 : 0;
+ }
+ 
+ /*
+  * Run dir_changed() on every parent directory referenced by the ref item
+  * at @path / @key.
+  *
+  * For a BTRFS_INODE_REF_KEY the single parent directory is key->offset.
+  * For any other key type the item is walked as a packed sequence of
+  * struct btrfs_inode_extref entries, each naming its own parent.
+  *
+  * Returns 0 if no checked directory changed, 1 as soon as one did, or the
+  * error propagated from dir_changed().
+  */
+ static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
+                       struct btrfs_key *key)
+ {
+       struct btrfs_inode_extref *extref;
+       struct extent_buffer *leaf;
+       u64 dirid = 0, last_dirid = 0;
+       unsigned long ptr;
+       u32 item_size;
+       u32 cur_offset = 0;
+       int ref_name_len;
+       int ret = 0;
+ 
+       /* Easy case, just check this one dirid */
+       if (key->type == BTRFS_INODE_REF_KEY) {
+               dirid = key->offset;
+ 
+               ret = dir_changed(sctx, dirid);
+               goto out;
+       }
+ 
+       leaf = path->nodes[0];
+       item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       while (cur_offset < item_size) {
+               extref = (struct btrfs_inode_extref *)(ptr +
+                                                      cur_offset);
+               dirid = btrfs_inode_extref_parent(leaf, extref);
+               ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
+               /* advance past this entry: fixed header plus its name */
+               cur_offset += ref_name_len + sizeof(*extref);
+               /* same parent as the last one checked, skip the lookups */
+               if (dirid == last_dirid)
+                       continue;
+               ret = dir_changed(sctx, dirid);
+               /* stop on the first change (1) or error (< 0) */
+               if (ret)
+                       break;
+               last_dirid = dirid;
+       }
+ out:
+       return ret;
+ }
+ 
   /*
    * Updates compare related fields in sctx and simply forwards to the actual
    * changed_xxx functions.
@@@ -4376,6 -4507,19 +4507,19 @@@ static int changed_cb(struct btrfs_roo
         int ret = 0;
         struct send_ctx *sctx = ctx;
   
+       if (result == BTRFS_COMPARE_TREE_SAME) {
+               if (key->type != BTRFS_INODE_REF_KEY &&
+                   key->type != BTRFS_INODE_EXTREF_KEY)
+                       return 0;
+               ret = compare_refs(sctx, left_path, key);
+               if (!ret)
+                       return 0;
+               if (ret < 0)
+                       return ret;
+               result = BTRFS_COMPARE_TREE_CHANGED;
+               ret = 0;
+       }
+ 
         sctx->left_path = left_path;
         sctx->right_path = right_path;
         sctx->cmp_key = key;
diff --combined fs/btrfs/volumes.c

index 67a08538184557486ec802840d610828696edf46,0db165ee43405fccbcc044c5c56606e145503d92..0052ca8264d9b37cc171e52e0954b165fd6dad46
--- 1/fs/btrfs/volumes.c
--- 2/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@@ -26,6 -26,7 +26,7 @@@
   #include <linux/ratelimit.h>
   #include <linux/kthread.h>
   #include <linux/raid/pq.h>
+ #include <linux/semaphore.h>
   #include <asm/div64.h>
   #include "compat.h"
   #include "ctree.h"
@@@ -62,6 -63,48 +63,48 @@@ static void unlock_chunks(struct btrfs_
         mutex_unlock(&root->fs_info->chunk_mutex);
   }
   
+ /*
+  * Allocate a zeroed struct btrfs_fs_devices and initialize its
+  * device_list_mutex and its devices, alloc_list and list heads.
+  * The fsid is not set here; alloc_fs_devices() does that.
+  *
+  * Returns the new structure, or ERR_PTR(-ENOMEM) if the allocation fails.
+  */
+ static struct btrfs_fs_devices *__alloc_fs_devices(void)
+ {
+       struct btrfs_fs_devices *fs_devs;
+ 
+       fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
+       if (!fs_devs)
+               return ERR_PTR(-ENOMEM);
+ 
+       mutex_init(&fs_devs->device_list_mutex);
+ 
+       INIT_LIST_HEAD(&fs_devs->devices);
+       INIT_LIST_HEAD(&fs_devs->alloc_list);
+       INIT_LIST_HEAD(&fs_devs->list);
+ 
+       return fs_devs;
+ }
+ 
+ /**
+  * alloc_fs_devices - allocate struct btrfs_fs_devices
+  * @fsid:     a pointer to UUID for this FS.  If NULL a new UUID is
+  *            generated.
+  *
+  * Return: a pointer to a new &struct btrfs_fs_devices on success;
+  * ERR_PTR() on error.  Returned struct is not linked onto any lists and
+  * can be destroyed with kfree() right away.
+  */
+ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
+ {
+       struct btrfs_fs_devices *fs_devs;
+ 
+       /* zeroed allocation with mutex and list heads already set up */
+       fs_devs = __alloc_fs_devices();
+       if (IS_ERR(fs_devs))
+               return fs_devs;
+ 
+       /* take the caller's BTRFS_FSID_SIZE-byte fsid, or make up a new one */
+       if (fsid)
+               memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
+       else
+               generate_random_uuid(fs_devs->fsid);
+ 
+       return fs_devs;
+ }
+ 
   static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
   {
         struct btrfs_device *device;
@@@ -101,6 -144,27 +144,27 @@@ void btrfs_cleanup_fs_uuids(void
         }
   }
   
+ /*
+  * Allocate a zeroed struct btrfs_device and initialize its list heads,
+  * io_lock and readahead state (reada_lock, reada_in_flight and the two
+  * reada radix trees). No other field is assigned here, so everything
+  * else keeps the zero value from kzalloc().
+  *
+  * Returns the new device, or ERR_PTR(-ENOMEM) if the allocation fails.
+  */
+ static struct btrfs_device *__alloc_device(void)
+ {
+       struct btrfs_device *dev;
+ 
+       dev = kzalloc(sizeof(*dev), GFP_NOFS);
+       if (!dev)
+               return ERR_PTR(-ENOMEM);
+ 
+       INIT_LIST_HEAD(&dev->dev_list);
+       INIT_LIST_HEAD(&dev->dev_alloc_list);
+ 
+       spin_lock_init(&dev->io_lock);
+ 
+       /* readahead state */
+       spin_lock_init(&dev->reada_lock);
+       atomic_set(&dev->reada_in_flight, 0);
+       /* both radix trees use GFP_NOFS with __GFP_WAIT masked off */
+       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+ 
+       return dev;
+ }
+ 
   static noinline struct btrfs_device *__find_device(struct list_head *head,
                                                    u64 devid, u8 *uuid)
   {
@@@ -395,16 -459,14 +459,14 @@@ static noinline int device_list_add(con
   
         fs_devices = find_fsid(disk_super->fsid);
         if (!fs_devices) {
-               fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-               if (!fs_devices)
-                       return -ENOMEM;
-               INIT_LIST_HEAD(&fs_devices->devices);
-               INIT_LIST_HEAD(&fs_devices->alloc_list);
+               fs_devices = alloc_fs_devices(disk_super->fsid);
+               if (IS_ERR(fs_devices))
+                       return PTR_ERR(fs_devices);
+ 
                 list_add(&fs_devices->list, &fs_uuids);
-               memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
                 fs_devices->latest_devid = devid;
                 fs_devices->latest_trans = found_transid;
-               mutex_init(&fs_devices->device_list_mutex);
+ 
                 device = NULL;
         } else {
                 device = __find_device(&fs_devices->devices, devid,
@@@ -414,17 -476,12 +476,12 @@@
                 if (fs_devices->opened)
                         return -EBUSY;
   
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device) {
+               device = btrfs_alloc_device(NULL, &devid,
+                                           disk_super->dev_item.uuid);
+               if (IS_ERR(device)) {
                         /* we can safely leave the fs_devices entry around */
-                       return -ENOMEM;
+                       return PTR_ERR(device);
                 }
-               device->devid = devid;
-               device->dev_stats_valid = 0;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, disk_super->dev_item.uuid,
-                      BTRFS_UUID_SIZE);
-               spin_lock_init(&device->io_lock);
   
                 name = rcu_string_strdup(path, GFP_NOFS);
                 if (!name) {
@@@ -432,22 -489,13 +489,13 @@@
                         return -ENOMEM;
                 }
                 rcu_assign_pointer(device->name, name);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
- 
-               /* init readahead state */
-               spin_lock_init(&device->reada_lock);
-               device->reada_curr_zone = NULL;
-               atomic_set(&device->reada_in_flight, 0);
-               device->reada_next = 0;
-               INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-               INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
   
                 mutex_lock(&fs_devices->device_list_mutex);
                 list_add_rcu(&device->dev_list, &fs_devices->devices);
+               fs_devices->num_devices++;
                 mutex_unlock(&fs_devices->device_list_mutex);
   
                 device->fs_devices = fs_devices;
-               fs_devices->num_devices++;
         } else if (!device->name || strcmp(device->name->str, path)) {
                 name = rcu_string_strdup(path, GFP_NOFS);
                 if (!name)
@@@ -474,25 -522,21 +522,21 @@@ static struct btrfs_fs_devices *clone_f
         struct btrfs_device *device;
         struct btrfs_device *orig_dev;
   
-       fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!fs_devices)
-               return ERR_PTR(-ENOMEM);
+       fs_devices = alloc_fs_devices(orig->fsid);
+       if (IS_ERR(fs_devices))
+               return fs_devices;
   
-       INIT_LIST_HEAD(&fs_devices->devices);
-       INIT_LIST_HEAD(&fs_devices->alloc_list);
-       INIT_LIST_HEAD(&fs_devices->list);
-       mutex_init(&fs_devices->device_list_mutex);
         fs_devices->latest_devid = orig->latest_devid;
         fs_devices->latest_trans = orig->latest_trans;
         fs_devices->total_devices = orig->total_devices;
-       memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
   
         /* We have held the volume lock, it is safe to get the devices. */
         list_for_each_entry(orig_dev, &orig->devices, dev_list) {
                 struct rcu_string *name;
   
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device)
+               device = btrfs_alloc_device(NULL, &orig_dev->devid,
+                                           orig_dev->uuid);
+               if (IS_ERR(device))
                         goto error;
   
                 /*
@@@ -506,13 -550,6 +550,6 @@@
                 }
                 rcu_assign_pointer(device->name, name);
   
-               device->devid = orig_dev->devid;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
-               spin_lock_init(&device->io_lock);
-               INIT_LIST_HEAD(&device->dev_list);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
- 
                 list_add(&device->dev_list, &fs_devices->devices);
                 device->fs_devices = fs_devices;
                 fs_devices->num_devices++;
@@@ -636,23 -673,22 +673,22 @@@ static int __btrfs_close_devices(struc
   
                 if (device->can_discard)
                         fs_devices->num_can_discard--;
+               if (device->missing)
+                       fs_devices->missing_devices--;
   
-               new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
-               BUG_ON(!new_device); /* -ENOMEM */
-               memcpy(new_device, device, sizeof(*new_device));
+               new_device = btrfs_alloc_device(NULL, &device->devid,
+                                               device->uuid);
+               BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
   
                 /* Safe because we are under uuid_mutex */
                 if (device->name) {
                         name = rcu_string_strdup(device->name->str, GFP_NOFS);
-                       BUG_ON(device->name && !name); /* -ENOMEM */
+                       BUG_ON(!name); /* -ENOMEM */
                         rcu_assign_pointer(new_device->name, name);
                 }
-               new_device->bdev = NULL;
-               new_device->writeable = 0;
-               new_device->in_fs_metadata = 0;
-               new_device->can_discard = 0;
-               spin_lock_init(&new_device->io_lock);
+ 
                 list_replace_rcu(&device->dev_list, &new_device->dev_list);
+               new_device->fs_devices = device->fs_devices;
   
                 call_rcu(&device->rcu, free_device);
         }
@@@ -865,7 -901,7 +901,7 @@@ int btrfs_scan_one_device(const char *p
         disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
   
         if (btrfs_super_bytenr(disk_super) != bytenr ||
-           disk_super->magic != cpu_to_le64(BTRFS_MAGIC))
+           btrfs_super_magic(disk_super) != BTRFS_MAGIC)
                 goto error_unmap;
   
         devid = btrfs_stack_device_id(&disk_super->dev_item);
@@@ -880,8 -916,7 +916,7 @@@
                 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
         }
   
-       printk(KERN_CONT "devid %llu transid %llu %s\n",
-              (unsigned long long)devid, (unsigned long long)transid, path);
+       printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
   
         ret = device_list_add(path, disk_super, devid, fs_devices_ret);
         if (!ret && fs_devices_ret)
@@@ -1278,8 -1313,7 +1313,7 @@@ static int btrfs_alloc_dev_extent(struc
         btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
   
         write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
-                   (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
-                   BTRFS_UUID_SIZE);
+                   btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
   
         btrfs_set_dev_extent_length(leaf, extent, num_bytes);
         btrfs_mark_buffer_dirty(leaf);
@@@ -1307,15 -1341,14 +1341,14 @@@ static u64 find_next_chunk(struct btrfs
         return ret;
   }
   
- static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
+ static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
+                                   u64 *devid_ret)
   {
         int ret;
         struct btrfs_key key;
         struct btrfs_key found_key;
         struct btrfs_path *path;
   
-       root = root->fs_info->chunk_root;
- 
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
@@@ -1324,20 -1357,21 +1357,21 @@@
         key.type = BTRFS_DEV_ITEM_KEY;
         key.offset = (u64)-1;
   
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
         if (ret < 0)
                 goto error;
   
         BUG_ON(ret == 0); /* Corruption */
   
-       ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+       ret = btrfs_previous_item(fs_info->chunk_root, path,
+                                 BTRFS_DEV_ITEMS_OBJECTID,
                                   BTRFS_DEV_ITEM_KEY);
         if (ret) {
-               *objectid = 1;
+               *devid_ret = 1;
         } else {
                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                       path->slots[0]);
-               *objectid = found_key.offset + 1;
+               *devid_ret = found_key.offset + 1;
         }
         ret = 0;
   error:
@@@ -1391,9 -1425,9 +1425,9 @@@ static int btrfs_add_device(struct btrf
         btrfs_set_device_bandwidth(leaf, dev_item, 0);
         btrfs_set_device_start_offset(leaf, dev_item, 0);
   
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
         write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
-       ptr = (unsigned long)btrfs_device_fsid(dev_item);
+       ptr = btrfs_device_fsid(dev_item);
         write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
         btrfs_mark_buffer_dirty(leaf);
   
@@@ -1562,7 -1596,9 +1596,9 @@@ int btrfs_rm_device(struct btrfs_root *
                 clear_super = true;
         }
   
+       mutex_unlock(&uuid_mutex);
         ret = btrfs_shrink_device(device, 0);
+       mutex_lock(&uuid_mutex);
         if (ret)
                 goto error_undo;
   
@@@ -1586,7 -1622,11 +1622,11 @@@
         /*
          * the device list mutex makes sure that we don't change
          * the device list while someone else is writing out all
-        * the device supers.
+        * the device supers. Whoever is writing all supers, should
+        * lock the device list mutex before getting the number of
+        * devices in the super block (super_copy). Conversely,
+        * whoever updates the number of devices in the super block
+        * (super_copy) should hold the device list mutex.
          */
   
         cur_devices = device->fs_devices;
@@@ -1610,10 -1650,10 +1650,10 @@@
                 device->fs_devices->open_devices--;
   
         call_rcu(&device->rcu, free_device);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
   
         num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
         btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
   
         if (cur_devices->open_devices == 0) {
                 struct btrfs_fs_devices *fs_devices;
@@@ -1793,9 -1833,9 +1833,9 @@@ static int btrfs_prepare_sprout(struct 
         if (!fs_devices->seeding)
                 return -EINVAL;
   
-       seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!seed_devices)
-               return -ENOMEM;
+       seed_devices = __alloc_fs_devices();
+       if (IS_ERR(seed_devices))
+               return PTR_ERR(seed_devices);
   
         old_devices = clone_fs_devices(fs_devices);
         if (IS_ERR(old_devices)) {
@@@ -1814,7 -1854,6 +1854,6 @@@
         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
         list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
                               synchronize_rcu);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
   
         list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
         list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@@ -1830,6 -1869,8 +1869,8 @@@
         generate_random_uuid(fs_devices->fsid);
         memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
         memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+ 
         super_flags = btrfs_super_flags(disk_super) &
                       ~BTRFS_SUPER_FLAG_SEEDING;
         btrfs_set_super_flags(disk_super, super_flags);
@@@ -1889,11 -1930,9 +1930,9 @@@ next_slot
                 dev_item = btrfs_item_ptr(leaf, path->slots[0],
                                           struct btrfs_dev_item);
                 devid = btrfs_device_id(leaf, dev_item);
-               read_extent_buffer(leaf, dev_uuid,
-                                  (unsigned long)btrfs_device_uuid(dev_item),
+               read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                                    BTRFS_UUID_SIZE);
-               read_extent_buffer(leaf, fs_uuid,
-                                  (unsigned long)btrfs_device_fsid(dev_item),
+               read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                                    BTRFS_UUID_SIZE);
                 device = btrfs_find_device(root->fs_info, devid, dev_uuid,
                                            fs_uuid);
@@@ -1956,10 -1995,10 +1995,10 @@@ int btrfs_init_new_device(struct btrfs_
         }
         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
   
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
+       device = btrfs_alloc_device(root->fs_info, NULL, NULL);
+       if (IS_ERR(device)) {
                 /* we can safely leave the fs_devices entry around */
-               ret = -ENOMEM;
+               ret = PTR_ERR(device);
                 goto error;
         }
   
@@@ -1971,13 -2010,6 +2010,6 @@@
         }
         rcu_assign_pointer(device->name, name);
   
-       ret = find_next_devid(root, &device->devid);
-       if (ret) {
-               rcu_string_free(device->name);
-               kfree(device);
-               goto error;
-       }
- 
         trans = btrfs_start_transaction(root, 0);
         if (IS_ERR(trans)) {
                 rcu_string_free(device->name);
@@@ -1992,9 -2024,6 +2024,6 @@@
         if (blk_queue_discard(q))
                 device->can_discard = 1;
         device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       spin_lock_init(&device->io_lock);
         device->generation = trans->transid;
         device->io_width = root->sectorsize;
         device->io_align = root->sectorsize;
@@@ -2121,6 -2150,7 +2150,7 @@@ int btrfs_init_dev_replace_tgtdev(struc
         struct btrfs_fs_info *fs_info = root->fs_info;
         struct list_head *devices;
         struct rcu_string *name;
+       u64 devid = BTRFS_DEV_REPLACE_DEVID;
         int ret = 0;
   
         *device_out = NULL;
@@@ -2142,9 -2172,9 +2172,9 @@@
                 }
         }
   
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
-               ret = -ENOMEM;
+       device = btrfs_alloc_device(NULL, &devid, NULL);
+       if (IS_ERR(device)) {
+               ret = PTR_ERR(device);
                 goto error;
         }
   
@@@ -2161,10 -2191,6 +2191,6 @@@
                 device->can_discard = 1;
         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
         device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       device->devid = BTRFS_DEV_REPLACE_DEVID;
-       spin_lock_init(&device->io_lock);
         device->generation = 0;
         device->io_width = root->sectorsize;
         device->io_align = root->sectorsize;
@@@ -2971,10 -2997,6 +2997,6 @@@ again
                 if (found_key.objectid != key.objectid)
                         break;
   
-               /* chunk zero is special */
-               if (found_key.offset == 0)
-                       break;
- 
                 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
   
                 if (!counting) {
@@@ -3010,6 -3032,8 +3032,8 @@@
                         spin_unlock(&fs_info->balance_lock);
                 }
   loop:
+               if (found_key.offset == 0)
+                       break;
                 key.offset = found_key.offset - 1;
         }
   
@@@ -3074,9 -3098,6 +3098,6 @@@ static void __cancel_balance(struct btr
         atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
   }
   
- void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
-                              struct btrfs_ioctl_balance_args *bargs);
- 
   /*
    * Should be called with both balance and volume mutexes held
    */
@@@ -3139,7 -3160,7 +3160,7 @@@ int btrfs_balance(struct btrfs_balance_
              (bctl->data.target & ~allowed))) {
                 printk(KERN_ERR "btrfs: unable to start balance with target "
                        "data profile %llu\n",
-                      (unsigned long long)bctl->data.target);
+                      bctl->data.target);
                 ret = -EINVAL;
                 goto out;
         }
@@@ -3148,7 -3169,7 +3169,7 @@@
              (bctl->meta.target & ~allowed))) {
                 printk(KERN_ERR "btrfs: unable to start balance with target "
                        "metadata profile %llu\n",
-                      (unsigned long long)bctl->meta.target);
+                      bctl->meta.target);
                 ret = -EINVAL;
                 goto out;
         }
@@@ -3157,7 -3178,7 +3178,7 @@@
              (bctl->sys.target & ~allowed))) {
                 printk(KERN_ERR "btrfs: unable to start balance with target "
                        "system profile %llu\n",
-                      (unsigned long long)bctl->sys.target);
+                      bctl->sys.target);
                 ret = -EINVAL;
                 goto out;
         }
@@@ -3302,7 -3323,7 +3323,7 @@@ int btrfs_resume_balance_async(struct b
         }
   
         tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
- -      return PTR_RET(tsk);
+ +      return PTR_ERR_OR_ZERO(tsk);
   }
   
   int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
@@@ -3430,6 -3451,264 +3451,264 @@@ int btrfs_cancel_balance(struct btrfs_f
         return 0;
   }
   
+ static int btrfs_uuid_scan_kthread(void *data)
+ {
+       struct btrfs_fs_info *fs_info = data;
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_key key;
+       struct btrfs_key max_key;
+       struct btrfs_path *path = NULL;
+       int ret = 0;
+       struct extent_buffer *eb;
+       int slot;
+       struct btrfs_root_item root_item;
+       u32 item_size;
+       struct btrfs_trans_handle *trans = NULL;
+ 
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+ 
+       key.objectid = 0;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = 0;
+ 
+       max_key.objectid = (u64)-1;
+       max_key.type = BTRFS_ROOT_ITEM_KEY;
+       max_key.offset = (u64)-1;
+ 
+       path->keep_locks = 1;
+ 
+       while (1) {
+               ret = btrfs_search_forward(root, &key, &max_key, path, 0);
+               if (ret) {
+                       if (ret > 0)
+                               ret = 0;
+                       break;
+               }
+ 
+               if (key.type != BTRFS_ROOT_ITEM_KEY ||
+                   (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
+                    key.objectid != BTRFS_FS_TREE_OBJECTID) ||
+                   key.objectid > BTRFS_LAST_FREE_OBJECTID)
+                       goto skip;
+ 
+               eb = path->nodes[0];
+               slot = path->slots[0];
+               item_size = btrfs_item_size_nr(eb, slot);
+               if (item_size < sizeof(root_item))
+                       goto skip;
+ 
+               read_extent_buffer(eb, &root_item,
+                                  btrfs_item_ptr_offset(eb, slot),
+                                  (int)sizeof(root_item));
+               if (btrfs_root_refs(&root_item) == 0)
+                       goto skip;
+ 
+               if (!btrfs_is_empty_uuid(root_item.uuid) ||
+                   !btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       if (trans)
+                               goto update_tree;
+ 
+                       btrfs_release_path(path);
+                       /*
+                        * 1 - subvol uuid item
+                        * 1 - received_subvol uuid item
+                        */
+                       trans = btrfs_start_transaction(fs_info->uuid_root, 2);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               break;
+                       }
+                       continue;
+               } else {
+                       goto skip;
+               }
+ update_tree:
+               if (!btrfs_is_empty_uuid(root_item.uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.uuid,
+                                                 BTRFS_UUID_KEY_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+ 
+               if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.received_uuid,
+                                                BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+ 
+ skip:
+               if (trans) {
+                       ret = btrfs_end_transaction(trans, fs_info->uuid_root);
+                       trans = NULL;
+                       if (ret)
+                               break;
+               }
+ 
+               btrfs_release_path(path);
+               if (key.offset < (u64)-1) {
+                       key.offset++;
+               } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+               } else if (key.objectid < (u64)-1) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+                       key.objectid++;
+               } else {
+                       break;
+               }
+               cond_resched();
+       }
+ 
+ out:
+       btrfs_free_path(path);
+       if (trans && !IS_ERR(trans))
+               btrfs_end_transaction(trans, fs_info->uuid_root);
+       if (ret)
+               pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
+       else
+               fs_info->update_uuid_tree_gen = 1;
+       up(&fs_info->uuid_tree_rescan_sem);
+       return 0;
+ }
+ 
+ /*
+  * Callback for btrfs_uuid_tree_iterate().
+  * returns:
+  * 0  check succeeded, the entry is not outdated.
+  * < 0        if an error occurred.
+  * > 0        if the check failed, which means the caller shall remove the entry.
+  */
+ static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
+                                      u8 *uuid, u8 type, u64 subid)
+ {
+       struct btrfs_key key;
+       int ret = 0;
+       struct btrfs_root *subvol_root;
+ 
+       if (type != BTRFS_UUID_KEY_SUBVOL &&
+           type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
+               goto out;
+ 
+       key.objectid = subid;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = (u64)-1;
+       subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(subvol_root)) {
+               ret = PTR_ERR(subvol_root);
+               if (ret == -ENOENT)
+                       ret = 1;
+               goto out;
+       }
+ 
+       switch (type) {
+       case BTRFS_UUID_KEY_SUBVOL:
+               if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
+                       ret = 1;
+               break;
+       case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
+               if (memcmp(uuid, subvol_root->root_item.received_uuid,
+                          BTRFS_UUID_SIZE))
+                       ret = 1;
+               break;
+       }
+ 
+ out:
+       return ret;
+ }
+ 
+ static int btrfs_uuid_rescan_kthread(void *data)
+ {
+       struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
+       int ret;
+ 
+       /*
+        * 1st step is to iterate through the existing UUID tree and
+        * to delete all entries that contain outdated data.
+        * 2nd step is to add all missing entries to the UUID tree.
+        */
+       ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
+       if (ret < 0) {
+               pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
+               up(&fs_info->uuid_tree_rescan_sem);
+               return ret;
+       }
+       return btrfs_uuid_scan_kthread(data);
+ }
+ 
+ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *uuid_root;
+       struct task_struct *task;
+       int ret;
+ 
+       /*
+        * 1 - root node
+        * 1 - root item
+        */
+       trans = btrfs_start_transaction(tree_root, 2);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+ 
+       uuid_root = btrfs_create_tree(trans, fs_info,
+                                     BTRFS_UUID_TREE_OBJECTID);
+       if (IS_ERR(uuid_root)) {
+               btrfs_abort_transaction(trans, tree_root,
+                                       PTR_ERR(uuid_root));
+               return PTR_ERR(uuid_root);
+       }
+ 
+       fs_info->uuid_root = uuid_root;
+ 
+       ret = btrfs_commit_transaction(trans, tree_root);
+       if (ret)
+               return ret;
+ 
+       down(&fs_info->uuid_tree_rescan_sem);
+       task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
+       if (IS_ERR(task)) {
+               /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
+               pr_warn("btrfs: failed to start uuid_scan task\n");
+               up(&fs_info->uuid_tree_rescan_sem);
+               return PTR_ERR(task);
+       }
+ 
+       return 0;
+ }
+ 
+ int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
+ {
+       struct task_struct *task;
+ 
+       down(&fs_info->uuid_tree_rescan_sem);
+       task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
+       if (IS_ERR(task)) {
+               /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
+               pr_warn("btrfs: failed to start uuid_rescan task\n");
+               up(&fs_info->uuid_tree_rescan_sem);
+               return PTR_ERR(task);
+       }
+ 
+       return 0;
+ }
+ 
   /*
    * shrinking a device means finding all of the device extents past
    * the new size, and then following the back refs to the chunks.
@@@ -4194,13 -4473,13 +4473,13 @@@ int btrfs_num_copies(struct btrfs_fs_in
          * and exit, so return 1 so the callers don't try to use other copies.
          */
         if (!em) {
-               btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+               btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
                             logical+len);
                 return 1;
         }
   
         if (em->start > logical || em->start + em->len < logical) {
-               btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+               btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
                             "%Lu-%Lu\n", logical, logical+len, em->start,
                             em->start + em->len);
                 return 1;
@@@ -4375,8 -4654,7 +4654,7 @@@ static int __btrfs_map_block(struct btr
   
         if (!em) {
                 btrfs_crit(fs_info, "unable to find logical %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)*length);
+                       logical, *length);
                 return -EINVAL;
         }
   
@@@ -4671,6 -4949,7 +4949,7 @@@
         }
         bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
         if (!bbio) {
+               kfree(raid_map);
                 ret = -ENOMEM;
                 goto out;
         }
@@@ -5246,9 -5525,7 +5525,7 @@@ int btrfs_map_bio(struct btrfs_root *ro
   
         if (map_length < length) {
                 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)length,
-                       (unsigned long long)map_length);
+                       logical, length, map_length);
                 BUG();
         }
   
@@@ -5314,23 -5591,72 +5591,72 @@@ static struct btrfs_device *add_missing
         struct btrfs_device *device;
         struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
   
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device)
+       device = btrfs_alloc_device(NULL, &devid, dev_uuid);
+       if (IS_ERR(device))
                 return NULL;
-       list_add(&device->dev_list,
-                &fs_devices->devices);
-       device->devid = devid;
-       device->work.func = pending_bios_fn;
+ 
+       list_add(&device->dev_list, &fs_devices->devices);
         device->fs_devices = fs_devices;
-       device->missing = 1;
         fs_devices->num_devices++;
+ 
+       device->missing = 1;
         fs_devices->missing_devices++;
-       spin_lock_init(&device->io_lock);
-       INIT_LIST_HEAD(&device->dev_alloc_list);
-       memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
+ 
         return device;
   }
   
+ /**
+  * btrfs_alloc_device - allocate struct btrfs_device
+  * @fs_info:  used only for generating a new devid, can be NULL if
+  *            devid is provided (i.e. @devid != NULL).
+  * @devid:    a pointer to devid for this device.  If NULL a new devid
+  *            is generated.
+  * @uuid:     a pointer to UUID for this device.  If NULL a new UUID
+  *            is generated.
+  *
+  * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
+  * on error.  Returned struct is not linked onto any lists and can be
+  * destroyed with kfree() right away.
+  */
+ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+                                       const u64 *devid,
+                                       const u8 *uuid)
+ {
+       struct btrfs_device *dev;
+       u64 tmp;
+ 
+       if (!devid && !fs_info) {
+               WARN_ON(1);
+               return ERR_PTR(-EINVAL);
+       }
+ 
+       dev = __alloc_device();
+       if (IS_ERR(dev))
+               return dev;
+ 
+       if (devid)
+               tmp = *devid;
+       else {
+               int ret;
+ 
+               ret = find_next_devid(fs_info, &tmp);
+               if (ret) {
+                       kfree(dev);
+                       return ERR_PTR(ret);
+               }
+       }
+       dev->devid = tmp;
+ 
+       if (uuid)
+               memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
+       else
+               generate_random_uuid(dev->uuid);
+ 
+       dev->work.func = pending_bios_fn;
+ 
+       return dev;
+ }
+ 
   static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                           struct extent_buffer *leaf,
                           struct btrfs_chunk *chunk)
@@@ -5437,7 -5763,7 +5763,7 @@@ static void fill_device_from_item(struc
         WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
         device->is_tgtdev_for_dev_replace = 0;
   
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
         read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
   }
   
@@@ -5500,11 -5826,9 +5826,9 @@@ static int read_one_dev(struct btrfs_ro
         u8 dev_uuid[BTRFS_UUID_SIZE];
   
         devid = btrfs_device_id(leaf, dev_item);
-       read_extent_buffer(leaf, dev_uuid,
-                          (unsigned long)btrfs_device_uuid(dev_item),
+       read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                            BTRFS_UUID_SIZE);
-       read_extent_buffer(leaf, fs_uuid,
-                          (unsigned long)btrfs_device_fsid(dev_item),
+       read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                            BTRFS_UUID_SIZE);
   
         if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
@@@ -5519,8 -5843,7 +5843,7 @@@
                         return -EIO;
   
                 if (!device) {
-                       btrfs_warn(root->fs_info, "devid %llu missing",
-                               (unsigned long long)devid);
+                       btrfs_warn(root->fs_info, "devid %llu missing", devid);
                         device = add_missing_dev(root, devid, dev_uuid);
                         if (!device)
                                 return -ENOMEM;
@@@ -5644,14 -5967,15 +5967,15 @@@ int btrfs_read_chunk_tree(struct btrfs_
         mutex_lock(&uuid_mutex);
         lock_chunks(root);
   
-       /* first we search for all of the device items, and then we
-        * read in all of the chunk items.  This way we can create chunk
-        * mappings that reference all of the devices that are afound
+       /*
+        * Read all device items, and then all the chunk items. All
+        * device items are found before any chunk item (their object id
+        * is smaller than the lowest possible object id for a chunk
+        * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
          */
         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
         key.offset = 0;
         key.type = 0;
- again:
         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
         if (ret < 0)
                 goto error;
@@@ -5667,17 -5991,13 +5991,13 @@@
                         break;
                 }
                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
-               if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-                       if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
-                               break;
-                       if (found_key.type == BTRFS_DEV_ITEM_KEY) {
-                               struct btrfs_dev_item *dev_item;
-                               dev_item = btrfs_item_ptr(leaf, slot,
+               if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+                       struct btrfs_dev_item *dev_item;
+                       dev_item = btrfs_item_ptr(leaf, slot,
                                                   struct btrfs_dev_item);
-                               ret = read_one_dev(root, leaf, dev_item);
-                               if (ret)
-                                       goto error;
-                       }
+                       ret = read_one_dev(root, leaf, dev_item);
+                       if (ret)
+                               goto error;
                 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                         struct btrfs_chunk *chunk;
                         chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@@ -5687,11 -6007,6 +6007,6 @@@
                 }
                 path->slots[0]++;
         }
-       if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-               key.objectid = 0;
-               btrfs_release_path(path);
-               goto again;
-       }
         ret = 0;
   error:
         unlock_chunks(root);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Sep 2013 16:58:51 +0000 (09:58 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Sep 2013 16:58:51 +0000 (09:58 -0700)
		1	2
fs/btrfs/dev-replace.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/send.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/volumes.c	patch \|	diff1 \|	diff2 \|	blob \| history