git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Sep 2013 16:58:51 +0000 (09:58 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Sep 2013 16:58:51 +0000 (09:58 -0700)
Pull btrfs updates from Chris Mason:
 "This is against 3.11-rc7, but was pulled and tested against your tree
  as of yesterday.  We do have two small incrementals queued up, but I
  wanted to get this bunch out the door before I hop on an airplane.

  This is a fairly large batch of fixes, performance improvements, and
  cleanups from the usual Btrfs suspects.

  We've included Stefan Behrens' work to index subvolume UUIDs, which is
  targeted at speeding up send/receive with many subvolumes or snapshots
  in place.  It closes a long-standing performance issue that was built
  into the disk format.

  Mark Fasheh's offline dedup work is also here.  In this case offline
  means the FS is mounted and active, but the dedup work is not done
  inline during file IO.  This is a building block where utilities are
  able to ask the FS to dedup a series of extents.  The kernel takes
  care of verifying the data involved really is the same.  Today this
  involves reading both extents, but we'll continue to evolve the
  patches"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits)
  Btrfs: optimize key searches in btrfs_search_slot
  Btrfs: don't use an async starter for most of our workers
  Btrfs: only update disk_i_size as we remove extents
  Btrfs: fix deadlock in uuid scan kthread
  Btrfs: stop refusing the relocation of chunk 0
  Btrfs: fix memory leak of uuid_root in free_fs_info
  btrfs: reuse kbasename helper
  btrfs: return btrfs error code for dev excl ops err
  Btrfs: allow partial ordered extent completion
  Btrfs: convert all bug_ons in free-space-cache.c
  Btrfs: add support for asserts
  Btrfs: adjust the fs_devices->missing count on unmount
  Btrf: cleanup: don't check for root_refs == 0 twice
  Btrfs: fix for patch "cleanup: don't check the same thing twice"
  Btrfs: get rid of one BUG() in write_all_supers()
  Btrfs: allocate prelim_ref with a slab allocater
  Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC
  Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl
  Btrfs: fix race between removing a dev and writing sbs
  Btrfs: remove ourselves from the cluster list under lock
  ...

1  2 
fs/btrfs/dev-replace.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/send.c
fs/btrfs/volumes.c

diff --combined fs/btrfs/dev-replace.c
index 5f8f3341c099ecd088226726ac8981908868ac00,af800ef677a0d334b0ec84fbae4e0c669e91780d..a64435359385e86a483f30696c932da2c8d5bdb0
@@@ -148,13 -148,13 +148,13 @@@ no_valid_dev_replace_entry_found
                    !btrfs_test_opt(dev_root, DEGRADED)) {
                        ret = -EIO;
                        pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
-                               (unsigned long long)src_devid);
+                               src_devid);
                }
                if (!dev_replace->tgtdev &&
                    !btrfs_test_opt(dev_root, DEGRADED)) {
                        ret = -EIO;
                        pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
-                               (unsigned long long)BTRFS_DEV_REPLACE_DEVID);
+                               BTRFS_DEV_REPLACE_DEVID);
                }
                if (dev_replace->tgtdev) {
                        if (dev_replace->srcdev) {
@@@ -747,7 -747,7 +747,7 @@@ int btrfs_resume_dev_replace_async(stru
        WARN_ON(atomic_xchg(
                &fs_info->mutually_exclusive_operation_running, 1));
        task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
 -      return PTR_RET(task);
 +      return PTR_ERR_OR_ZERO(task);
  }
  
  static int btrfs_dev_replace_kthread(void *data)
diff --combined fs/btrfs/file.c
index 4d2eb6417145964c8731bf5e56673a0e816d59aa,5ba87b0d2ef86d1563705df7ae62f48e59b7cbae..bc5072b2db537f0f27af1851532b7417b41a8489
@@@ -1334,7 -1334,6 +1334,6 @@@ fail
  static noinline int check_can_nocow(struct inode *inode, loff_t pos,
                                    size_t *write_bytes)
  {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_ordered_extent *ordered;
        u64 lockstart, lockend;
                btrfs_put_ordered_extent(ordered);
        }
  
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
-               return PTR_ERR(trans);
-       }
        num_bytes = lockend - lockstart + 1;
-       ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
-                              NULL);
-       btrfs_end_transaction(trans, root);
+       ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
        if (ret <= 0) {
                ret = 0;
        } else {
@@@ -1727,7 -1718,7 +1718,7 @@@ static ssize_t btrfs_file_aio_write(str
         */
        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
        BTRFS_I(inode)->last_sub_trans = root->log_transid;
 -      if (num_written > 0 || num_written == -EIOCBQUEUED) {
 +      if (num_written > 0) {
                err = generic_write_sync(file, pos, num_written);
                if (err < 0 && num_written > 0)
                        num_written = err;
diff --combined fs/btrfs/inode.c
index 7bdc83d04d54ca36006ae54e221092f2d789ef0c,6091ba9d249419765da7cc1ea692c4e9fab21de0..db1e4394857906e55000e9b0f85714a67e59260a
@@@ -230,12 -230,13 +230,13 @@@ fail
   * does the checks required to make sure the data is small enough
   * to fit as an inline extent.
   */
- static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct inode *inode, u64 start, u64 end,
-                                size_t compressed_size, int compress_type,
-                                struct page **compressed_pages)
+ static noinline int cow_file_range_inline(struct btrfs_root *root,
+                                         struct inode *inode, u64 start,
+                                         u64 end, size_t compressed_size,
+                                         int compress_type,
+                                         struct page **compressed_pages)
  {
+       struct btrfs_trans_handle *trans;
        u64 isize = i_size_read(inode);
        u64 actual_end = min(end + 1, isize);
        u64 inline_len = actual_end - start;
                return 1;
        }
  
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
        ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
-       if (ret)
-               return ret;
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto out;
+       }
  
        if (isize > actual_end)
                inline_len = min_t(u64, isize, actual_end);
                                   compress_type, compressed_pages);
        if (ret && ret != -ENOSPC) {
                btrfs_abort_transaction(trans, root, ret);
-               return ret;
+               goto out;
        } else if (ret == -ENOSPC) {
-               return 1;
+               ret = 1;
+               goto out;
        }
  
        set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
        btrfs_delalloc_release_metadata(inode, end + 1 - start);
        btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
-       return 0;
+ out:
+       btrfs_end_transaction(trans, root);
+       return ret;
  }
  
  struct async_extent {
@@@ -343,7 -354,6 +354,6 @@@ static noinline int compress_file_range
                                        int *num_added)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
        u64 num_bytes;
        u64 blocksize = root->sectorsize;
        u64 actual_end;
@@@ -461,45 -471,36 +471,36 @@@ again
        }
  cont:
        if (start == 0) {
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-                       trans = NULL;
-                       goto cleanup_and_out;
-               }
-               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                /* lets try to make an inline extent */
                if (ret || total_in < (actual_end - start)) {
                        /* we didn't compress the entire range, try
                         * to make an uncompressed inline extent.
                         */
-                       ret = cow_file_range_inline(trans, root, inode,
-                                                   start, end, 0, 0, NULL);
+                       ret = cow_file_range_inline(root, inode, start, end,
+                                                   0, 0, NULL);
                } else {
                        /* try making a compressed inline extent */
-                       ret = cow_file_range_inline(trans, root, inode,
-                                                   start, end,
+                       ret = cow_file_range_inline(root, inode, start, end,
                                                    total_compressed,
                                                    compress_type, pages);
                }
                if (ret <= 0) {
+                       unsigned long clear_flags = EXTENT_DELALLOC |
+                               EXTENT_DEFRAG;
+                       clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
                        /*
                         * inline extent creation worked or returned error,
                         * we don't need to create any more async work items.
                         * Unlock and free up our temp pages.
                         */
-                       extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, NULL,
-                            EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
-                       btrfs_end_transaction(trans, root);
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                                    clear_flags, PAGE_UNLOCK |
+                                                    PAGE_CLEAR_DIRTY |
+                                                    PAGE_SET_WRITEBACK |
+                                                    PAGE_END_WRITEBACK);
                        goto free_pages_out;
                }
-               btrfs_end_transaction(trans, root);
        }
  
        if (will_compress) {
@@@ -590,20 -591,6 +591,6 @@@ free_pages_out
        kfree(pages);
  
        goto out;
- cleanup_and_out:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
-       if (!trans || IS_ERR(trans))
-               btrfs_error(root->fs_info, ret, "Failed to join transaction");
-       else
-               btrfs_abort_transaction(trans, root, ret);
-       goto free_pages_out;
  }
  
  /*
@@@ -617,7 -604,6 +604,6 @@@ static noinline int submit_compressed_e
  {
        struct async_extent *async_extent;
        u64 alloc_hint = 0;
-       struct btrfs_trans_handle *trans;
        struct btrfs_key ins;
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@@ -678,20 -664,10 +664,10 @@@ retry
                lock_extent(io_tree, async_extent->start,
                            async_extent->start + async_extent->ram_size - 1);
  
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-               } else {
-                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_reserve_extent(trans, root,
+               ret = btrfs_reserve_extent(root,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
                                           0, alloc_hint, &ins, 1);
-                       if (ret && ret != -ENOSPC)
-                               btrfs_abort_transaction(trans, root, ret);
-                       btrfs_end_transaction(trans, root);
-               }
                if (ret) {
                        int i;
  
                /*
                 * clear dirty, set writeback and unlock the pages.
                 */
-               extent_clear_unlock_delalloc(inode,
-                               &BTRFS_I(inode)->io_tree,
-                               async_extent->start,
+               extent_clear_unlock_delalloc(inode, async_extent->start,
                                async_extent->start +
                                async_extent->ram_size - 1,
-                               NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK |
-                               EXTENT_CLEAR_DELALLOC |
-                               EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
+                               NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
+                               PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                               PAGE_SET_WRITEBACK);
                ret = btrfs_submit_compressed_write(inode,
                                    async_extent->start,
                                    async_extent->ram_size,
@@@ -798,16 -770,13 +770,13 @@@ out
  out_free_reserve:
        btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
  out_free:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    async_extent->start,
+       extent_clear_unlock_delalloc(inode, async_extent->start,
                                     async_extent->start +
                                     async_extent->ram_size - 1,
-                                    NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                                    NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
        kfree(async_extent);
        goto again;
  }
@@@ -857,14 -826,13 +826,13 @@@ static u64 get_extent_allocation_hint(s
   * required to start IO on it.  It may be clean and already done with
   * IO when we return.
   */
- static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
-                                    struct inode *inode,
-                                    struct btrfs_root *root,
-                                    struct page *locked_page,
-                                    u64 start, u64 end, int *page_started,
-                                    unsigned long *nr_written,
-                                    int unlock)
+ static noinline int cow_file_range(struct inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+                                  unsigned long *nr_written,
+                                  int unlock)
  {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 alloc_hint = 0;
        u64 num_bytes;
        unsigned long ram_size;
        /* if this is a small write inside eof, kick off defrag */
        if (num_bytes < 64 * 1024 &&
            (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
-               btrfs_add_inode_defrag(trans, inode);
+               btrfs_add_inode_defrag(NULL, inode);
  
        if (start == 0) {
                /* lets try to make an inline extent */
-               ret = cow_file_range_inline(trans, root, inode,
-                                           start, end, 0, 0, NULL);
+               ret = cow_file_range_inline(root, inode, start, end, 0, 0,
+                                           NULL);
                if (ret == 0) {
-                       extent_clear_unlock_delalloc(inode,
-                                    &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                    EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG, PAGE_UNLOCK |
+                                    PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+                                    PAGE_END_WRITEBACK);
  
                        *nr_written = *nr_written +
                             (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                        *page_started = 1;
                        goto out;
                } else if (ret < 0) {
-                       btrfs_abort_transaction(trans, root, ret);
                        goto out_unlock;
                }
        }
                unsigned long op;
  
                cur_alloc_size = disk_num_bytes;
-               ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
+               ret = btrfs_reserve_extent(root, cur_alloc_size,
                                           root->sectorsize, 0, alloc_hint,
                                           &ins, 1);
-               if (ret < 0) {
-                       btrfs_abort_transaction(trans, root, ret);
+               if (ret < 0)
                        goto out_unlock;
-               }
  
                em = alloc_extent_map();
                if (!em) {
                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                        ret = btrfs_reloc_clone_csums(inode, start,
                                                      cur_alloc_size);
-                       if (ret) {
-                               btrfs_abort_transaction(trans, root, ret);
+                       if (ret)
                                goto out_reserve;
-                       }
                }
  
                if (disk_num_bytes < cur_alloc_size)
                 * Do set the Private2 bit so we know this page was properly
                 * setup for writepage
                 */
-               op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
-               op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                       EXTENT_SET_PRIVATE2;
+               op = unlock ? PAGE_UNLOCK : 0;
+               op |= PAGE_SET_PRIVATE2;
  
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                            start, start + ram_size - 1,
-                                            locked_page, op);
+               extent_clear_unlock_delalloc(inode, start,
+                                            start + ram_size - 1, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC,
+                                            op);
                disk_num_bytes -= cur_alloc_size;
                num_bytes -= cur_alloc_size;
                alloc_hint = ins.objectid + ins.offset;
@@@ -1008,52 -967,14 +967,14 @@@ out
  out_reserve:
        btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
  out_unlock:
-       extent_clear_unlock_delalloc(inode,
-                    &BTRFS_I(inode)->io_tree,
-                    start, end, locked_page,
-                    EXTENT_CLEAR_UNLOCK_PAGE |
-                    EXTENT_CLEAR_UNLOCK |
-                    EXTENT_CLEAR_DELALLOC |
-                    EXTENT_CLEAR_DIRTY |
-                    EXTENT_SET_WRITEBACK |
-                    EXTENT_END_WRITEBACK);
+       extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                    EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                    EXTENT_DELALLOC | EXTENT_DEFRAG,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
        goto out;
  }
  
- static noinline int cow_file_range(struct inode *inode,
-                                  struct page *locked_page,
-                                  u64 start, u64 end, int *page_started,
-                                  unsigned long *nr_written,
-                                  int unlock)
- {
-       struct btrfs_trans_handle *trans;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret;
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
-               return PTR_ERR(trans);
-       }
-       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-       ret = __cow_file_range(trans, inode, root, locked_page, start, end,
-                              page_started, nr_written, unlock);
-       btrfs_end_transaction(trans, root);
-       return ret;
- }
  /*
   * work queue call back to started compression on a file and pages
   */
@@@ -1221,15 -1142,13 +1142,13 @@@ static noinline int run_delalloc_nocow(
  
        path = btrfs_alloc_path();
        if (!path) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                return -ENOMEM;
        }
  
                trans = btrfs_join_transaction(root);
  
        if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                btrfs_free_path(path);
                return PTR_ERR(trans);
        }
@@@ -1369,9 -1286,9 +1286,9 @@@ out_check
  
                btrfs_release_path(path);
                if (cow_start != (u64)-1) {
-                       ret = __cow_file_range(trans, inode, root, locked_page,
-                                              cow_start, found_key.offset - 1,
-                                              page_started, nr_written, 1);
+                       ret = cow_file_range(inode, locked_page,
+                                            cow_start, found_key.offset - 1,
+                                            page_started, nr_written, 1);
                        if (ret) {
                                btrfs_abort_transaction(trans, root, ret);
                                goto error;
                        }
                }
  
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                               cur_offset, cur_offset + num_bytes - 1,
-                               locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                               EXTENT_SET_PRIVATE2);
+               extent_clear_unlock_delalloc(inode, cur_offset,
+                                            cur_offset + num_bytes - 1,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC, PAGE_UNLOCK |
+                                            PAGE_SET_PRIVATE2);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
        }
  
        if (cow_start != (u64)-1) {
-               ret = __cow_file_range(trans, inode, root, locked_page,
-                                      cow_start, end,
-                                      page_started, nr_written, 1);
+               ret = cow_file_range(inode, locked_page, cow_start, end,
+                                    page_started, nr_written, 1);
                if (ret) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto error;
@@@ -1460,16 -1376,13 +1376,13 @@@ error
                ret = err;
  
        if (ret && cur_offset < end)
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            cur_offset, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, cur_offset, end,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC | EXTENT_DEFRAG |
+                                            EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
        btrfs_free_path(path);
        return ret;
  }
@@@ -2132,6 -2045,7 +2045,7 @@@ static noinline int record_one_backref(
                WARN_ON(1);
                return ret;
        }
+       ret = 0;
  
        while (1) {
                cond_resched();
                    old->len || extent_offset + num_bytes <=
                    old->extent_offset + old->offset)
                        continue;
-               ret = 0;
                break;
        }
  
@@@ -2238,16 -2150,18 +2150,18 @@@ static noinline bool record_extent_back
  
  static int relink_is_mergable(struct extent_buffer *leaf,
                              struct btrfs_file_extent_item *fi,
-                             u64 disk_bytenr)
+                             struct new_sa_defrag_extent *new)
  {
-       if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
+       if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
                return 0;
  
        if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
                return 0;
  
-       if (btrfs_file_extent_compression(leaf, fi) ||
-           btrfs_file_extent_encryption(leaf, fi) ||
+       if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
+               return 0;
+       if (btrfs_file_extent_encryption(leaf, fi) ||
            btrfs_file_extent_other_encoding(leaf, fi))
                return 0;
  
@@@ -2391,8 -2305,8 +2305,8 @@@ again
                                    struct btrfs_file_extent_item);
                extent_len = btrfs_file_extent_num_bytes(leaf, fi);
  
-               if (relink_is_mergable(leaf, fi, new->bytenr) &&
-                   extent_len + found_key.offset == start) {
+               if (extent_len + found_key.offset == start &&
+                   relink_is_mergable(leaf, fi, new)) {
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        extent_len + len);
                        btrfs_mark_buffer_dirty(leaf);
@@@ -2648,8 -2562,10 +2562,10 @@@ static int btrfs_finish_ordered_io(stru
        struct extent_state *cached_state = NULL;
        struct new_sa_defrag_extent *new = NULL;
        int compress_type = 0;
-       int ret;
+       int ret = 0;
+       u64 logical_len = ordered_extent->len;
        bool nolock;
+       bool truncated = false;
  
        nolock = btrfs_is_free_space_inode(inode);
  
                goto out;
        }
  
+       if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+               truncated = true;
+               logical_len = ordered_extent->truncated_len;
+               /* Truncated the entire extent, don't bother adding */
+               if (!logical_len)
+                       goto out;
+       }
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
                btrfs_ordered_update_i_size(inode, 0, ordered_extent);
                ret = btrfs_mark_extent_written(trans, inode,
                                                ordered_extent->file_offset,
                                                ordered_extent->file_offset +
-                                               ordered_extent->len);
+                                               logical_len);
        } else {
                BUG_ON(root == root->fs_info->tree_root);
                ret = insert_reserved_file_extent(trans, inode,
                                                ordered_extent->file_offset,
                                                ordered_extent->start,
                                                ordered_extent->disk_len,
-                                               ordered_extent->len,
-                                               ordered_extent->len,
+                                               logical_len, logical_len,
                                                compress_type, 0, 0,
                                                BTRFS_FILE_EXTENT_REG);
        }
        if (trans)
                btrfs_end_transaction(trans, root);
  
-       if (ret) {
-               clear_extent_uptodate(io_tree, ordered_extent->file_offset,
-                                     ordered_extent->file_offset +
-                                     ordered_extent->len - 1, NULL, GFP_NOFS);
+       if (ret || truncated) {
+               u64 start, end;
+               if (truncated)
+                       start = ordered_extent->file_offset + logical_len;
+               else
+                       start = ordered_extent->file_offset;
+               end = ordered_extent->file_offset + ordered_extent->len - 1;
+               clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
+               /* Drop the cache for the part of the extent we didn't write. */
+               btrfs_drop_extent_cache(inode, start, end, 0);
  
                /*
                 * If the ordered extent had an IOERR or something else went
                 * wrong we need to return the space for this ordered extent
-                * back to the allocator.
+                * back to the allocator.  We only free the extent in the
+                * truncated case if we didn't write out the extent at all.
                 */
-               if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+               if ((ret || !logical_len) &&
+                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
                    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
                        btrfs_free_reserved_extent(root, ordered_extent->start,
                                                   ordered_extent->disk_len);
@@@ -2827,16 -2760,16 +2760,16 @@@ static int btrfs_writepage_end_io_hook(
   * if there's a match, we allow the bio to finish.  If not, the code in
   * extent_io.c will try to find good copies for us.
   */
- static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-                              struct extent_state *state, int mirror)
+ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+                                     u64 phy_offset, struct page *page,
+                                     u64 start, u64 end, int mirror)
  {
        size_t offset = start - page_offset(page);
        struct inode *inode = page->mapping->host;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        char *kaddr;
-       u64 private = ~(u32)0;
-       int ret;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       u32 csum_expected;
        u32 csum = ~(u32)0;
        static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
                return 0;
        }
  
-       if (state && state->start == start) {
-               private = state->private;
-               ret = 0;
-       } else {
-               ret = get_state_private(io_tree, start, &private);
-       }
-       kaddr = kmap_atomic(page);
-       if (ret)
-               goto zeroit;
+       phy_offset >>= inode->i_sb->s_blocksize_bits;
+       csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
  
+       kaddr = kmap_atomic(page);
        csum = btrfs_csum_data(kaddr + offset, csum,  end - start + 1);
        btrfs_csum_final(csum, (char *)&csum);
-       if (csum != private)
+       if (csum != csum_expected)
                goto zeroit;
  
        kunmap_atomic(kaddr);
@@@ -2877,14 -2804,12 +2804,12 @@@ good
  
  zeroit:
        if (__ratelimit(&_rs))
-               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
-                       (unsigned long long)btrfs_ino(page->mapping->host),
-                       (unsigned long long)start, csum,
-                       (unsigned long long)private);
+               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+                       btrfs_ino(page->mapping->host), start, csum, csum_expected);
        memset(kaddr + offset, 1, end - start + 1);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);
-       if (private == 0)
+       if (csum_expected == 0)
                return 0;
        return -EIO;
  }
@@@ -2971,8 -2896,10 +2896,10 @@@ void btrfs_orphan_commit_root(struct bt
            btrfs_root_refs(&root->root_item) > 0) {
                ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
                                            root->root_key.objectid);
-               BUG_ON(ret);
-               root->orphan_item_inserted = 0;
+               if (ret)
+                       btrfs_abort_transaction(trans, root, ret);
+               else
+                       root->orphan_item_inserted = 0;
        }
  
        if (block_rsv) {
@@@ -3041,11 -2968,18 +2968,18 @@@ int btrfs_orphan_add(struct btrfs_trans
        /* insert an orphan item to track this unlinked/truncated file */
        if (insert >= 1) {
                ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
-               if (ret && ret != -EEXIST) {
-                       clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-                                 &BTRFS_I(inode)->runtime_flags);
-                       btrfs_abort_transaction(trans, root, ret);
-                       return ret;
+               if (ret) {
+                       if (reserve) {
+                               clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               btrfs_orphan_release_metadata(inode);
+                       }
+                       if (ret != -EEXIST) {
+                               clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               btrfs_abort_transaction(trans, root, ret);
+                               return ret;
+                       }
                }
                ret = 0;
        }
@@@ -3084,17 -3018,15 +3018,15 @@@ static int btrfs_orphan_del(struct btrf
                release_rsv = 1;
        spin_unlock(&root->orphan_lock);
  
-       if (trans && delete_item) {
+       if (trans && delete_item)
                ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
-               BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
-       }
  
        if (release_rsv) {
                btrfs_orphan_release_metadata(inode);
                atomic_dec(&root->orphan_inodes);
        }
  
-       return 0;
+       return ret;
  }
  
  /*
@@@ -3174,7 -3106,7 +3106,7 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                found_key.type = BTRFS_INODE_ITEM_KEY;
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
 -              ret = PTR_RET(inode);
 +              ret = PTR_ERR_OR_ZERO(inode);
                if (ret && ret != -ESTALE)
                        goto out;
  
                                found_key.objectid);
                        ret = btrfs_del_orphan_item(trans, root,
                                                    found_key.objectid);
-                       BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
                        btrfs_end_transaction(trans, root);
+                       if (ret)
+                               goto out;
                        continue;
                }
  
@@@ -3657,8 -3590,7 +3590,7 @@@ static int __btrfs_unlink_inode(struct 
        if (ret) {
                btrfs_info(root->fs_info,
                        "failed to delete reference to %.*s, inode %llu parent %llu",
-                       name_len, name,
-                       (unsigned long long)ino, (unsigned long long)dir_ino);
+                       name_len, name, ino, dir_ino);
                btrfs_abort_transaction(trans, root, ret);
                goto err;
        }
@@@ -3929,6 -3861,7 +3861,7 @@@ int btrfs_truncate_inode_items(struct b
        u64 extent_num_bytes = 0;
        u64 extent_offset = 0;
        u64 item_end = 0;
+       u64 last_size = (u64)-1;
        u32 found_type = (u8)-1;
        int found_extent;
        int del_item;
@@@ -4026,6 -3959,11 +3959,11 @@@ search_again
                if (found_type != BTRFS_EXTENT_DATA_KEY)
                        goto delete;
  
+               if (del_item)
+                       last_size = found_key.offset;
+               else
+                       last_size = new_size;
                if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
                        u64 num_dec;
                        extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@@@ -4137,6 -4075,8 +4075,8 @@@ out
                        btrfs_abort_transaction(trans, root, ret);
        }
  error:
+       if (last_size != (u64)-1)
+               btrfs_ordered_update_i_size(inode, last_size, NULL);
        btrfs_free_path(path);
        return err;
  }
@@@ -4465,8 -4405,26 +4405,26 @@@ static int btrfs_setsize(struct inode *
                btrfs_inode_resume_unlocked_dio(inode);
  
                ret = btrfs_truncate(inode);
-               if (ret && inode->i_nlink)
-                       btrfs_orphan_del(NULL, inode);
+               if (ret && inode->i_nlink) {
+                       int err;
+                       /*
+                        * failed to truncate, disk_i_size is only adjusted down
+                        * as we remove extents, so it should represent the true
+                        * size of the inode, so reset the in memory size and
+                        * delete our orphan entry.
+                        */
+                       trans = btrfs_join_transaction(root);
+                       if (IS_ERR(trans)) {
+                               btrfs_orphan_del(NULL, inode);
+                               return ret;
+                       }
+                       i_size_write(inode, BTRFS_I(inode)->disk_i_size);
+                       err = btrfs_orphan_del(trans, inode);
+                       if (err)
+                               btrfs_abort_transaction(trans, root, err);
+                       btrfs_end_transaction(trans, root);
+               }
        }
  
        return ret;
@@@ -4601,10 -4559,15 +4559,15 @@@ void btrfs_evict_inode(struct inode *in
  
        btrfs_free_block_rsv(root, rsv);
  
+       /*
+        * Errors here aren't a big deal, it just means we leave orphan items
+        * in the tree.  They will be cleaned up on the next mount.
+        */
        if (ret == 0) {
                trans->block_rsv = root->orphan_block_rsv;
-               ret = btrfs_orphan_del(trans, inode);
-               BUG_ON(ret);
+               btrfs_orphan_del(trans, inode);
+       } else {
+               btrfs_orphan_del(NULL, inode);
        }
  
        trans->block_rsv = &root->fs_info->trans_block_rsv;
@@@ -6161,10 -6124,7 +6124,7 @@@ insert
        btrfs_release_path(path);
        if (em->start > start || extent_map_end(em) <= start) {
                btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
-                       (unsigned long long)em->start,
-                       (unsigned long long)em->len,
-                       (unsigned long long)start,
-                       (unsigned long long)len);
+                       em->start, em->len, start, len);
                err = -EIO;
                goto out;
        }
@@@ -6362,39 -6322,32 +6322,32 @@@ static struct extent_map *btrfs_new_ext
                                                  u64 start, u64 len)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
        struct extent_map *em;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
  
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
-               return ERR_CAST(trans);
-       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
        alloc_hint = get_extent_allocation_hint(inode, start, len);
-       ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
+       ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
                                   alloc_hint, &ins, 1);
-       if (ret) {
-               em = ERR_PTR(ret);
-               goto out;
-       }
+       if (ret)
+               return ERR_PTR(ret);
  
        em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
                              ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em))
-               goto out;
+       if (IS_ERR(em)) {
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+               return em;
+       }
  
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
-               em = ERR_PTR(ret);
+               free_extent_map(em);
+               return ERR_PTR(ret);
        }
- out:
-       btrfs_end_transaction(trans, root);
        return em;
  }
  
   * returns 1 when the nocow is safe, < 0 on error, 0 if the
   * block must be cow'd
   */
- noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
-                             struct inode *inode, u64 offset, u64 *len,
+ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
                              u64 *ram_bytes)
  {
+       struct btrfs_trans_handle *trans;
        struct btrfs_path *path;
        int ret;
        struct extent_buffer *leaf;
        if (!path)
                return -ENOMEM;
  
-       ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
+       ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
                                       offset, 0);
        if (ret < 0)
                goto out;
         * look for other files referencing this extent, if we
         * find any we must cow
         */
-       if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
-                                 key.offset - backref_offset, disk_bytenr))
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans)) {
+               ret = 0;
                goto out;
+       }
+       ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+                                   key.offset - backref_offset, disk_bytenr);
+       btrfs_end_transaction(trans, root);
+       if (ret) {
+               ret = 0;
+               goto out;
+       }
  
        /*
         * adjust disk_bytenr and num_bytes to cover just the bytes
@@@ -6633,7 -6596,6 +6596,6 @@@ static int btrfs_get_blocks_direct(stru
        u64 start = iblock << inode->i_blkbits;
        u64 lockstart, lockend;
        u64 len = bh_result->b_size;
-       struct btrfs_trans_handle *trans;
        int unlock_bits = EXTENT_LOCKED;
        int ret = 0;
  
                len = min(len, em->len - (start - em->start));
                block_start = em->block_start + (start - em->start);
  
-               /*
-                * we're not going to log anything, but we do need
-                * to make sure the current transaction stays open
-                * while we look for nocow cross refs
-                */
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans))
-                       goto must_cow;
-               if (can_nocow_extent(trans, inode, start, &len, &orig_start,
+               if (can_nocow_extent(inode, start, &len, &orig_start,
                                     &orig_block_len, &ram_bytes) == 1) {
                        if (type == BTRFS_ORDERED_PREALLOC) {
                                free_extent_map(em);
                                                       block_start, len,
                                                       orig_block_len,
                                                       ram_bytes, type);
-                               if (IS_ERR(em)) {
-                                       btrfs_end_transaction(trans, root);
+                               if (IS_ERR(em))
                                        goto unlock_err;
-                               }
                        }
  
                        ret = btrfs_add_ordered_extent_dio(inode, start,
                                           block_start, len, len, type);
-                       btrfs_end_transaction(trans, root);
                        if (ret) {
                                free_extent_map(em);
                                goto unlock_err;
                        }
                        goto unlock;
                }
-               btrfs_end_transaction(trans, root);
        }
- must_cow:
        /*
         * this will cow the extent, reset the len in case we changed
         * it above
@@@ -6813,26 -6762,6 +6762,6 @@@ unlock_err
        return ret;
  }
  
- struct btrfs_dio_private {
-       struct inode *inode;
-       u64 logical_offset;
-       u64 disk_bytenr;
-       u64 bytes;
-       void *private;
-       /* number of bios pending for this dio */
-       atomic_t pending_bios;
-       /* IO errors */
-       int errors;
-       /* orig_bio is our btrfs_io_bio */
-       struct bio *orig_bio;
-       /* dio_bio came from fs/direct-io.c */
-       struct bio *dio_bio;
- };
  static void btrfs_endio_direct_read(struct bio *bio, int err)
  {
        struct btrfs_dio_private *dip = bio->bi_private;
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct bio *dio_bio;
+       u32 *csums = (u32 *)dip->csum;
+       int index = 0;
        u64 start;
  
        start = dip->logical_offset;
                        struct page *page = bvec->bv_page;
                        char *kaddr;
                        u32 csum = ~(u32)0;
-                       u64 private = ~(u32)0;
                        unsigned long flags;
  
-                       if (get_state_private(&BTRFS_I(inode)->io_tree,
-                                             start, &private))
-                               goto failed;
                        local_irq_save(flags);
                        kaddr = kmap_atomic(page);
                        csum = btrfs_csum_data(kaddr + bvec->bv_offset,
                        local_irq_restore(flags);
  
                        flush_dcache_page(bvec->bv_page);
-                       if (csum != private) {
- failed:
-                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
-                                       (unsigned long long)btrfs_ino(inode),
-                                       (unsigned long long)start,
-                                       csum, (unsigned)private);
+                       if (csum != csums[index]) {
+                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+                                         btrfs_ino(inode), start, csum,
+                                         csums[index]);
                                err = -EIO;
                        }
                }
  
                start += bvec->bv_len;
                bvec++;
+               index++;
        } while (bvec <= bvec_end);
  
        unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@@ -6956,7 -6882,7 +6882,7 @@@ static void btrfs_end_dio_bio(struct bi
        if (err) {
                printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
                      "sector %#Lx len %u err no %d\n",
-                     (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
+                     btrfs_ino(dip->inode), bio->bi_rw,
                      (unsigned long long)bio->bi_sector, bio->bi_size, err);
                dip->errors = 1;
  
@@@ -6992,6 -6918,7 +6918,7 @@@ static inline int __btrfs_submit_dio_bi
                                         int rw, u64 file_offset, int skip_sum,
                                         int async_submit)
  {
+       struct btrfs_dio_private *dip = bio->bi_private;
        int write = rw & REQ_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
                if (ret)
                        goto err;
        } else if (!skip_sum) {
-               ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
+               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
+                                               file_offset);
                if (ret)
                        goto err;
        }
@@@ -7061,6 -6989,7 +6989,7 @@@ static int btrfs_submit_direct_hook(in
                bio_put(orig_bio);
                return -EIO;
        }
        if (map_length >= orig_bio->bi_size) {
                bio = orig_bio;
                goto submit;
@@@ -7156,19 -7085,28 +7085,28 @@@ static void btrfs_submit_direct(int rw
        struct btrfs_dio_private *dip;
        struct bio *io_bio;
        int skip_sum;
+       int sum_len;
        int write = rw & REQ_WRITE;
        int ret = 0;
+       u16 csum_size;
  
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
  
        io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
        if (!io_bio) {
                ret = -ENOMEM;
                goto free_ordered;
        }
  
-       dip = kmalloc(sizeof(*dip), GFP_NOFS);
+       if (!skip_sum && !write) {
+               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+               sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+               sum_len *= csum_size;
+       } else {
+               sum_len = 0;
+       }
+       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
        if (!dip) {
                ret = -ENOMEM;
                goto free_io_bio;
@@@ -7443,10 -7381,23 +7381,23 @@@ static void btrfs_invalidatepage(struc
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
                 */
-               if (TestClearPagePrivate2(page) &&
-                   btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
-                                                  PAGE_CACHE_SIZE, 1)) {
-                       btrfs_finish_ordered_io(ordered);
+               if (TestClearPagePrivate2(page)) {
+                       struct btrfs_ordered_inode_tree *tree;
+                       u64 new_len;
+                       tree = &BTRFS_I(inode)->ordered_tree;
+                       spin_lock_irq(&tree->lock);
+                       set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
+                       new_len = page_start - ordered->file_offset;
+                       if (new_len < ordered->truncated_len)
+                               ordered->truncated_len = new_len;
+                       spin_unlock_irq(&tree->lock);
+                       if (btrfs_dec_test_ordered_pending(inode, &ordered,
+                                                          page_start,
+                                                          PAGE_CACHE_SIZE, 1))
+                               btrfs_finish_ordered_io(ordered);
                }
                btrfs_put_ordered_extent(ordered);
                cached_state = NULL;
@@@ -7612,7 -7563,6 +7563,6 @@@ static int btrfs_truncate(struct inode 
        u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
  
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
-       btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
  
        /*
         * Yes ladies and gentlemen, this is indeed ugly.  The fact is we have
@@@ -7876,7 -7826,7 +7826,7 @@@ void btrfs_destroy_inode(struct inode *
        if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                     &BTRFS_I(inode)->runtime_flags)) {
                btrfs_info(root->fs_info, "inode %llu still on the orphan list",
-                       (unsigned long long)btrfs_ino(inode));
+                       btrfs_ino(inode));
                atomic_dec(&root->orphan_inodes);
        }
  
                        break;
                else {
                        btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
-                               (unsigned long long)ordered->file_offset,
-                               (unsigned long long)ordered->len);
+                               ordered->file_offset, ordered->len);
                        btrfs_remove_ordered_extent(inode, ordered);
                        btrfs_put_ordered_extent(ordered);
                        btrfs_put_ordered_extent(ordered);
@@@ -8161,10 -8110,8 +8110,8 @@@ static int btrfs_rename(struct inode *o
                                                 new_dentry->d_name.name,
                                                 new_dentry->d_name.len);
                }
-               if (!ret && new_inode->i_nlink == 0) {
+               if (!ret && new_inode->i_nlink == 0)
                        ret = btrfs_orphan_add(trans, new_dentry->d_inode);
-                       BUG_ON(ret);
-               }
                if (ret) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto out_fail;
@@@ -8525,8 -8472,8 +8472,8 @@@ static int __btrfs_prealloc_file_range(
  
                cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
                cur_bytes = max(cur_bytes, min_size);
-               ret = btrfs_reserve_extent(trans, root, cur_bytes,
-                                          min_size, 0, *alloc_hint, &ins, 1);
+               ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
+                                          *alloc_hint, &ins, 1);
                if (ret) {
                        if (own_trans)
                                btrfs_end_transaction(trans, root);
diff --combined fs/btrfs/send.c
index 2e14fd89a8b46e80622168ef30b9ac206b4b6a04,b4b15467426b0cfab6033c449a043e216991c464..e46e0ed7492555646e58659f4cbb4c94ddf4c4d1
@@@ -26,6 -26,7 +26,7 @@@
  #include <linux/radix-tree.h>
  #include <linux/crc32c.h>
  #include <linux/vmalloc.h>
+ #include <linux/string.h>
  
  #include "send.h"
  #include "backref.h"
@@@ -54,8 -55,8 +55,8 @@@ struct fs_path 
  
                        char *buf;
                        int buf_len;
-                       int reversed:1;
-                       int virtual_mem:1;
+                       unsigned int reversed:1;
+                       unsigned int virtual_mem:1;
                        char inline_buf[];
                };
                char pad[PAGE_SIZE];
@@@ -219,7 -220,7 +220,7 @@@ static int fs_path_ensure_buf(struct fs
        len = PAGE_ALIGN(len);
  
        if (p->buf == p->inline_buf) {
 -              tmp_buf = kmalloc(len, GFP_NOFS);
 +              tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN);
                if (!tmp_buf) {
                        tmp_buf = vmalloc(len);
                        if (!tmp_buf)
@@@ -1668,6 -1669,7 +1669,7 @@@ static int will_overwrite_ref(struct se
                              u64 *who_ino, u64 *who_gen)
  {
        int ret = 0;
+       u64 gen;
        u64 other_inode = 0;
        u8 other_type = 0;
  
        if (ret <= 0)
                goto out;
  
+       /*
+        * If we have a parent root we need to verify that the parent dir was
+        * not deleted and then re-created, if it was then we have no overwrite
+        * and we can just unlink this entry.
+        */
+       if (sctx->parent_root) {
+               ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
+                                    NULL, NULL, NULL);
+               if (ret < 0 && ret != -ENOENT)
+                       goto out;
+               if (ret) {
+                       ret = 0;
+                       goto out;
+               }
+               if (gen != dir_gen)
+                       goto out;
+       }
        ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
                        &other_inode, &other_type);
        if (ret < 0 && ret != -ENOENT)
@@@ -2519,7 -2539,8 +2539,8 @@@ static int did_create_dir(struct send_c
                di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
                btrfs_dir_item_key_to_cpu(eb, di, &di_key);
  
-               if (di_key.objectid < sctx->send_progress) {
+               if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
+                   di_key.objectid < sctx->send_progress) {
                        ret = 1;
                        goto out;
                }
@@@ -2581,7 -2602,6 +2602,6 @@@ static int record_ref(struct list_head 
                      u64 dir_gen, struct fs_path *path)
  {
        struct recorded_ref *ref;
-       char *tmp;
  
        ref = kmalloc(sizeof(*ref), GFP_NOFS);
        if (!ref)
        ref->dir_gen = dir_gen;
        ref->full_path = path;
  
-       tmp = strrchr(ref->full_path->start, '/');
-       if (!tmp) {
-               ref->name_len = ref->full_path->end - ref->full_path->start;
-               ref->name = ref->full_path->start;
+       ref->name = (char *)kbasename(ref->full_path->start);
+       ref->name_len = ref->full_path->end - ref->name;
+       ref->dir_path = ref->full_path->start;
+       if (ref->name == ref->full_path->start)
                ref->dir_path_len = 0;
-               ref->dir_path = ref->full_path->start;
-       } else {
-               tmp++;
-               ref->name_len = ref->full_path->end - tmp;
-               ref->name = tmp;
-               ref->dir_path = ref->full_path->start;
+       else
                ref->dir_path_len = ref->full_path->end -
                                ref->full_path->start - 1 - ref->name_len;
-       }
  
        list_add_tail(&ref->list, head);
        return 0;
  }
  
+ static int dup_ref(struct recorded_ref *ref, struct list_head *list)
+ {
+       struct recorded_ref *new;
+       new = kmalloc(sizeof(*ref), GFP_NOFS);
+       if (!new)
+               return -ENOMEM;
+       new->dir = ref->dir;
+       new->dir_gen = ref->dir_gen;
+       new->full_path = NULL;
+       INIT_LIST_HEAD(&new->list);
+       list_add_tail(&new->list, list);
+       return 0;
+ }
  static void __free_recorded_refs(struct list_head *head)
  {
        struct recorded_ref *cur;
@@@ -2724,9 -2754,7 +2754,7 @@@ static int process_recorded_refs(struc
        int ret = 0;
        struct recorded_ref *cur;
        struct recorded_ref *cur2;
-       struct ulist *check_dirs = NULL;
-       struct ulist_iterator uit;
-       struct ulist_node *un;
+       struct list_head check_dirs;
        struct fs_path *valid_path = NULL;
        u64 ow_inode = 0;
        u64 ow_gen;
@@@ -2740,6 -2768,7 +2768,7 @@@ verbose_printk("btrfs: process_recorded
         * which is always '..'
         */
        BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
+       INIT_LIST_HEAD(&check_dirs);
  
        valid_path = fs_path_alloc();
        if (!valid_path) {
                goto out;
        }
  
-       check_dirs = ulist_alloc(GFP_NOFS);
-       if (!check_dirs) {
-               ret = -ENOMEM;
-               goto out;
-       }
        /*
         * First, check if the first ref of the current inode was overwritten
         * before. If yes, we know that the current inode was already orphanized
                                        goto out;
                        }
                }
-               ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                               GFP_NOFS);
+               ret = dup_ref(cur, &check_dirs);
                if (ret < 0)
                        goto out;
        }
                }
  
                list_for_each_entry(cur, &sctx->deleted_refs, list) {
-                       ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                                       GFP_NOFS);
+                       ret = dup_ref(cur, &check_dirs);
                        if (ret < 0)
                                goto out;
                }
                 */
                cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
                                list);
-               ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                               GFP_NOFS);
+               ret = dup_ref(cur, &check_dirs);
                if (ret < 0)
                        goto out;
        } else if (!S_ISDIR(sctx->cur_inode_mode)) {
                                if (ret < 0)
                                        goto out;
                        }
-                       ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                                       GFP_NOFS);
+                       ret = dup_ref(cur, &check_dirs);
                        if (ret < 0)
                                goto out;
                }
                /*
                 * If the inode is still orphan, unlink the orphan. This may
                 * happen when a previous inode did overwrite the first ref
         * deletion and if it's finally possible to perform the rmdir now.
         * We also update the inode stats of the parent dirs here.
         */
-       ULIST_ITER_INIT(&uit);
-       while ((un = ulist_next(check_dirs, &uit))) {
+       list_for_each_entry(cur, &check_dirs, list) {
                /*
                 * In case we had refs into dirs that were not processed yet,
                 * we don't need to do the utime and rmdir logic for these dirs.
                 * The dir will be processed later.
                 */
-               if (un->val > sctx->cur_ino)
+               if (cur->dir > sctx->cur_ino)
                        continue;
  
-               ret = get_cur_inode_state(sctx, un->val, un->aux);
+               ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
                if (ret < 0)
                        goto out;
  
                if (ret == inode_state_did_create ||
                    ret == inode_state_no_change) {
                        /* TODO delayed utimes */
-                       ret = send_utimes(sctx, un->val, un->aux);
+                       ret = send_utimes(sctx, cur->dir, cur->dir_gen);
                        if (ret < 0)
                                goto out;
                } else if (ret == inode_state_did_delete) {
-                       ret = can_rmdir(sctx, un->val, sctx->cur_ino);
+                       ret = can_rmdir(sctx, cur->dir, sctx->cur_ino);
                        if (ret < 0)
                                goto out;
                        if (ret) {
-                               ret = get_cur_path(sctx, un->val, un->aux,
-                                               valid_path);
+                               ret = get_cur_path(sctx, cur->dir,
+                                                  cur->dir_gen, valid_path);
                                if (ret < 0)
                                        goto out;
                                ret = send_rmdir(sctx, valid_path);
        ret = 0;
  
  out:
+       __free_recorded_refs(&check_dirs);
        free_recorded_refs(sctx);
-       ulist_free(check_dirs);
        fs_path_free(valid_path);
        return ret;
  }
@@@ -3119,6 -3136,8 +3136,8 @@@ out
  
  struct find_ref_ctx {
        u64 dir;
+       u64 dir_gen;
+       struct btrfs_root *root;
        struct fs_path *name;
        int found_idx;
  };
@@@ -3128,9 -3147,21 +3147,21 @@@ static int __find_iref(int num, u64 dir
                       void *ctx_)
  {
        struct find_ref_ctx *ctx = ctx_;
+       u64 dir_gen;
+       int ret;
  
        if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
            strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
+               /*
+                * To avoid doing extra lookups we'll only do this if everything
+                * else matches.
+                */
+               ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
+                                    NULL, NULL, NULL);
+               if (ret)
+                       return ret;
+               if (dir_gen != ctx->dir_gen)
+                       return 0;
                ctx->found_idx = num;
                return 1;
        }
  static int find_iref(struct btrfs_root *root,
                     struct btrfs_path *path,
                     struct btrfs_key *key,
-                    u64 dir, struct fs_path *name)
+                    u64 dir, u64 dir_gen, struct fs_path *name)
  {
        int ret;
        struct find_ref_ctx ctx;
  
        ctx.dir = dir;
        ctx.name = name;
+       ctx.dir_gen = dir_gen;
        ctx.found_idx = -1;
+       ctx.root = root;
  
        ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
        if (ret < 0)
@@@ -3163,11 -3196,17 +3196,17 @@@ static int __record_changed_new_ref(in
                                    struct fs_path *name,
                                    void *ctx)
  {
+       u64 dir_gen;
        int ret;
        struct send_ctx *sctx = ctx;
  
+       ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
        ret = find_iref(sctx->parent_root, sctx->right_path,
-                       sctx->cmp_key, dir, name);
+                       sctx->cmp_key, dir, dir_gen, name);
        if (ret == -ENOENT)
                ret = __record_new_ref(num, dir, index, name, sctx);
        else if (ret > 0)
@@@ -3180,11 -3219,17 +3219,17 @@@ static int __record_changed_deleted_ref
                                        struct fs_path *name,
                                        void *ctx)
  {
+       u64 dir_gen;
        int ret;
        struct send_ctx *sctx = ctx;
  
+       ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
        ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
-                       dir, name);
+                       dir, dir_gen, name);
        if (ret == -ENOENT)
                ret = __record_deleted_ref(num, dir, index, name, sctx);
        else if (ret > 0)
@@@ -3869,7 -3914,8 +3914,8 @@@ static int is_extent_unchanged(struct s
        btrfs_item_key_to_cpu(eb, &found_key, slot);
        if (found_key.objectid != key.objectid ||
            found_key.type != key.type) {
-               ret = 0;
+               /* If we're a hole then just pretend nothing changed */
+               ret = (left_disknr) ? 0 : 1;
                goto out;
        }
  
                 * This may only happen on the first iteration.
                 */
                if (found_key.offset + right_len <= ekey->offset) {
-                       ret = 0;
+                       /* If we're a hole just pretend nothing changed */
+                       ret = (left_disknr) ? 0 : 1;
                        goto out;
                }
  
@@@ -3960,8 -4007,8 +4007,8 @@@ static int process_extent(struct send_c
                          struct btrfs_path *path,
                          struct btrfs_key *key)
  {
-       int ret = 0;
        struct clone_root *found_clone = NULL;
+       int ret = 0;
  
        if (S_ISLNK(sctx->cur_inode_mode))
                return 0;
                        ret = 0;
                        goto out;
                }
+       } else {
+               struct btrfs_file_extent_item *ei;
+               u8 type;
+               ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                   struct btrfs_file_extent_item);
+               type = btrfs_file_extent_type(path->nodes[0], ei);
+               if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+                   type == BTRFS_FILE_EXTENT_REG) {
+                       /*
+                        * The send spec does not have a prealloc command yet,
+                        * so just leave a hole for prealloc'ed extents until
+                        * we have enough commands queued up to justify rev'ing
+                        * the send spec.
+                        */
+                       if (type == BTRFS_FILE_EXTENT_PREALLOC) {
+                               ret = 0;
+                               goto out;
+                       }
+                       /* Have a hole, just skip it. */
+                       if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
+                               ret = 0;
+                               goto out;
+                       }
+               }
        }
  
        ret = find_extent_clone(sctx, path, key->objectid, key->offset,
@@@ -4361,6 -4434,64 +4434,64 @@@ static int changed_extent(struct send_c
        return ret;
  }
  
+ static int dir_changed(struct send_ctx *sctx, u64 dir)
+ {
+       u64 orig_gen, new_gen;
+       int ret;
+       ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
+                            NULL, NULL);
+       if (ret)
+               return ret;
+       ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
+       return (orig_gen != new_gen) ? 1 : 0;
+ }
+ static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
+                       struct btrfs_key *key)
+ {
+       struct btrfs_inode_extref *extref;
+       struct extent_buffer *leaf;
+       u64 dirid = 0, last_dirid = 0;
+       unsigned long ptr;
+       u32 item_size;
+       u32 cur_offset = 0;
+       int ref_name_len;
+       int ret = 0;
+       /* Easy case, just check this one dirid */
+       if (key->type == BTRFS_INODE_REF_KEY) {
+               dirid = key->offset;
+               ret = dir_changed(sctx, dirid);
+               goto out;
+       }
+       leaf = path->nodes[0];
+       item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       while (cur_offset < item_size) {
+               extref = (struct btrfs_inode_extref *)(ptr +
+                                                      cur_offset);
+               dirid = btrfs_inode_extref_parent(leaf, extref);
+               ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
+               cur_offset += ref_name_len + sizeof(*extref);
+               if (dirid == last_dirid)
+                       continue;
+               ret = dir_changed(sctx, dirid);
+               if (ret)
+                       break;
+               last_dirid = dirid;
+       }
+ out:
+       return ret;
+ }
  /*
   * Updates compare related fields in sctx and simply forwards to the actual
   * changed_xxx functions.
@@@ -4376,6 -4507,19 +4507,19 @@@ static int changed_cb(struct btrfs_roo
        int ret = 0;
        struct send_ctx *sctx = ctx;
  
+       if (result == BTRFS_COMPARE_TREE_SAME) {
+               if (key->type != BTRFS_INODE_REF_KEY &&
+                   key->type != BTRFS_INODE_EXTREF_KEY)
+                       return 0;
+               ret = compare_refs(sctx, left_path, key);
+               if (!ret)
+                       return 0;
+               if (ret < 0)
+                       return ret;
+               result = BTRFS_COMPARE_TREE_CHANGED;
+               ret = 0;
+       }
        sctx->left_path = left_path;
        sctx->right_path = right_path;
        sctx->cmp_key = key;
diff --combined fs/btrfs/volumes.c
index 67a08538184557486ec802840d610828696edf46,0db165ee43405fccbcc044c5c56606e145503d92..0052ca8264d9b37cc171e52e0954b165fd6dad46
@@@ -26,6 -26,7 +26,7 @@@
  #include <linux/ratelimit.h>
  #include <linux/kthread.h>
  #include <linux/raid/pq.h>
+ #include <linux/semaphore.h>
  #include <asm/div64.h>
  #include "compat.h"
  #include "ctree.h"
@@@ -62,6 -63,48 +63,48 @@@ static void unlock_chunks(struct btrfs_
        mutex_unlock(&root->fs_info->chunk_mutex);
  }
  
+ static struct btrfs_fs_devices *__alloc_fs_devices(void)
+ {
+       struct btrfs_fs_devices *fs_devs;
+       fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
+       if (!fs_devs)
+               return ERR_PTR(-ENOMEM);
+       mutex_init(&fs_devs->device_list_mutex);
+       INIT_LIST_HEAD(&fs_devs->devices);
+       INIT_LIST_HEAD(&fs_devs->alloc_list);
+       INIT_LIST_HEAD(&fs_devs->list);
+       return fs_devs;
+ }
+ /**
+  * alloc_fs_devices - allocate struct btrfs_fs_devices
+  * @fsid:     a pointer to UUID for this FS.  If NULL a new UUID is
+  *            generated.
+  *
+  * Return: a pointer to a new &struct btrfs_fs_devices on success;
+  * ERR_PTR() on error.  Returned struct is not linked onto any lists and
+  * can be destroyed with kfree() right away.
+  */
+ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
+ {
+       struct btrfs_fs_devices *fs_devs;
+       fs_devs = __alloc_fs_devices();
+       if (IS_ERR(fs_devs))
+               return fs_devs;
+       if (fsid)
+               memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
+       else
+               generate_random_uuid(fs_devs->fsid);
+       return fs_devs;
+ }
  static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
  {
        struct btrfs_device *device;
@@@ -101,6 -144,27 +144,27 @@@ void btrfs_cleanup_fs_uuids(void
        }
  }
  
+ static struct btrfs_device *__alloc_device(void)
+ {
+       struct btrfs_device *dev;
+       dev = kzalloc(sizeof(*dev), GFP_NOFS);
+       if (!dev)
+               return ERR_PTR(-ENOMEM);
+       INIT_LIST_HEAD(&dev->dev_list);
+       INIT_LIST_HEAD(&dev->dev_alloc_list);
+       spin_lock_init(&dev->io_lock);
+       spin_lock_init(&dev->reada_lock);
+       atomic_set(&dev->reada_in_flight, 0);
+       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+       return dev;
+ }
  static noinline struct btrfs_device *__find_device(struct list_head *head,
                                                   u64 devid, u8 *uuid)
  {
@@@ -395,16 -459,14 +459,14 @@@ static noinline int device_list_add(con
  
        fs_devices = find_fsid(disk_super->fsid);
        if (!fs_devices) {
-               fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-               if (!fs_devices)
-                       return -ENOMEM;
-               INIT_LIST_HEAD(&fs_devices->devices);
-               INIT_LIST_HEAD(&fs_devices->alloc_list);
+               fs_devices = alloc_fs_devices(disk_super->fsid);
+               if (IS_ERR(fs_devices))
+                       return PTR_ERR(fs_devices);
                list_add(&fs_devices->list, &fs_uuids);
-               memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
                fs_devices->latest_devid = devid;
                fs_devices->latest_trans = found_transid;
-               mutex_init(&fs_devices->device_list_mutex);
                device = NULL;
        } else {
                device = __find_device(&fs_devices->devices, devid,
                if (fs_devices->opened)
                        return -EBUSY;
  
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device) {
+               device = btrfs_alloc_device(NULL, &devid,
+                                           disk_super->dev_item.uuid);
+               if (IS_ERR(device)) {
                        /* we can safely leave the fs_devices entry around */
-                       return -ENOMEM;
+                       return PTR_ERR(device);
                }
-               device->devid = devid;
-               device->dev_stats_valid = 0;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, disk_super->dev_item.uuid,
-                      BTRFS_UUID_SIZE);
-               spin_lock_init(&device->io_lock);
  
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name) {
                        return -ENOMEM;
                }
                rcu_assign_pointer(device->name, name);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
-               /* init readahead state */
-               spin_lock_init(&device->reada_lock);
-               device->reada_curr_zone = NULL;
-               atomic_set(&device->reada_in_flight, 0);
-               device->reada_next = 0;
-               INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-               INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
  
                mutex_lock(&fs_devices->device_list_mutex);
                list_add_rcu(&device->dev_list, &fs_devices->devices);
+               fs_devices->num_devices++;
                mutex_unlock(&fs_devices->device_list_mutex);
  
                device->fs_devices = fs_devices;
-               fs_devices->num_devices++;
        } else if (!device->name || strcmp(device->name->str, path)) {
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name)
@@@ -474,25 -522,21 +522,21 @@@ static struct btrfs_fs_devices *clone_f
        struct btrfs_device *device;
        struct btrfs_device *orig_dev;
  
-       fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!fs_devices)
-               return ERR_PTR(-ENOMEM);
+       fs_devices = alloc_fs_devices(orig->fsid);
+       if (IS_ERR(fs_devices))
+               return fs_devices;
  
-       INIT_LIST_HEAD(&fs_devices->devices);
-       INIT_LIST_HEAD(&fs_devices->alloc_list);
-       INIT_LIST_HEAD(&fs_devices->list);
-       mutex_init(&fs_devices->device_list_mutex);
        fs_devices->latest_devid = orig->latest_devid;
        fs_devices->latest_trans = orig->latest_trans;
        fs_devices->total_devices = orig->total_devices;
-       memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
  
        /* We have held the volume lock, it is safe to get the devices. */
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
                struct rcu_string *name;
  
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device)
+               device = btrfs_alloc_device(NULL, &orig_dev->devid,
+                                           orig_dev->uuid);
+               if (IS_ERR(device))
                        goto error;
  
                /*
                }
                rcu_assign_pointer(device->name, name);
  
-               device->devid = orig_dev->devid;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
-               spin_lock_init(&device->io_lock);
-               INIT_LIST_HEAD(&device->dev_list);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
                list_add(&device->dev_list, &fs_devices->devices);
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
@@@ -636,23 -673,22 +673,22 @@@ static int __btrfs_close_devices(struc
  
                if (device->can_discard)
                        fs_devices->num_can_discard--;
+               if (device->missing)
+                       fs_devices->missing_devices--;
  
-               new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
-               BUG_ON(!new_device); /* -ENOMEM */
-               memcpy(new_device, device, sizeof(*new_device));
+               new_device = btrfs_alloc_device(NULL, &device->devid,
+                                               device->uuid);
+               BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
  
                /* Safe because we are under uuid_mutex */
                if (device->name) {
                        name = rcu_string_strdup(device->name->str, GFP_NOFS);
-                       BUG_ON(device->name && !name); /* -ENOMEM */
+                       BUG_ON(!name); /* -ENOMEM */
                        rcu_assign_pointer(new_device->name, name);
                }
-               new_device->bdev = NULL;
-               new_device->writeable = 0;
-               new_device->in_fs_metadata = 0;
-               new_device->can_discard = 0;
-               spin_lock_init(&new_device->io_lock);
                list_replace_rcu(&device->dev_list, &new_device->dev_list);
+               new_device->fs_devices = device->fs_devices;
  
                call_rcu(&device->rcu, free_device);
        }
@@@ -865,7 -901,7 +901,7 @@@ int btrfs_scan_one_device(const char *p
        disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
  
        if (btrfs_super_bytenr(disk_super) != bytenr ||
-           disk_super->magic != cpu_to_le64(BTRFS_MAGIC))
+           btrfs_super_magic(disk_super) != BTRFS_MAGIC)
                goto error_unmap;
  
        devid = btrfs_stack_device_id(&disk_super->dev_item);
                printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
        }
  
-       printk(KERN_CONT "devid %llu transid %llu %s\n",
-              (unsigned long long)devid, (unsigned long long)transid, path);
+       printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
  
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
        if (!ret && fs_devices_ret)
@@@ -1278,8 -1313,7 +1313,7 @@@ static int btrfs_alloc_dev_extent(struc
        btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
  
        write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
-                   (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
-                   BTRFS_UUID_SIZE);
+                   btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
  
        btrfs_set_dev_extent_length(leaf, extent, num_bytes);
        btrfs_mark_buffer_dirty(leaf);
@@@ -1307,15 -1341,14 +1341,14 @@@ static u64 find_next_chunk(struct btrfs
        return ret;
  }
  
- static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
+ static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
+                                   u64 *devid_ret)
  {
        int ret;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
  
-       root = root->fs_info->chunk_root;
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = (u64)-1;
  
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
  
        BUG_ON(ret == 0); /* Corruption */
  
-       ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+       ret = btrfs_previous_item(fs_info->chunk_root, path,
+                                 BTRFS_DEV_ITEMS_OBJECTID,
                                  BTRFS_DEV_ITEM_KEY);
        if (ret) {
-               *objectid = 1;
+               *devid_ret = 1;
        } else {
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                      path->slots[0]);
-               *objectid = found_key.offset + 1;
+               *devid_ret = found_key.offset + 1;
        }
        ret = 0;
  error:
@@@ -1391,9 -1425,9 +1425,9 @@@ static int btrfs_add_device(struct btrf
        btrfs_set_device_bandwidth(leaf, dev_item, 0);
        btrfs_set_device_start_offset(leaf, dev_item, 0);
  
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
        write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
-       ptr = (unsigned long)btrfs_device_fsid(dev_item);
+       ptr = btrfs_device_fsid(dev_item);
        write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
        btrfs_mark_buffer_dirty(leaf);
  
@@@ -1562,7 -1596,9 +1596,9 @@@ int btrfs_rm_device(struct btrfs_root *
                clear_super = true;
        }
  
+       mutex_unlock(&uuid_mutex);
        ret = btrfs_shrink_device(device, 0);
+       mutex_lock(&uuid_mutex);
        if (ret)
                goto error_undo;
  
        /*
         * the device list mutex makes sure that we don't change
         * the device list while someone else is writing out all
-        * the device supers.
+        * the device supers. Whoever is writing all supers, should
+        * lock the device list mutex before getting the number of
+        * devices in the super block (super_copy). Conversely,
+        * whoever updates the number of devices in the super block
+        * (super_copy) should hold the device list mutex.
         */
  
        cur_devices = device->fs_devices;
                device->fs_devices->open_devices--;
  
        call_rcu(&device->rcu, free_device);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
  
        num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
        btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
  
        if (cur_devices->open_devices == 0) {
                struct btrfs_fs_devices *fs_devices;
@@@ -1793,9 -1833,9 +1833,9 @@@ static int btrfs_prepare_sprout(struct 
        if (!fs_devices->seeding)
                return -EINVAL;
  
-       seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!seed_devices)
-               return -ENOMEM;
+       seed_devices = __alloc_fs_devices();
+       if (IS_ERR(seed_devices))
+               return PTR_ERR(seed_devices);
  
        old_devices = clone_fs_devices(fs_devices);
        if (IS_ERR(old_devices)) {
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
                              synchronize_rcu);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
  
        list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
        list_for_each_entry(device, &seed_devices->devices, dev_list) {
        generate_random_uuid(fs_devices->fsid);
        memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
        memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
        super_flags = btrfs_super_flags(disk_super) &
                      ~BTRFS_SUPER_FLAG_SEEDING;
        btrfs_set_super_flags(disk_super, super_flags);
@@@ -1889,11 -1930,9 +1930,9 @@@ next_slot
                dev_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_dev_item);
                devid = btrfs_device_id(leaf, dev_item);
-               read_extent_buffer(leaf, dev_uuid,
-                                  (unsigned long)btrfs_device_uuid(dev_item),
+               read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                                   BTRFS_UUID_SIZE);
-               read_extent_buffer(leaf, fs_uuid,
-                                  (unsigned long)btrfs_device_fsid(dev_item),
+               read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                                   BTRFS_UUID_SIZE);
                device = btrfs_find_device(root->fs_info, devid, dev_uuid,
                                           fs_uuid);
@@@ -1956,10 -1995,10 +1995,10 @@@ int btrfs_init_new_device(struct btrfs_
        }
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
  
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
+       device = btrfs_alloc_device(root->fs_info, NULL, NULL);
+       if (IS_ERR(device)) {
                /* we can safely leave the fs_devices entry around */
-               ret = -ENOMEM;
+               ret = PTR_ERR(device);
                goto error;
        }
  
        }
        rcu_assign_pointer(device->name, name);
  
-       ret = find_next_devid(root, &device->devid);
-       if (ret) {
-               rcu_string_free(device->name);
-               kfree(device);
-               goto error;
-       }
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                rcu_string_free(device->name);
        if (blk_queue_discard(q))
                device->can_discard = 1;
        device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       spin_lock_init(&device->io_lock);
        device->generation = trans->transid;
        device->io_width = root->sectorsize;
        device->io_align = root->sectorsize;
@@@ -2121,6 -2150,7 +2150,7 @@@ int btrfs_init_dev_replace_tgtdev(struc
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *devices;
        struct rcu_string *name;
+       u64 devid = BTRFS_DEV_REPLACE_DEVID;
        int ret = 0;
  
        *device_out = NULL;
                }
        }
  
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
-               ret = -ENOMEM;
+       device = btrfs_alloc_device(NULL, &devid, NULL);
+       if (IS_ERR(device)) {
+               ret = PTR_ERR(device);
                goto error;
        }
  
                device->can_discard = 1;
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       device->devid = BTRFS_DEV_REPLACE_DEVID;
-       spin_lock_init(&device->io_lock);
        device->generation = 0;
        device->io_width = root->sectorsize;
        device->io_align = root->sectorsize;
@@@ -2971,10 -2997,6 +2997,6 @@@ again
                if (found_key.objectid != key.objectid)
                        break;
  
-               /* chunk zero is special */
-               if (found_key.offset == 0)
-                       break;
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
  
                if (!counting) {
                        spin_unlock(&fs_info->balance_lock);
                }
  loop:
+               if (found_key.offset == 0)
+                       break;
                key.offset = found_key.offset - 1;
        }
  
@@@ -3074,9 -3098,6 +3098,6 @@@ static void __cancel_balance(struct btr
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
  }
  
- void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
-                              struct btrfs_ioctl_balance_args *bargs);
  /*
   * Should be called with both balance and volume mutexes held
   */
@@@ -3139,7 -3160,7 +3160,7 @@@ int btrfs_balance(struct btrfs_balance_
             (bctl->data.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "data profile %llu\n",
-                      (unsigned long long)bctl->data.target);
+                      bctl->data.target);
                ret = -EINVAL;
                goto out;
        }
             (bctl->meta.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "metadata profile %llu\n",
-                      (unsigned long long)bctl->meta.target);
+                      bctl->meta.target);
                ret = -EINVAL;
                goto out;
        }
             (bctl->sys.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "system profile %llu\n",
-                      (unsigned long long)bctl->sys.target);
+                      bctl->sys.target);
                ret = -EINVAL;
                goto out;
        }
@@@ -3302,7 -3323,7 +3323,7 @@@ int btrfs_resume_balance_async(struct b
        }
  
        tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
 -      return PTR_RET(tsk);
 +      return PTR_ERR_OR_ZERO(tsk);
  }
  
  int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
@@@ -3430,6 -3451,264 +3451,264 @@@ int btrfs_cancel_balance(struct btrfs_f
        return 0;
  }
  
+ static int btrfs_uuid_scan_kthread(void *data)
+ {
+       struct btrfs_fs_info *fs_info = data;
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_key key;
+       struct btrfs_key max_key;
+       struct btrfs_path *path = NULL;
+       int ret = 0;
+       struct extent_buffer *eb;
+       int slot;
+       struct btrfs_root_item root_item;
+       u32 item_size;
+       struct btrfs_trans_handle *trans = NULL;
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       key.objectid = 0;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = 0;
+       max_key.objectid = (u64)-1;
+       max_key.type = BTRFS_ROOT_ITEM_KEY;
+       max_key.offset = (u64)-1;
+       path->keep_locks = 1;
+       while (1) {
+               ret = btrfs_search_forward(root, &key, &max_key, path, 0);
+               if (ret) {
+                       if (ret > 0)
+                               ret = 0;
+                       break;
+               }
+               if (key.type != BTRFS_ROOT_ITEM_KEY ||
+                   (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
+                    key.objectid != BTRFS_FS_TREE_OBJECTID) ||
+                   key.objectid > BTRFS_LAST_FREE_OBJECTID)
+                       goto skip;
+               eb = path->nodes[0];
+               slot = path->slots[0];
+               item_size = btrfs_item_size_nr(eb, slot);
+               if (item_size < sizeof(root_item))
+                       goto skip;
+               read_extent_buffer(eb, &root_item,
+                                  btrfs_item_ptr_offset(eb, slot),
+                                  (int)sizeof(root_item));
+               if (btrfs_root_refs(&root_item) == 0)
+                       goto skip;
+               if (!btrfs_is_empty_uuid(root_item.uuid) ||
+                   !btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       if (trans)
+                               goto update_tree;
+                       btrfs_release_path(path);
+                       /*
+                        * 1 - subvol uuid item
+                        * 1 - received_subvol uuid item
+                        */
+                       trans = btrfs_start_transaction(fs_info->uuid_root, 2);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               break;
+                       }
+                       continue;
+               } else {
+                       goto skip;
+               }
+ update_tree:
+               if (!btrfs_is_empty_uuid(root_item.uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.uuid,
+                                                 BTRFS_UUID_KEY_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+               if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.received_uuid,
+                                                BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+ skip:
+               if (trans) {
+                       ret = btrfs_end_transaction(trans, fs_info->uuid_root);
+                       trans = NULL;
+                       if (ret)
+                               break;
+               }
+               btrfs_release_path(path);
+               if (key.offset < (u64)-1) {
+                       key.offset++;
+               } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+               } else if (key.objectid < (u64)-1) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+                       key.objectid++;
+               } else {
+                       break;
+               }
+               cond_resched();
+       }
+ out:
+       btrfs_free_path(path);
+       if (trans && !IS_ERR(trans))
+               btrfs_end_transaction(trans, fs_info->uuid_root);
+       if (ret)
+               pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
+       else
+               fs_info->update_uuid_tree_gen = 1;
+       up(&fs_info->uuid_tree_rescan_sem);
+       return 0;
+ }
+ /*
+  * Callback for btrfs_uuid_tree_iterate().
+  * returns:
+  * 0  check succeeded, the entry is not outdated.
+  * < 0        if an error occurred.
+  * > 0        if the check failed, which means the caller shall remove the entry.
+  */
+ static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
+                                      u8 *uuid, u8 type, u64 subid)
+ {
+       struct btrfs_key key;
+       int ret = 0;
+       struct btrfs_root *subvol_root;
+       if (type != BTRFS_UUID_KEY_SUBVOL &&
+           type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
+               goto out;
+       key.objectid = subid;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = (u64)-1;
+       subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(subvol_root)) {
+               ret = PTR_ERR(subvol_root);
+               if (ret == -ENOENT)
+                       ret = 1;
+               goto out;
+       }
+       switch (type) {
+       case BTRFS_UUID_KEY_SUBVOL:
+               if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
+                       ret = 1;
+               break;
+       case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
+               if (memcmp(uuid, subvol_root->root_item.received_uuid,
+                          BTRFS_UUID_SIZE))
+                       ret = 1;
+               break;
+       }
+ out:
+       return ret;
+ }
+ static int btrfs_uuid_rescan_kthread(void *data)
+ {
+       struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
+       int ret;
+       /*
+        * 1st step is to iterate through the existing UUID tree and
+        * to delete all entries that contain outdated data.
+        * 2nd step is to add all missing entries to the UUID tree.
+        */
+       ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
+       if (ret < 0) {
+               pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
+               up(&fs_info->uuid_tree_rescan_sem);
+               return ret;
+       }
+       return btrfs_uuid_scan_kthread(data);
+ }
+ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *uuid_root;
+       struct task_struct *task;
+       int ret;
+       /*
+        * 1 - root node
+        * 1 - root item
+        */
+       trans = btrfs_start_transaction(tree_root, 2);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+       uuid_root = btrfs_create_tree(trans, fs_info,
+                                     BTRFS_UUID_TREE_OBJECTID);
+       if (IS_ERR(uuid_root)) {
+               btrfs_abort_transaction(trans, tree_root,
+                                       PTR_ERR(uuid_root));
+               return PTR_ERR(uuid_root);
+       }
+       fs_info->uuid_root = uuid_root;
+       ret = btrfs_commit_transaction(trans, tree_root);
+       if (ret)
+               return ret;
+       down(&fs_info->uuid_tree_rescan_sem);
+       task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
+       if (IS_ERR(task)) {
+               /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
+               pr_warn("btrfs: failed to start uuid_scan task\n");
+               up(&fs_info->uuid_tree_rescan_sem);
+               return PTR_ERR(task);
+       }
+       return 0;
+ }
+ int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
+ {
+       struct task_struct *task;
+       down(&fs_info->uuid_tree_rescan_sem);
+       task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
+       if (IS_ERR(task)) {
+               /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
+               pr_warn("btrfs: failed to start uuid_rescan task\n");
+               up(&fs_info->uuid_tree_rescan_sem);
+               return PTR_ERR(task);
+       }
+       return 0;
+ }
  /*
   * shrinking a device means finding all of the device extents past
   * the new size, and then following the back refs to the chunks.
@@@ -4194,13 -4473,13 +4473,13 @@@ int btrfs_num_copies(struct btrfs_fs_in
         * and exit, so return 1 so the callers don't try to use other copies.
         */
        if (!em) {
-               btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+               btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
                            logical+len);
                return 1;
        }
  
        if (em->start > logical || em->start + em->len < logical) {
-               btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+               btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
                            "%Lu-%Lu\n", logical, logical+len, em->start,
                            em->start + em->len);
                return 1;
@@@ -4375,8 -4654,7 +4654,7 @@@ static int __btrfs_map_block(struct btr
  
        if (!em) {
                btrfs_crit(fs_info, "unable to find logical %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)*length);
+                       logical, *length);
                return -EINVAL;
        }
  
        }
        bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
        if (!bbio) {
+               kfree(raid_map);
                ret = -ENOMEM;
                goto out;
        }
@@@ -5246,9 -5525,7 +5525,7 @@@ int btrfs_map_bio(struct btrfs_root *ro
  
        if (map_length < length) {
                btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)length,
-                       (unsigned long long)map_length);
+                       logical, length, map_length);
                BUG();
        }
  
@@@ -5314,23 -5591,72 +5591,72 @@@ static struct btrfs_device *add_missing
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
  
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device)
+       device = btrfs_alloc_device(NULL, &devid, dev_uuid);
+       if (IS_ERR(device))
                return NULL;
-       list_add(&device->dev_list,
-                &fs_devices->devices);
-       device->devid = devid;
-       device->work.func = pending_bios_fn;
+       list_add(&device->dev_list, &fs_devices->devices);
        device->fs_devices = fs_devices;
-       device->missing = 1;
        fs_devices->num_devices++;
+       device->missing = 1;
        fs_devices->missing_devices++;
-       spin_lock_init(&device->io_lock);
-       INIT_LIST_HEAD(&device->dev_alloc_list);
-       memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
        return device;
  }
  
+ /**
+  * btrfs_alloc_device - allocate struct btrfs_device
+  * @fs_info:  used only for generating a new devid, can be NULL if
+  *            devid is provided (i.e. @devid != NULL).
+  * @devid:    a pointer to devid for this device.  If NULL a new devid
+  *            is generated.
+  * @uuid:     a pointer to UUID for this device.  If NULL a new UUID
+  *            is generated.
+  *
+  * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
+  * on error.  Returned struct is not linked onto any lists and can be
+  * destroyed with kfree() right away.
+  */
+ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+                                       const u64 *devid,
+                                       const u8 *uuid)
+ {
+       struct btrfs_device *dev;
+       u64 tmp;
+       if (!devid && !fs_info) {
+               WARN_ON(1);
+               return ERR_PTR(-EINVAL);
+       }
+       dev = __alloc_device();
+       if (IS_ERR(dev))
+               return dev;
+       if (devid)
+               tmp = *devid;
+       else {
+               int ret;
+               ret = find_next_devid(fs_info, &tmp);
+               if (ret) {
+                       kfree(dev);
+                       return ERR_PTR(ret);
+               }
+       }
+       dev->devid = tmp;
+       if (uuid)
+               memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
+       else
+               generate_random_uuid(dev->uuid);
+       dev->work.func = pending_bios_fn;
+       return dev;
+ }
  static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                          struct extent_buffer *leaf,
                          struct btrfs_chunk *chunk)
@@@ -5437,7 -5763,7 +5763,7 @@@ static void fill_device_from_item(struc
        WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
        device->is_tgtdev_for_dev_replace = 0;
  
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
        read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
  }
  
@@@ -5500,11 -5826,9 +5826,9 @@@ static int read_one_dev(struct btrfs_ro
        u8 dev_uuid[BTRFS_UUID_SIZE];
  
        devid = btrfs_device_id(leaf, dev_item);
-       read_extent_buffer(leaf, dev_uuid,
-                          (unsigned long)btrfs_device_uuid(dev_item),
+       read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                           BTRFS_UUID_SIZE);
-       read_extent_buffer(leaf, fs_uuid,
-                          (unsigned long)btrfs_device_fsid(dev_item),
+       read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                           BTRFS_UUID_SIZE);
  
        if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
                        return -EIO;
  
                if (!device) {
-                       btrfs_warn(root->fs_info, "devid %llu missing",
-                               (unsigned long long)devid);
+                       btrfs_warn(root->fs_info, "devid %llu missing", devid);
                        device = add_missing_dev(root, devid, dev_uuid);
                        if (!device)
                                return -ENOMEM;
@@@ -5644,14 -5967,15 +5967,15 @@@ int btrfs_read_chunk_tree(struct btrfs_
        mutex_lock(&uuid_mutex);
        lock_chunks(root);
  
-       /* first we search for all of the device items, and then we
-        * read in all of the chunk items.  This way we can create chunk
-        * mappings that reference all of the devices that are afound
+       /*
+        * Read all device items, and then all the chunk items. All
+        * device items are found before any chunk item (their object id
+        * is smaller than the lowest possible object id for a chunk
+        * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
         */
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.offset = 0;
        key.type = 0;
- again:
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
                        break;
                }
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
-               if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-                       if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
-                               break;
-                       if (found_key.type == BTRFS_DEV_ITEM_KEY) {
-                               struct btrfs_dev_item *dev_item;
-                               dev_item = btrfs_item_ptr(leaf, slot,
+               if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+                       struct btrfs_dev_item *dev_item;
+                       dev_item = btrfs_item_ptr(leaf, slot,
                                                  struct btrfs_dev_item);
-                               ret = read_one_dev(root, leaf, dev_item);
-                               if (ret)
-                                       goto error;
-                       }
+                       ret = read_one_dev(root, leaf, dev_item);
+                       if (ret)
+                               goto error;
                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                        struct btrfs_chunk *chunk;
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
                }
                path->slots[0]++;
        }
-       if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-               key.objectid = 0;
-               btrfs_release_path(path);
-               goto again;
-       }
        ret = 0;
  error:
        unlock_chunks(root);