git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/btrfs/inode.c
Merge branch 'setns'
[karo-tx-linux.git] / fs / btrfs / inode.c
index 01438e9ba2e2c64a8de31dcd9c38403f89546a9e..bb51bb1fa44f836ffaa519eaa3c49b96a6ebac03 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/posix_acl.h>
 #include <linux/falloc.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -341,6 +342,10 @@ static noinline int compress_file_range(struct inode *inode,
        int will_compress;
        int compress_type = root->fs_info->compress_type;
 
+       /* if this is a small write inside eof, kick off a defragbot */
+       if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024)
+               btrfs_add_inode_defrag(NULL, inode);
+
        actual_end = min_t(u64, isize, end + 1);
 again:
        will_compress = 0;
@@ -650,7 +655,7 @@ retry:
                                        async_extent->start +
                                        async_extent->ram_size - 1, 0);
 
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                BUG_ON(!em);
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
@@ -798,6 +803,10 @@ static noinline int cow_file_range(struct inode *inode,
        disk_num_bytes = num_bytes;
        ret = 0;
 
+       /* if this is a small write inside eof, kick off defrag */
+       if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024)
+               btrfs_add_inode_defrag(trans, inode);
+
        if (start == 0) {
                /* lets try to make an inline extent */
                ret = cow_file_range_inline(trans, root, inode,
@@ -836,7 +845,7 @@ static noinline int cow_file_range(struct inode *inode,
                                           (u64)-1, &ins, 1);
                BUG_ON(ret);
 
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                BUG_ON(!em);
                em->start = start;
                em->orig_start = em->start;
@@ -1018,7 +1027,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
        LIST_HEAD(list);
 
        ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
-                                      bytenr + num_bytes - 1, &list);
+                                      bytenr + num_bytes - 1, &list, 0);
        if (ret == 0 && list_empty(&list))
                return 0;
 
@@ -1176,7 +1185,7 @@ out_check:
                        goto next_slot;
                }
 
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
                if (cow_start != (u64)-1) {
                        ret = cow_file_range(inode, locked_page, cow_start,
                                        found_key.offset - 1, page_started,
@@ -1189,7 +1198,7 @@ out_check:
                        struct extent_map *em;
                        struct extent_map_tree *em_tree;
                        em_tree = &BTRFS_I(inode)->extent_tree;
-                       em = alloc_extent_map(GFP_NOFS);
+                       em = alloc_extent_map();
                        BUG_ON(!em);
                        em->start = cur_offset;
                        em->orig_start = em->start;
@@ -1234,7 +1243,7 @@ out_check:
                if (cur_offset > end)
                        break;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        if (cur_offset <= end && cow_start == (u64)-1)
                cow_start = cur_offset;
@@ -1322,7 +1331,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
 
        /*
         * set_bit and clear bit hooks normally require _irqsave/restore
-        * but in this case, we are only testeing for the DELALLOC
+        * but in this case, we are only testing for the DELALLOC
         * bit, which is only set or cleared with irqs on
         */
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
@@ -1355,7 +1364,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
 {
        /*
         * set_bit and clear bit hooks normally require _irqsave/restore
-        * but in this case, we are only testeing for the DELALLOC
+        * but in this case, we are only testing for the DELALLOC
         * bit, which is only set or cleared with irqs on
         */
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
@@ -1865,7 +1874,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
                }
                read_unlock(&em_tree->lock);
 
-               if (!em || IS_ERR(em)) {
+               if (IS_ERR_OR_NULL(em)) {
                        kfree(failrec);
                        return -EIO;
                }
@@ -2014,13 +2023,11 @@ good:
        return 0;
 
 zeroit:
-       if (printk_ratelimit()) {
-               printk(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
+       printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
                       "private %llu\n",
                       (unsigned long long)btrfs_ino(page->mapping->host),
                       (unsigned long long)start, csum,
                       (unsigned long long)private);
-       }
        memset(kaddr + offset, 1, end - start + 1);
        flush_dcache_page(page);
        kunmap_atomic(kaddr, KM_USER0);
@@ -2357,7 +2364,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                        break;
 
                /* release the path since we're done with it */
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
 
                /*
                 * this is where we are basically btrfs_lookup, without the
@@ -2659,11 +2666,26 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        int ret;
 
+       /*
+        * If root is tree root, it means this inode is used to
+        * store free space information. And these inodes are updated
+        * when committing the transaction, so their updates must not be
+        * delayed, or a deadlock will occur.
+        */
+       if (!is_free_space_inode(root, inode)) {
+               ret = btrfs_delayed_update_inode(trans, root, inode);
+               if (!ret)
+                       btrfs_set_inode_last_trans(trans, inode);
+               return ret;
+       }
+
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
+
        path->leave_spinning = 1;
-       ret = btrfs_lookup_inode(trans, root, path,
-                                &BTRFS_I(inode)->location, 1);
+       ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
+                                1);
        if (ret) {
                if (ret > 0)
                        ret = -ENOENT;
@@ -2673,7 +2695,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
        btrfs_unlock_up_safe(path, 1);
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
-                                 struct btrfs_inode_item);
+                                   struct btrfs_inode_item);
 
        fill_inode_item(trans, leaf, inode_item, inode);
        btrfs_mark_buffer_dirty(leaf);
@@ -2684,7 +2706,6 @@ failed:
        return ret;
 }
 
-
 /*
  * unlink helper that gets used here in inode.c and in the tree logging
  * recovery code.  It remove a link in a directory with a given name, and
@@ -2726,7 +2747,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        ret = btrfs_delete_one_dir_name(trans, root, path, di);
        if (ret)
                goto err;
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
                                  dir_ino, &index);
@@ -2737,18 +2758,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                goto err;
        }
 
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino,
-                                        index, name, name_len, -1);
-       if (IS_ERR(di)) {
-               ret = PTR_ERR(di);
-               goto err;
-       }
-       if (!di) {
-               ret = -ENOENT;
+       ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
+       if (ret)
                goto err;
-       }
-       ret = btrfs_delete_one_dir_name(trans, root, path, di);
-       btrfs_release_path(root, path);
 
        ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
                                         inode, dir_ino);
@@ -2877,7 +2889,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        } else {
                check_link = 0;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        ret = btrfs_lookup_inode(trans, root, path,
                                &BTRFS_I(inode)->location, 0);
@@ -2891,7 +2903,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        } else {
                check_link = 0;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        if (ret == 0 && S_ISREG(inode->i_mode)) {
                ret = btrfs_lookup_file_extent(trans, root, path,
@@ -2903,7 +2915,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
                BUG_ON(ret == 0);
                if (check_path_shared(root, path))
                        goto out;
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
        }
 
        if (!check_link) {
@@ -2924,7 +2936,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
                err = 0;
                goto out;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        ref = btrfs_lookup_inode_ref(trans, root, path,
                                dentry->d_name.name, dentry->d_name.len,
@@ -2937,8 +2949,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        if (check_path_shared(root, path))
                goto out;
        index = btrfs_inode_ref_index(path->nodes[0], ref);
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
+       /*
+        * This is a commit root search, if we can lookup inode item and other
+        * related items in the commit root, it means the transaction of
+        * dir/file creation has been committed, and the dir index item that we
+        * delay to insert has also been inserted into the commit root. So
+        * we needn't worry about the delayed insertion of the dir index item
+        * here.
+        */
        di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
                                dentry->d_name.name, dentry->d_name.len, 0);
        if (IS_ERR(di)) {
@@ -3022,14 +3042,14 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 
        di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
                                   name, name_len, -1);
-       BUG_ON(!di || IS_ERR(di));
+       BUG_ON(IS_ERR_OR_NULL(di));
 
        leaf = path->nodes[0];
        btrfs_dir_item_key_to_cpu(leaf, di, &key);
        WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
        ret = btrfs_delete_one_dir_name(trans, root, path, di);
        BUG_ON(ret);
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
                                 objectid, root->root_key.objectid,
@@ -3038,31 +3058,23 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
                BUG_ON(ret != -ENOENT);
                di = btrfs_search_dir_index_item(root, path, dir_ino,
                                                 name, name_len);
-               BUG_ON(!di || IS_ERR(di));
+               BUG_ON(IS_ERR_OR_NULL(di));
 
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
                index = key.offset;
        }
+       btrfs_release_path(path);
 
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino,
-                                        index, name, name_len, -1);
-       BUG_ON(!di || IS_ERR(di));
-
-       leaf = path->nodes[0];
-       btrfs_dir_item_key_to_cpu(leaf, di, &key);
-       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
-       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
        BUG_ON(ret);
-       btrfs_release_path(root, path);
 
        btrfs_i_size_write(dir, dir->i_size - name_len * 2);
        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode(trans, root, dir);
        BUG_ON(ret);
 
-       btrfs_free_path(path);
        return 0;
 }
 
@@ -3109,178 +3121,6 @@ out:
        return err;
 }
 
-#if 0
-/*
- * when truncating bytes in a file, it is possible to avoid reading
- * the leaves that contain only checksum items.  This can be the
- * majority of the IO required to delete a large file, but it must
- * be done carefully.
- *
- * The keys in the level just above the leaves are checked to make sure
- * the lowest key in a given leaf is a csum key, and starts at an offset
- * after the new  size.
- *
- * Then the key for the next leaf is checked to make sure it also has
- * a checksum item for the same file.  If it does, we know our target leaf
- * contains only checksum items, and it can be safely freed without reading
- * it.
- *
- * This is just an optimization targeted at large files.  It may do
- * nothing.  It will return 0 unless things went badly.
- */
-static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *root,
-                                    struct btrfs_path *path,
-                                    struct inode *inode, u64 new_size)
-{
-       struct btrfs_key key;
-       int ret;
-       int nritems;
-       struct btrfs_key found_key;
-       struct btrfs_key other_key;
-       struct btrfs_leaf_ref *ref;
-       u64 leaf_gen;
-       u64 leaf_start;
-
-       path->lowest_level = 1;
-       key.objectid = inode->i_ino;
-       key.type = BTRFS_CSUM_ITEM_KEY;
-       key.offset = new_size;
-again:
-       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-       if (ret < 0)
-               goto out;
-
-       if (path->nodes[1] == NULL) {
-               ret = 0;
-               goto out;
-       }
-       ret = 0;
-       btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
-       nritems = btrfs_header_nritems(path->nodes[1]);
-
-       if (!nritems)
-               goto out;
-
-       if (path->slots[1] >= nritems)
-               goto next_node;
-
-       /* did we find a key greater than anything we want to delete? */
-       if (found_key.objectid > inode->i_ino ||
-          (found_key.objectid == inode->i_ino && found_key.type > key.type))
-               goto out;
-
-       /* we check the next key in the node to make sure the leave contains
-        * only checksum items.  This comparison doesn't work if our
-        * leaf is the last one in the node
-        */
-       if (path->slots[1] + 1 >= nritems) {
-next_node:
-               /* search forward from the last key in the node, this
-                * will bring us into the next node in the tree
-                */
-               btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
-
-               /* unlikely, but we inc below, so check to be safe */
-               if (found_key.offset == (u64)-1)
-                       goto out;
-
-               /* search_forward needs a path with locks held, do the
-                * search again for the original key.  It is possible
-                * this will race with a balance and return a path that
-                * we could modify, but this drop is just an optimization
-                * and is allowed to miss some leaves.
-                */
-               btrfs_release_path(root, path);
-               found_key.offset++;
-
-               /* setup a max key for search_forward */
-               other_key.offset = (u64)-1;
-               other_key.type = key.type;
-               other_key.objectid = key.objectid;
-
-               path->keep_locks = 1;
-               ret = btrfs_search_forward(root, &found_key, &other_key,
-                                          path, 0, 0);
-               path->keep_locks = 0;
-               if (ret || found_key.objectid != key.objectid ||
-                   found_key.type != key.type) {
-                       ret = 0;
-                       goto out;
-               }
-
-               key.offset = found_key.offset;
-               btrfs_release_path(root, path);
-               cond_resched();
-               goto again;
-       }
-
-       /* we know there's one more slot after us in the tree,
-        * read that key so we can verify it is also a checksum item
-        */
-       btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
-
-       if (found_key.objectid < inode->i_ino)
-               goto next_key;
-
-       if (found_key.type != key.type || found_key.offset < new_size)
-               goto next_key;
-
-       /*
-        * if the key for the next leaf isn't a csum key from this objectid,
-        * we can't be sure there aren't good items inside this leaf.
-        * Bail out
-        */
-       if (other_key.objectid != inode->i_ino || other_key.type != key.type)
-               goto out;
-
-       leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
-       leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
-       /*
-        * it is safe to delete this leaf, it contains only
-        * csum items from this inode at an offset >= new_size
-        */
-       ret = btrfs_del_leaf(trans, root, path, leaf_start);
-       BUG_ON(ret);
-
-       if (root->ref_cows && leaf_gen < trans->transid) {
-               ref = btrfs_alloc_leaf_ref(root, 0);
-               if (ref) {
-                       ref->root_gen = root->root_key.offset;
-                       ref->bytenr = leaf_start;
-                       ref->owner = 0;
-                       ref->generation = leaf_gen;
-                       ref->nritems = 0;
-
-                       btrfs_sort_leaf_ref(ref);
-
-                       ret = btrfs_add_leaf_ref(root, ref, 0);
-                       WARN_ON(ret);
-                       btrfs_free_leaf_ref(root, ref);
-               } else {
-                       WARN_ON(1);
-               }
-       }
-next_key:
-       btrfs_release_path(root, path);
-
-       if (other_key.objectid == inode->i_ino &&
-           other_key.type == key.type && other_key.offset > key.offset) {
-               key.offset = other_key.offset;
-               cond_resched();
-               goto again;
-       }
-       ret = 0;
-out:
-       /* fixup any changes we've made to the path */
-       path->lowest_level = 0;
-       path->keep_locks = 0;
-       btrfs_release_path(root, path);
-       return ret;
-}
-
-#endif
-
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -3323,6 +3163,15 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        if (root->ref_cows || root == root->fs_info->tree_root)
                btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
 
+       /*
+        * This function is also used to drop the items in the log tree before
+        * we relog the inode, so if root != BTRFS_I(inode)->root, it means
+        * it is used to drop the logged items. So we shouldn't kill the delayed
+        * items.
+        */
+       if (min_type == 0 && root == BTRFS_I(inode)->root)
+               btrfs_kill_delayed_inode_items(inode);
+
        path = btrfs_alloc_path();
        BUG_ON(!path);
        path->reada = -1;
@@ -3445,7 +3294,6 @@ search_again:
                                    btrfs_file_extent_calc_inline_size(size);
                                ret = btrfs_truncate_item(trans, root, path,
                                                          size, 1);
-                               BUG_ON(ret);
                        } else if (root->ref_cows) {
                                inode_sub_bytes(inode, item_end + 1 -
                                                found_key.offset);
@@ -3496,7 +3344,7 @@ delete:
                                BUG_ON(ret);
                                pending_del_nr = 0;
                        }
-                       btrfs_release_path(root, path);
+                       btrfs_release_path(path);
                        goto search_again;
                } else {
                        path->slots[0]--;
@@ -3654,7 +3502,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
        while (1) {
                em = btrfs_get_extent(inode, NULL, 0, cur_offset,
                                block_end - cur_offset, 0);
-               BUG_ON(IS_ERR(em) || !em);
+               BUG_ON(IS_ERR_OR_NULL(em));
                last_byte = min(extent_map_end(em), block_end);
                last_byte = (last_byte + mask) & ~mask;
                if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3864,7 +3712,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(di))
                ret = PTR_ERR(di);
 
-       if (!di || IS_ERR(di))
+       if (IS_ERR_OR_NULL(di))
                goto out_err;
 
        btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
@@ -3922,7 +3770,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
        if (ret)
                goto out;
 
-       btrfs_release_path(root->fs_info->tree_root, path);
+       btrfs_release_path(path);
 
        new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
        if (IS_ERR(new_root)) {
@@ -4232,7 +4080,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
        return d_splice_alias(inode, dentry);
 }
 
-static unsigned char btrfs_filetype_table[] = {
+unsigned char btrfs_filetype_table[] = {
        DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
 
@@ -4246,6 +4094,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
+       struct list_head ins_list;
+       struct list_head del_list;
        int ret;
        struct extent_buffer *leaf;
        int slot;
@@ -4258,6 +4108,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        char tmp_name[32];
        char *name_ptr;
        int name_len;
+       int is_curr = 0;        /* filp->f_pos points to the current index? */
 
        /* FIXME, use a real flag for deciding about the key type */
        if (root->fs_info->tree_root == root)
@@ -4280,8 +4131,16 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                filp->f_pos = 2;
        }
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
        path->reada = 2;
 
+       if (key_type == BTRFS_DIR_INDEX_KEY) {
+               INIT_LIST_HEAD(&ins_list);
+               INIT_LIST_HEAD(&del_list);
+               btrfs_get_delayed_items(inode, &ins_list, &del_list);
+       }
+
        btrfs_set_key_type(&key, key_type);
        key.offset = filp->f_pos;
        key.objectid = btrfs_ino(inode);
@@ -4311,8 +4170,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                        break;
                if (found_key.offset < filp->f_pos)
                        goto next;
+               if (key_type == BTRFS_DIR_INDEX_KEY &&
+                   btrfs_should_delete_dir_index(&del_list,
+                                                 found_key.offset))
+                       goto next;
 
                filp->f_pos = found_key.offset;
+               is_curr = 1;
 
                di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
                di_cur = 0;
@@ -4367,6 +4231,15 @@ next:
                path->slots[0]++;
        }
 
+       if (key_type == BTRFS_DIR_INDEX_KEY) {
+               if (is_curr)
+                       filp->f_pos++;
+               ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
+                                                     &ins_list);
+               if (ret)
+                       goto nopos;
+       }
+
        /* Reached end of directory/root. Bump pos past the last item. */
        if (key_type == BTRFS_DIR_INDEX_KEY)
                /*
@@ -4379,6 +4252,8 @@ next:
 nopos:
        ret = 0;
 err:
+       if (key_type == BTRFS_DIR_INDEX_KEY)
+               btrfs_put_delayed_items(&ins_list, &del_list);
        btrfs_free_path(path);
        return ret;
 }
@@ -4438,27 +4313,25 @@ void btrfs_dirty_inode(struct inode *inode)
                btrfs_end_transaction(trans, root);
                trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
-                       if (printk_ratelimit()) {
-                               printk(KERN_ERR "btrfs: fail to "
+                       printk_ratelimited(KERN_ERR "btrfs: fail to "
                                       "dirty  inode %llu error %ld\n",
                                       (unsigned long long)btrfs_ino(inode),
                                       PTR_ERR(trans));
-                       }
                        return;
                }
                btrfs_set_trans_block_group(trans, inode);
 
                ret = btrfs_update_inode(trans, root, inode);
                if (ret) {
-                       if (printk_ratelimit()) {
-                               printk(KERN_ERR "btrfs: fail to "
+                       printk_ratelimited(KERN_ERR "btrfs: fail to "
                                       "dirty  inode %llu error %d\n",
                                       (unsigned long long)btrfs_ino(inode),
                                       ret);
-                       }
                }
        }
        btrfs_end_transaction(trans, root);
+       if (BTRFS_I(inode)->delayed_node)
+               btrfs_balance_delayed_items(root);
 }
 
 /*
@@ -4527,9 +4400,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
        int ret = 0;
 
        if (BTRFS_I(dir)->index_cnt == (u64)-1) {
-               ret = btrfs_set_inode_index_count(dir);
-               if (ret)
-                       return ret;
+               ret = btrfs_inode_delayed_dir_index_count(dir);
+               if (ret) {
+                       ret = btrfs_set_inode_index_count(dir);
+                       if (ret)
+                               return ret;
+               }
        }
 
        *index = BTRFS_I(dir)->index_cnt;
@@ -4701,7 +4577,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 
        if (ret == 0) {
                ret = btrfs_insert_dir_item(trans, root, name, name_len,
-                                           parent_ino, &key,
+                                           parent_inode, &key,
                                            btrfs_inode_type(inode), index);
                BUG_ON(ret);
 
@@ -5100,7 +4976,7 @@ again:
                else
                        goto out;
        }
-       em = alloc_extent_map(GFP_NOFS);
+       em = alloc_extent_map();
        if (!em) {
                err = -ENOMEM;
                goto out;
@@ -5254,7 +5130,7 @@ again:
                                kunmap(page);
                                free_extent_map(em);
                                em = NULL;
-                               btrfs_release_path(root, path);
+                               btrfs_release_path(path);
                                trans = btrfs_join_transaction(root, 1);
                                if (IS_ERR(trans))
                                        return ERR_CAST(trans);
@@ -5280,7 +5156,7 @@ not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
        set_bit(EXTENT_FLAG_VACANCY, &em->flags);
 insert:
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
        if (em->start > start || extent_map_end(em) <= start) {
                printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
                       "[%llu %llu]\n", (unsigned long long)em->start,
@@ -5413,7 +5289,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                u64 hole_start = start;
                u64 hole_len = len;
 
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                if (!em) {
                        err = -ENOMEM;
                        goto out;
@@ -5503,6 +5379,9 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        if (IS_ERR(trans))
                return ERR_CAST(trans);
 
+       if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
+               btrfs_add_inode_defrag(trans, inode);
+
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5514,7 +5393,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        }
 
        if (!em) {
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                if (!em) {
                        em = ERR_PTR(-ENOMEM);
                        goto out;
@@ -6814,12 +6693,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->ordered_data_close = 0;
        ei->orphan_meta_reserved = 0;
        ei->dummy_inode = 0;
+       ei->in_defrag = 0;
        ei->force_compress = BTRFS_COMPRESS_NONE;
 
+       ei->delayed_node = NULL;
+
        inode = &ei->vfs_inode;
-       extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
-       extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS);
-       extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS);
+       extent_map_tree_init(&ei->extent_tree);
+       extent_io_tree_init(&ei->io_tree, &inode->i_data);
+       extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
        mutex_init(&ei->log_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->i_orphan);
@@ -6906,6 +6788,7 @@ void btrfs_destroy_inode(struct inode *inode)
        inode_tree_del(inode);
        btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 free:
+       btrfs_remove_delayed_node(inode);
        call_rcu(&inode->i_rcu, btrfs_i_callback);
 }
 
@@ -7216,58 +7099,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
        return 0;
 }
 
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
-                                  int sync)
-{
-       struct btrfs_inode *binode;
-       struct inode *inode = NULL;
-
-       spin_lock(&root->fs_info->delalloc_lock);
-       while (!list_empty(&root->fs_info->delalloc_inodes)) {
-               binode = list_entry(root->fs_info->delalloc_inodes.next,
-                                   struct btrfs_inode, delalloc_inodes);
-               inode = igrab(&binode->vfs_inode);
-               if (inode) {
-                       list_move_tail(&binode->delalloc_inodes,
-                                      &root->fs_info->delalloc_inodes);
-                       break;
-               }
-
-               list_del_init(&binode->delalloc_inodes);
-               cond_resched_lock(&root->fs_info->delalloc_lock);
-       }
-       spin_unlock(&root->fs_info->delalloc_lock);
-
-       if (inode) {
-               if (sync) {
-                       filemap_write_and_wait(inode->i_mapping);
-                       /*
-                        * We have to do this because compression doesn't
-                        * actually set PG_writeback until it submits the pages
-                        * for IO, which happens in an async thread, so we could
-                        * race and not actually wait for any writeback pages
-                        * because they've not been submitted yet.  Technically
-                        * this could still be the case for the ordered stuff
-                        * since the async thread may not have started to do its
-                        * work yet.  If this becomes the case then we need to
-                        * figure out a way to make sure that in writepage we
-                        * wait for any async pages to be submitted before
-                        * returning so that fdatawait does what its supposed to
-                        * do.
-                        */
-                       btrfs_wait_ordered_range(inode, 0, (u64)-1);
-               } else {
-                       filemap_flush(inode->i_mapping);
-               }
-               if (delay_iput)
-                       btrfs_add_delayed_iput(inode);
-               else
-                       iput(inode);
-               return 1;
-       }
-       return 0;
-}
-
 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                         const char *symname)
 {
@@ -7347,6 +7178,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                                      datasize);
        if (err) {
                drop_inode = 1;
+               btrfs_free_path(path);
                goto out_unlock;
        }
        leaf = path->nodes[0];