git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/btrfs/transaction.c
Btrfs: Throttle file_write when data=ordered is flushing the inode
[karo-tx-linux.git] / fs / btrfs / transaction.c
index 02721eea9a7ac7bb8d3ce8f474f66f0584a801a2..c85cb48d95ee3dcd2aa1c8dbb1ad46971202563a 100644 (file)
@@ -33,7 +33,7 @@ static struct workqueue_struct *trans_wq;
 #define BTRFS_ROOT_TRANS_TAG 0
 #define BTRFS_ROOT_DEFRAG_TAG 1
 
-static void put_transaction(struct btrfs_transaction *transaction)
+static noinline void put_transaction(struct btrfs_transaction *transaction)
 {
        WARN_ON(transaction->use_count == 0);
        transaction->use_count--;
@@ -46,7 +46,7 @@ static void put_transaction(struct btrfs_transaction *transaction)
        }
 }
 
-static int join_transaction(struct btrfs_root *root)
+static noinline int join_transaction(struct btrfs_root *root)
 {
        struct btrfs_transaction *cur_trans;
        cur_trans = root->fs_info->running_transaction;
@@ -57,6 +57,8 @@ static int join_transaction(struct btrfs_root *root)
                BUG_ON(!cur_trans);
                root->fs_info->generation++;
                root->fs_info->running_transaction = cur_trans;
+               root->fs_info->last_alloc = 0;
+               root->fs_info->last_data_alloc = 0;
                cur_trans->num_writers = 1;
                cur_trans->num_joined = 0;
                cur_trans->transid = root->fs_info->generation;
@@ -66,8 +68,10 @@ static int join_transaction(struct btrfs_root *root)
                cur_trans->use_count = 1;
                cur_trans->commit_done = 0;
                cur_trans->start_time = get_seconds();
+               INIT_LIST_HEAD(&cur_trans->pending_snapshots);
                list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
-               extent_map_tree_init(&cur_trans->dirty_pages,
+               btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
+               extent_io_tree_init(&cur_trans->dirty_pages,
                                     root->fs_info->btree_inode->i_mapping,
                                     GFP_NOFS);
        } else {
@@ -78,7 +82,7 @@ static int join_transaction(struct btrfs_root *root)
        return 0;
 }
 
-static int record_root_in_trans(struct btrfs_root *root)
+static noinline int record_root_in_trans(struct btrfs_root *root)
 {
        u64 running_trans_id = root->fs_info->running_transaction->transid;
        if (root->ref_cows && root->last_trans < running_trans_id) {
@@ -150,7 +154,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
        int ret;
        int err;
        int werr = 0;
-       struct extent_map_tree *dirty_pages;
+       struct extent_io_tree *dirty_pages;
        struct page *page;
        struct inode *btree_inode = root->fs_info->btree_inode;
        u64 start;
@@ -194,35 +198,48 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
        return werr;
 }
 
-int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root)
+static int update_cowonly_root(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root) /* sync root's item in tree_root with root->node; returns 0 */
 {
        int ret;
-       u64 old_extent_block;
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       struct btrfs_root *tree_root = fs_info->tree_root;
-       struct btrfs_root *extent_root = fs_info->extent_root;
+       u64 old_root_bytenr; /* bytenr currently recorded in root->root_item */
+       struct btrfs_root *tree_root = root->fs_info->tree_root;
 
-       btrfs_write_dirty_block_groups(trans, extent_root);
+       btrfs_write_dirty_block_groups(trans, root);
        while(1) {
-               old_extent_block = btrfs_root_bytenr(&extent_root->root_item);
-               if (old_extent_block == extent_root->node->start)
+               old_root_bytenr = btrfs_root_bytenr(&root->root_item);
+               if (old_root_bytenr == root->node->start) /* item already points at the current node: done */
                        break;
-               btrfs_set_root_bytenr(&extent_root->root_item,
-                                     extent_root->node->start);
-               btrfs_set_root_level(&extent_root->root_item,
-                                    btrfs_header_level(extent_root->node));
+               btrfs_set_root_bytenr(&root->root_item,
+                                      root->node->start);
+               btrfs_set_root_level(&root->root_item,
+                                    btrfs_header_level(root->node));
                ret = btrfs_update_root(trans, tree_root,
-                                       &extent_root->root_key,
-                                       &extent_root->root_item);
+                                       &root->root_key,
+                                       &root->root_item); /* store the refreshed item in the tree root */
                BUG_ON(ret);
-               btrfs_write_dirty_block_groups(trans, extent_root);
+               btrfs_write_dirty_block_groups(trans, root); /* NOTE(review): loop re-checks afterwards, presumably because these writes can move root->node - confirm */
        }
        return 0;
 }
 
-static int wait_for_commit(struct btrfs_root *root,
-                          struct btrfs_transaction *commit)
+int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root)
+{ /* drain fs_info->dirty_cowonly_roots, calling update_cowonly_root() on each entry */
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct list_head *next;
+
+       while(!list_empty(&fs_info->dirty_cowonly_roots)) { /* re-tested every pass, so a root queued meanwhile is handled too */
+               next = fs_info->dirty_cowonly_roots.next;
+               list_del_init(next);
+               root = list_entry(next, struct btrfs_root, dirty_list); /* the 'root' parameter is reused as the cursor */
+               update_cowonly_root(trans, root); /* result ignored; it only ever returns 0 (failures hit BUG_ON inside) */
+       }
+       return 0; /* always 0 */
+}
+
+static noinline int wait_for_commit(struct btrfs_root *root,
+                                   struct btrfs_transaction *commit)
 {
        DEFINE_WAIT(wait);
        mutex_lock(&root->fs_info->trans_mutex);
@@ -261,9 +278,9 @@ int btrfs_add_dead_root(struct btrfs_root *root,
        return 0;
 }
 
-static int add_dirty_roots(struct btrfs_trans_handle *trans,
-                          struct radix_tree_root *radix,
-                          struct list_head *list)
+static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
+                                   struct radix_tree_root *radix,
+                                   struct list_head *list)
 {
        struct dirty_root *dirty;
        struct btrfs_root *gang[8];
@@ -402,8 +419,8 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
        return err;
 }
 
-static int drop_dirty_roots(struct btrfs_root *tree_root,
-                           struct list_head *list)
+static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
+                                    struct list_head *list)
 {
        struct dirty_root *dirty;
        struct btrfs_trans_handle *trans;
@@ -422,6 +439,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
 
                num_bytes = btrfs_root_used(&dirty->root->root_item);
                root = dirty->latest_root;
+               root->fs_info->throttles++;
 
                while(1) {
                        trans = btrfs_start_transaction(tree_root, 1);
@@ -445,6 +463,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
                        mutex_lock(&tree_root->fs_info->fs_mutex);
                }
                BUG_ON(ret);
+               root->fs_info->throttles--;
 
                num_bytes -= btrfs_root_used(&dirty->root->root_item);
                bytes_used = btrfs_root_used(&root->root_item);
@@ -473,6 +492,142 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
        return ret;
 }
 
+int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root)
+{ /* write back the data of every inode in trans->transaction->ordered_inode_tree; always returns 0 */
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       struct inode *inode;
+       u64 root_objectid = 0;
+       u64 objectid = 0;
+       int ret;
+
+       root->fs_info->throttles++; /* held raised for the whole flush; paired with the decrement before return */
+       while(1) { /* pass 1: start writeback on each inode without waiting for it */
+               ret = btrfs_find_first_ordered_inode(
+                               &cur_trans->ordered_inode_tree,
+                               &root_objectid, &objectid, &inode); /* presumably advances the two objectid cursors - confirm */
+               if (!ret) /* a zero return ends the scan */
+                       break;
+
+               mutex_unlock(&root->fs_info->trans_mutex); /* both mutexes are held on entry; dropped around the page I/O */
+               mutex_unlock(&root->fs_info->fs_mutex);
+
+               if (S_ISREG(inode->i_mode)) { /* only regular files are flushed */
+                       atomic_inc(&BTRFS_I(inode)->ordered_writeback); /* elevated only while this flush issues I/O */
+                       filemap_fdatawrite(inode->i_mapping);
+                       atomic_dec(&BTRFS_I(inode)->ordered_writeback);
+               }
+               iput(inode); /* presumably drops a reference taken by the lookup - confirm */
+
+               mutex_lock(&root->fs_info->fs_mutex); /* retaken in the reverse order of release */
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+       while(1) { /* pass 2: take each inode out of the tree and wait for its data */
+               root_objectid = 0; /* restart the search from the lowest key on every iteration */
+               objectid = 0;
+               ret = btrfs_find_del_first_ordered_inode(
+                               &cur_trans->ordered_inode_tree,
+                               &root_objectid, &objectid, &inode);
+               if (!ret) /* a zero return ends the scan */
+                       break;
+               mutex_unlock(&root->fs_info->trans_mutex);
+               mutex_unlock(&root->fs_info->fs_mutex);
+
+               if (S_ISREG(inode->i_mode)) {
+                       atomic_inc(&BTRFS_I(inode)->ordered_writeback);
+                       filemap_write_and_wait(inode->i_mapping); /* unlike pass 1, this waits for completion */
+                       atomic_dec(&BTRFS_I(inode)->ordered_writeback);
+               }
+               atomic_dec(&inode->i_count); /* NOTE(review): raw i_count drop; presumably the reference the tree entry held - confirm */
+               iput(inode);
+
+               mutex_lock(&root->fs_info->fs_mutex);
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+       root->fs_info->throttles--;
+       return 0; /* writeback errors are not propagated */
+}
+
+static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+                                  struct btrfs_fs_info *fs_info,
+                                  struct btrfs_pending_snapshot *pending)
+{ /* snapshot pending->root: copy its root node, then insert root item, dir item and inode ref; 0 or error code */
+       struct btrfs_key key;
+       struct btrfs_root_item *new_root_item;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *root = pending->root;
+       struct extent_buffer *tmp;
+       int ret;
+       u64 objectid;
+
+       new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
+       if (!new_root_item) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+       ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); /* objectid for the new snapshot root */
+       if (ret)
+               goto fail;
+
+       memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); /* start from the source root's item */
+
+       key.objectid = objectid;
+       key.offset = 1;
+       btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+
+       extent_buffer_get(root->node);
+       btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); /* NOTE(review): return value is not checked */
+       free_extent_buffer(tmp);
+
+       btrfs_copy_root(trans, root, root->node, &tmp, objectid); /* NOTE(review): return value is not checked; tmp is used below */
+
+       btrfs_set_root_bytenr(new_root_item, tmp->start); /* point the new root item at the copied node */
+       btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
+       ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+                               new_root_item);
+       free_extent_buffer(tmp);
+       if (ret)
+               goto fail;
+
+       /*
+        * insert the directory item (named pending->name) into the tree root
+        */
+       key.offset = (u64)-1;
+       ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
+                                   pending->name, strlen(pending->name),
+                                   root->fs_info->sb->s_root->d_inode->i_ino,
+                                   &key, BTRFS_FT_DIR);
+
+       if (ret)
+               goto fail;
+
+       ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
+                            pending->name, strlen(pending->name), objectid,
+                            root->fs_info->sb->s_root->d_inode->i_ino);
+fail:
+       kfree(new_root_item); /* reached on success too; kfree(NULL) is a no-op */
+       return ret;
+}
+
+static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
+                                            struct btrfs_fs_info *fs_info)
+{ /* create, then free, every entry on trans->transaction->pending_snapshots; always returns 0 */
+       struct btrfs_pending_snapshot *pending;
+       struct list_head *head = &trans->transaction->pending_snapshots;
+       int ret;
+
+       while(!list_empty(head)) {
+               pending = list_entry(head->next,
+                                    struct btrfs_pending_snapshot, list);
+               ret = create_pending_snapshot(trans, fs_info, pending);
+               BUG_ON(ret); /* any snapshot failure is treated as fatal */
+               list_del(&pending->list); /* unlink, then free the name and the entry itself */
+               kfree(pending->name);
+               kfree(pending);
+       }
+       return 0;
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
@@ -480,8 +635,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
+       struct btrfs_root *chunk_root = root->fs_info->chunk_root;
        struct list_head dirty_fs_roots;
-       struct extent_map_tree *pinned_copy;
+       struct extent_io_tree *pinned_copy;
        DEFINE_WAIT(wait);
        int ret;
 
@@ -510,7 +666,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        if (!pinned_copy)
                return -ENOMEM;
 
-       extent_map_tree_init(pinned_copy,
+       extent_io_tree_init(pinned_copy,
                             root->fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        trans->transaction->in_commit = 1;
@@ -550,10 +706,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
+               ret = btrfs_write_ordered_inodes(trans, root);
+
        } while (cur_trans->num_writers > 1 ||
                 (cur_trans->num_joined != joined));
 
+       ret = create_pending_snapshots(trans, root->fs_info);
+       BUG_ON(ret);
+
        WARN_ON(cur_trans != trans->transaction);
+
        ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
                              &dirty_fs_roots);
        BUG_ON(ret);
@@ -562,7 +724,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
 
        cur_trans = root->fs_info->running_transaction;
+       spin_lock(&root->fs_info->new_trans_lock);
        root->fs_info->running_transaction = NULL;
+       spin_unlock(&root->fs_info->new_trans_lock);
        btrfs_set_super_generation(&root->fs_info->super_copy,
                                   cur_trans->transid);
        btrfs_set_super_root(&root->fs_info->super_copy,
@@ -570,6 +734,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        btrfs_set_super_root_level(&root->fs_info->super_copy,
                           btrfs_header_level(root->fs_info->tree_root->node));
 
+       btrfs_set_super_chunk_root(&root->fs_info->super_copy,
+                                  chunk_root->node->start);
+       btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
+                                        btrfs_header_level(chunk_root->node));
        write_extent_buffer(root->fs_info->sb_buffer,
                            &root->fs_info->super_copy, 0,
                            sizeof(root->fs_info->super_copy));
@@ -681,7 +849,7 @@ void btrfs_transaction_flush_work(struct btrfs_root *root)
 
 void __init btrfs_init_transaction_sys(void)
 {
-       trans_wq = create_workqueue("btrfs");
+       trans_wq = create_workqueue("btrfs-transaction"); /* NOTE(review): result is not checked for NULL here */
 }
 
 void btrfs_exit_transaction_sys(void)