git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/btrfs/transaction.c
Btrfs: Throttle file_write when data=ordered is flushing the inode
[karo-tx-linux.git] / fs / btrfs / transaction.c
index 750f35a37aae8c01fe7b8c61bcf624e287693257..c85cb48d95ee3dcd2aa1c8dbb1ad46971202563a 100644 (file)
@@ -33,7 +33,7 @@ static struct workqueue_struct *trans_wq;
 #define BTRFS_ROOT_TRANS_TAG 0
 #define BTRFS_ROOT_DEFRAG_TAG 1
 
-static void put_transaction(struct btrfs_transaction *transaction)
+static noinline void put_transaction(struct btrfs_transaction *transaction)
 {
        WARN_ON(transaction->use_count == 0);
        transaction->use_count--;
@@ -46,7 +46,7 @@ static void put_transaction(struct btrfs_transaction *transaction)
        }
 }
 
-static int join_transaction(struct btrfs_root *root)
+static noinline int join_transaction(struct btrfs_root *root)
 {
        struct btrfs_transaction *cur_trans;
        cur_trans = root->fs_info->running_transaction;
@@ -57,6 +57,8 @@ static int join_transaction(struct btrfs_root *root)
                BUG_ON(!cur_trans);
                root->fs_info->generation++;
                root->fs_info->running_transaction = cur_trans;
+               root->fs_info->last_alloc = 0;
+               root->fs_info->last_data_alloc = 0;
                cur_trans->num_writers = 1;
                cur_trans->num_joined = 0;
                cur_trans->transid = root->fs_info->generation;
@@ -66,8 +68,10 @@ static int join_transaction(struct btrfs_root *root)
                cur_trans->use_count = 1;
                cur_trans->commit_done = 0;
                cur_trans->start_time = get_seconds();
+               INIT_LIST_HEAD(&cur_trans->pending_snapshots);
                list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
-               extent_map_tree_init(&cur_trans->dirty_pages,
+               btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
+               extent_io_tree_init(&cur_trans->dirty_pages,
                                     root->fs_info->btree_inode->i_mapping,
                                     GFP_NOFS);
        } else {
@@ -78,7 +82,7 @@ static int join_transaction(struct btrfs_root *root)
        return 0;
 }
 
-static int record_root_in_trans(struct btrfs_root *root)
+static noinline int record_root_in_trans(struct btrfs_root *root)
 {
        u64 running_trans_id = root->fs_info->running_transaction->transid;
        if (root->ref_cows && root->last_trans < running_trans_id) {
@@ -150,7 +154,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
        int ret;
        int err;
        int werr = 0;
-       struct extent_map_tree *dirty_pages;
+       struct extent_io_tree *dirty_pages;
        struct page *page;
        struct inode *btree_inode = root->fs_info->btree_inode;
        u64 start;
@@ -169,7 +173,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
                while(start <= end) {
                        index = start >> PAGE_CACHE_SHIFT;
-                       start = (index + 1) << PAGE_CACHE_SHIFT;
+                       start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
                        page = find_lock_page(btree_inode->i_mapping, index);
                        if (!page)
                                continue;
@@ -194,34 +198,48 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
        return werr;
 }
 
-int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root)
+static int update_cowonly_root(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root) /* bring root's root_item (bytenr + level) in line with its current node; always returns 0 */
 {
        int ret;
-       u64 old_extent_block;
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       struct btrfs_root *tree_root = fs_info->tree_root;
-       struct btrfs_root *extent_root = fs_info->extent_root;
+       u64 old_root_bytenr;
+       struct btrfs_root *tree_root = root->fs_info->tree_root;
 
-       btrfs_write_dirty_block_groups(trans, extent_root);
+       btrfs_write_dirty_block_groups(trans, root);
        while(1) {
-               old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
-               if (old_extent_block ==
-                   extent_buffer_blocknr(extent_root->node))
+               old_root_bytenr = btrfs_root_bytenr(&root->root_item);
+               if (old_root_bytenr == root->node->start) /* done once the recorded bytenr matches the current root node */
                        break;
-               btrfs_set_root_blocknr(&extent_root->root_item,
-                              extent_buffer_blocknr(extent_root->node));
+               btrfs_set_root_bytenr(&root->root_item,
+                                      root->node->start);
+               btrfs_set_root_level(&root->root_item,
+                                    btrfs_header_level(root->node));
                ret = btrfs_update_root(trans, tree_root,
-                                       &extent_root->root_key,
-                                       &extent_root->root_item);
+                                       &root->root_key,
+                                       &root->root_item);
                BUG_ON(ret);
-               btrfs_write_dirty_block_groups(trans, extent_root);
+               btrfs_write_dirty_block_groups(trans, root); /* the loop re-checks after this - presumably these writes can move the root node again; confirm */
        }
        return 0;
 }
 
-static int wait_for_commit(struct btrfs_root *root,
-                          struct btrfs_transaction *commit)
+int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct list_head *next;
+       /* Drain fs_info->dirty_cowonly_roots, calling update_cowonly_root() on each entry; always returns 0. */
+       while(!list_empty(&fs_info->dirty_cowonly_roots)) {
+               next = fs_info->dirty_cowonly_roots.next;
+               list_del_init(next); /* unlink first, so the list shrinks on every iteration */
+               root = list_entry(next, struct btrfs_root, dirty_list); /* the root argument is reused as the loop cursor */
+               update_cowonly_root(trans, root); /* return value is not inspected */
+       }
+       return 0;
+}
+
+static noinline int wait_for_commit(struct btrfs_root *root,
+                                   struct btrfs_transaction *commit)
 {
        DEFINE_WAIT(wait);
        mutex_lock(&root->fs_info->trans_mutex);
@@ -260,9 +278,9 @@ int btrfs_add_dead_root(struct btrfs_root *root,
        return 0;
 }
 
-static int add_dirty_roots(struct btrfs_trans_handle *trans,
-                          struct radix_tree_root *radix,
-                          struct list_head *list)
+static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
+                                   struct radix_tree_root *radix,
+                                   struct list_head *list)
 {
        struct dirty_root *dirty;
        struct btrfs_root *gang[8];
@@ -284,8 +302,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
                                     (unsigned long)root->root_key.objectid,
                                     BTRFS_ROOT_TRANS_TAG);
                        if (root->commit_root == root->node) {
-                               WARN_ON(extent_buffer_blocknr(root->node) !=
-                                       btrfs_root_blocknr(&root->root_item));
+                               WARN_ON(root->node->start !=
+                                       btrfs_root_bytenr(&root->root_item));
                                free_extent_buffer(root->commit_root);
                                root->commit_root = NULL;
 
@@ -314,8 +332,10 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
                        root->commit_root = NULL;
 
                        root->root_key.offset = root->fs_info->generation;
-                       btrfs_set_root_blocknr(&root->root_item,
-                                      extent_buffer_blocknr(root->node));
+                       btrfs_set_root_bytenr(&root->root_item,
+                                             root->node->start);
+                       btrfs_set_root_level(&root->root_item,
+                                            btrfs_header_level(root->node));
                        err = btrfs_insert_root(trans, root->fs_info->tree_root,
                                                &root->root_key,
                                                &root->root_item);
@@ -350,7 +370,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 
        if (root->defrag_running)
                return 0;
-
        trans = btrfs_start_transaction(root, 1);
        while (1) {
                root->defrag_running = 1;
@@ -358,7 +377,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
                mutex_unlock(&info->fs_mutex);
-
                btrfs_btree_balance_dirty(info->tree_root, nr);
                cond_resched();
 
@@ -394,21 +412,21 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
                for (i = 0; i < ret; i++) {
                        root = gang[i];
                        last = root->root_key.objectid + 1;
-                       // btrfs_defrag_root(root, 1);
+                       btrfs_defrag_root(root, 1);
                }
        }
-       // btrfs_defrag_root(info->extent_root, 1);
+       btrfs_defrag_root(info->extent_root, 1);
        return err;
 }
 
-static int drop_dirty_roots(struct btrfs_root *tree_root,
-                           struct list_head *list)
+static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
+                                    struct list_head *list)
 {
        struct dirty_root *dirty;
        struct btrfs_trans_handle *trans;
        unsigned long nr;
-       u64 num_blocks;
-       u64 blocks_used;
+       u64 num_bytes;
+       u64 bytes_used;
        int ret = 0;
        int err;
 
@@ -419,8 +437,9 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
                dirty = list_entry(list->next, struct dirty_root, list);
                list_del_init(&dirty->list);
 
-               num_blocks = btrfs_root_used(&dirty->root->root_item);
+               num_bytes = btrfs_root_used(&dirty->root->root_item);
                root = dirty->latest_root;
+               root->fs_info->throttles++;
 
                while(1) {
                        trans = btrfs_start_transaction(tree_root, 1);
@@ -440,18 +459,18 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
                        BUG_ON(ret);
                        mutex_unlock(&tree_root->fs_info->fs_mutex);
                        btrfs_btree_balance_dirty(tree_root, nr);
-                       schedule();
-
+                       cond_resched();
                        mutex_lock(&tree_root->fs_info->fs_mutex);
                }
                BUG_ON(ret);
+               root->fs_info->throttles--;
 
-               num_blocks -= btrfs_root_used(&dirty->root->root_item);
-               blocks_used = btrfs_root_used(&root->root_item);
-               if (num_blocks) {
+               num_bytes -= btrfs_root_used(&dirty->root->root_item);
+               bytes_used = btrfs_root_used(&root->root_item);
+               if (num_bytes) {
                        record_root_in_trans(root);
                        btrfs_set_root_used(&root->root_item,
-                                                  blocks_used - num_blocks);
+                                           bytes_used - num_bytes);
                }
                ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
                if (ret) {
@@ -462,16 +481,153 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
                ret = btrfs_end_transaction(trans, tree_root);
                BUG_ON(ret);
 
+               free_extent_buffer(dirty->root->node);
                kfree(dirty->root);
                kfree(dirty);
                mutex_unlock(&tree_root->fs_info->fs_mutex);
 
                btrfs_btree_balance_dirty(tree_root, nr);
-               schedule();
+               cond_resched();
        }
        return ret;
 }
 
+int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root)
+{
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       struct inode *inode;
+       u64 root_objectid = 0;
+       u64 objectid = 0;
+       int ret;
+
+       root->fs_info->throttles++;
+       while(1) {
+               ret = btrfs_find_first_ordered_inode(
+                               &cur_trans->ordered_inode_tree,
+                               &root_objectid, &objectid, &inode);
+               if (!ret)
+                       break;
+
+               mutex_unlock(&root->fs_info->trans_mutex);
+               mutex_unlock(&root->fs_info->fs_mutex);
+
+               if (S_ISREG(inode->i_mode)) {
+                       atomic_inc(&BTRFS_I(inode)->ordered_writeback);
+                       filemap_fdatawrite(inode->i_mapping);
+                       atomic_dec(&BTRFS_I(inode)->ordered_writeback);
+               }
+               iput(inode);
+
+               mutex_lock(&root->fs_info->fs_mutex);
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+       while(1) {
+               root_objectid = 0;
+               objectid = 0;
+               ret = btrfs_find_del_first_ordered_inode(
+                               &cur_trans->ordered_inode_tree,
+                               &root_objectid, &objectid, &inode);
+               if (!ret)
+                       break;
+               mutex_unlock(&root->fs_info->trans_mutex);
+               mutex_unlock(&root->fs_info->fs_mutex);
+
+               if (S_ISREG(inode->i_mode)) {
+                       atomic_inc(&BTRFS_I(inode)->ordered_writeback);
+                       filemap_write_and_wait(inode->i_mapping);
+                       atomic_dec(&BTRFS_I(inode)->ordered_writeback);
+               }
+               atomic_dec(&inode->i_count);
+               iput(inode);
+
+               mutex_lock(&root->fs_info->fs_mutex);
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+       root->fs_info->throttles--;
+       return 0;
+}
+/* Create one snapshot of pending->root: copy its root item under a freshly allocated objectid, insert it into the tree root, then add a directory item and inode ref named pending->name.  Returns 0 or the error from the first checked step that fails. */
+static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+                                  struct btrfs_fs_info *fs_info,
+                                  struct btrfs_pending_snapshot *pending)
+{
+       struct btrfs_key key;
+       struct btrfs_root_item *new_root_item;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *root = pending->root;
+       struct extent_buffer *tmp;
+       int ret;
+       u64 objectid;
+
+       new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
+       if (!new_root_item) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+       ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); /* objectid for the new root comes from the tree root */
+       if (ret)
+               goto fail;
+
+       memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); /* start from a copy of the source root's item */
+
+       key.objectid = objectid;
+       key.offset = 1;
+       btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+
+       extent_buffer_get(root->node);
+       btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); /* NOTE(review): return value ignored; tmp is freed on the next line regardless */
+       free_extent_buffer(tmp);
+
+       btrfs_copy_root(trans, root, root->node, &tmp, objectid); /* NOTE(review): return value ignored, yet tmp is dereferenced just below */
+
+       btrfs_set_root_bytenr(new_root_item, tmp->start); /* the new root item points at the copied root block */
+       btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
+       ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+                               new_root_item);
+       free_extent_buffer(tmp); /* released whether or not the insert succeeded */
+       if (ret)
+               goto fail;
+
+       /*
+        * insert the directory item
+        */
+       key.offset = (u64)-1;
+       ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
+                                   pending->name, strlen(pending->name),
+                                   root->fs_info->sb->s_root->d_inode->i_ino,
+                                   &key, BTRFS_FT_DIR); /* entry goes into the directory of the superblock's root dentry */
+
+       if (ret)
+               goto fail;
+
+       ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
+                            pending->name, strlen(pending->name), objectid,
+                            root->fs_info->sb->s_root->d_inode->i_ino); /* result falls through to the common exit below */
+fail: /* reached on success as well as on error */
+       kfree(new_root_item); /* kfree(NULL) is a no-op, so the allocation-failure path is safe */
+       return ret;
+}
+/* Create every snapshot queued on trans->transaction->pending_snapshots and free each queue entry afterwards; a creation failure trips BUG_ON.  Always returns 0. */
+static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
+                                            struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_pending_snapshot *pending;
+       struct list_head *head = &trans->transaction->pending_snapshots;
+       int ret;
+
+       while(!list_empty(head)) { /* always take the first entry; it is unlinked below */
+               pending = list_entry(head->next,
+                                    struct btrfs_pending_snapshot, list);
+               ret = create_pending_snapshot(trans, fs_info, pending);
+               BUG_ON(ret);
+               list_del(&pending->list);
+               kfree(pending->name); /* the entry owns its name string as well as itself */
+               kfree(pending);
+       }
+       return 0;
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
@@ -479,12 +635,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
+       struct btrfs_root *chunk_root = root->fs_info->chunk_root;
        struct list_head dirty_fs_roots;
-       struct radix_tree_root pinned_copy;
+       struct extent_io_tree *pinned_copy;
        DEFINE_WAIT(wait);
        int ret;
 
-       init_bit_radix(&pinned_copy);
        INIT_LIST_HEAD(&dirty_fs_roots);
 
        mutex_lock(&root->fs_info->trans_mutex);
@@ -505,6 +661,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                mutex_lock(&root->fs_info->fs_mutex);
                return 0;
        }
+
+       pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
+       if (!pinned_copy)
+               return -ENOMEM;
+
+       extent_io_tree_init(pinned_copy,
+                            root->fs_info->btree_inode->i_mapping, GFP_NOFS);
+
        trans->transaction->in_commit = 1;
        cur_trans = trans->transaction;
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -542,10 +706,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
+               ret = btrfs_write_ordered_inodes(trans, root);
+
        } while (cur_trans->num_writers > 1 ||
                 (cur_trans->num_joined != joined));
 
+       ret = create_pending_snapshots(trans, root->fs_info);
+       BUG_ON(ret);
+
        WARN_ON(cur_trans != trans->transaction);
+
        ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
                              &dirty_fs_roots);
        BUG_ON(ret);
@@ -554,26 +724,38 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
 
        cur_trans = root->fs_info->running_transaction;
+       spin_lock(&root->fs_info->new_trans_lock);
        root->fs_info->running_transaction = NULL;
+       spin_unlock(&root->fs_info->new_trans_lock);
        btrfs_set_super_generation(&root->fs_info->super_copy,
                                   cur_trans->transid);
        btrfs_set_super_root(&root->fs_info->super_copy,
-                    extent_buffer_blocknr(root->fs_info->tree_root->node));
-
+                            root->fs_info->tree_root->node->start);
+       btrfs_set_super_root_level(&root->fs_info->super_copy,
+                          btrfs_header_level(root->fs_info->tree_root->node));
+
+       btrfs_set_super_chunk_root(&root->fs_info->super_copy,
+                                  chunk_root->node->start);
+       btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
+                                        btrfs_header_level(chunk_root->node));
        write_extent_buffer(root->fs_info->sb_buffer,
                            &root->fs_info->super_copy, 0,
                            sizeof(root->fs_info->super_copy));
 
-       btrfs_copy_pinned(root, &pinned_copy);
+       btrfs_copy_pinned(root, pinned_copy);
 
        mutex_unlock(&root->fs_info->trans_mutex);
        mutex_unlock(&root->fs_info->fs_mutex);
        ret = btrfs_write_and_wait_transaction(trans, root);
        BUG_ON(ret);
        write_ctree_super(trans, root);
+
        mutex_lock(&root->fs_info->fs_mutex);
-       btrfs_finish_extent_commit(trans, root, &pinned_copy);
+       btrfs_finish_extent_commit(trans, root, pinned_copy);
        mutex_lock(&root->fs_info->trans_mutex);
+
+       kfree(pinned_copy);
+
        cur_trans->commit_done = 1;
        root->fs_info->last_trans_committed = cur_trans->transid;
        wake_up(&cur_trans->commit_wait);
@@ -610,12 +792,20 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
        }
        return 0;
 }
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+void btrfs_transaction_cleaner(void *p)
+#else
 void btrfs_transaction_cleaner(struct work_struct *work)
+#endif
 {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       struct btrfs_fs_info *fs_info = p;
+#else
        struct btrfs_fs_info *fs_info = container_of(work,
                                                     struct btrfs_fs_info,
                                                     trans_work.work);
 
+#endif
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_transaction *cur;
        struct btrfs_trans_handle *trans;
@@ -659,10 +849,10 @@ void btrfs_transaction_flush_work(struct btrfs_root *root)
 
 void __init btrfs_init_transaction_sys(void)
 {
-       trans_wq = create_workqueue("btrfs");
+       trans_wq = create_workqueue("btrfs-transaction"); /* creates the file-scope transaction workqueue; NOTE(review): a NULL result is not checked here */
 }
 
-void __exit btrfs_exit_transaction_sys(void)
+void btrfs_exit_transaction_sys(void) /* destroys trans_wq; NOTE(review): the __exit annotation is dropped here - presumably so non-exit code may call it; confirm */
 {
        destroy_workqueue(trans_wq);
 }