From 8b050d350c7846462a21e9e054c9154ede9b43cf Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 20 Feb 2014 18:08:58 +0800 Subject: [PATCH] Btrfs: fix skipped error handle when log sync failed It is possible that many tasks sync the log tree at the same time, but only one task can do the sync work, the others will wait for it. But those wait tasks didn't get the result of the log sync, and returned 0 when they ended the wait. It caused those tasks skipped the error handle, and the serious problem was they told the users the file sync succeeded but in fact they failed. This patch fixes this problem by introducing a log context structure, we insert it into the a global list. When the sync fails, we will set the error number of every log context in the list, then the waiting tasks get the error number of the log context and handle the error if need. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 2 + fs/btrfs/file.c | 9 ++-- fs/btrfs/tree-log.c | 114 ++++++++++++++++++++++++++++++++++---------- fs/btrfs/tree-log.h | 16 ++++++- 5 files changed, 111 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 70c03f5f0953..906410719acb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1718,6 +1718,7 @@ struct btrfs_root { struct mutex log_mutex; wait_queue_head_t log_writer_wait; wait_queue_head_t log_commit_wait[2]; + struct list_head log_ctxs[2]; atomic_t log_writers; atomic_t log_commit[2]; atomic_t log_batch; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cc1b4237dc62..44f52d280b7d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1200,6 +1200,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); + INIT_LIST_HEAD(&root->log_ctxs[0]); + INIT_LIST_HEAD(&root->log_ctxs[1]); atomic_set(&root->log_commit[0], 0); atomic_set(&root->log_commit[1], 0); atomic_set(&root->log_writers, 0); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 006af2f4dd98..6acccc4a7f2a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1864,8 +1864,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - int ret = 0; struct btrfs_trans_handle *trans; + struct btrfs_log_ctx ctx; + int ret = 0; bool full_sync = 0; trace_btrfs_sync_file(file, datasync); @@ -1959,7 +1960,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } trans->sync = true; - ret = btrfs_log_dentry_safe(trans, root, dentry); + btrfs_init_log_ctx(&ctx); + + ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ ret = 1; @@ -1979,7 +1982,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret != BTRFS_NO_LOG_SYNC) { if (!ret) { - ret = btrfs_sync_log(trans, root); + ret = btrfs_sync_log(trans, root, &ctx); if (!ret) { ret = btrfs_end_transaction(trans, root); goto out; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 128a904ceac0..da6da274dce3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -136,8 +136,10 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, * syncing the tree wait for us to finish */ static int start_log_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, + struct btrfs_log_ctx *ctx) { + int index; int ret; mutex_lock(&root->log_mutex); @@ -151,6 +153,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans, atomic_inc(&root->log_batch); atomic_inc(&root->log_writers); + if (ctx) { + index = root->log_transid % 2; + list_add_tail(&ctx->list, &root->log_ctxs[index]); + } mutex_unlock(&root->log_mutex); return 0; } @@ -172,6 +178,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans, root->log_start_pid = current->pid; atomic_inc(&root->log_batch); atomic_inc(&root->log_writers); + if (ctx) { + index = root->log_transid % 2; + list_add_tail(&ctx->list, &root->log_ctxs[index]); + } out: mutex_unlock(&root->log_mutex); return ret; @@ -2361,12 +2371,11 @@ static int update_log_root(struct btrfs_trans_handle *trans, return ret; } -static int wait_log_commit(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int transid) +static void wait_log_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int transid) { DEFINE_WAIT(wait); int index = transid % 2; - int ret = 0; /* * we only allow two pending log transactions at a time, @@ -2374,12 +2383,6 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, * current transaction, we're done */ do { - if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == - trans->transid) { - ret = -EAGAIN; - break; - } - prepare_to_wait(&root->log_commit_wait[index], &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&root->log_mutex); @@ -2392,27 +2395,55 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, mutex_lock(&root->log_mutex); } while (root->log_transid < transid + 2 && atomic_read(&root->log_commit[index])); - - return ret; } static void wait_for_writer(struct btrfs_trans_handle *trans, struct btrfs_root *root) { DEFINE_WAIT(wait); - while (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) != - trans->transid && atomic_read(&root->log_writers)) { + + while (atomic_read(&root->log_writers)) { prepare_to_wait(&root->log_writer_wait, &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&root->log_mutex); - if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) != - trans->transid && atomic_read(&root->log_writers)) + if (atomic_read(&root->log_writers)) schedule(); mutex_lock(&root->log_mutex); finish_wait(&root->log_writer_wait, &wait); } } +static inline void btrfs_remove_log_ctx(struct btrfs_root *root, + struct btrfs_log_ctx *ctx) +{ + if (!ctx) + return; + + mutex_lock(&root->log_mutex); + list_del_init(&ctx->list); + mutex_unlock(&root->log_mutex); +} + +/* + * Invoked in log mutex context, or be sure there is no other task which + * can access the list. + */ +static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, + int index, int error) +{ + struct btrfs_log_ctx *ctx; + + if (!error) { + INIT_LIST_HEAD(&root->log_ctxs[index]); + return; + } + + list_for_each_entry(ctx, &root->log_ctxs[index], list) + ctx->log_ret = error; + + INIT_LIST_HEAD(&root->log_ctxs[index]); +} + /* * btrfs_sync_log does sends a given tree log down to the disk and * updates the super blocks to record it. When this call is done, @@ -2426,7 +2457,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, * that has happened. */ int btrfs_sync_log(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, struct btrfs_log_ctx *ctx) { int index1; int index2; @@ -2435,15 +2466,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *log = root->log_root; struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; int log_transid = 0; + struct btrfs_log_ctx root_log_ctx; struct blk_plug plug; mutex_lock(&root->log_mutex); log_transid = root->log_transid; index1 = root->log_transid % 2; if (atomic_read(&root->log_commit[index1])) { - ret = wait_log_commit(trans, root, root->log_transid); + wait_log_commit(trans, root, root->log_transid); mutex_unlock(&root->log_mutex); - return ret; + return ctx->log_ret; } atomic_set(&root->log_commit[index1], 1); @@ -2534,13 +2566,18 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } index2 = log_root_tree->log_transid % 2; + + btrfs_init_log_ctx(&root_log_ctx); + list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); + if (atomic_read(&log_root_tree->log_commit[index2])) { blk_finish_plug(&plug); btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); - ret = wait_log_commit(trans, log_root_tree, - log_root_tree->log_transid); + wait_log_commit(trans, log_root_tree, + log_root_tree->log_transid); btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); + ret = root_log_ctx.log_ret; goto out; } atomic_set(&log_root_tree->log_commit[index2], 1); @@ -2609,12 +2646,31 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&root->log_mutex); out_wake_log_root: + /* + * We needn't get log_mutex here because we are sure all + * the other tasks are blocked. + */ + btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); + + /* + * It is dangerous if log_commit is changed before we set + * ->log_ret of log ctx. Because the readers may not get + * the return value. + */ + smp_wmb(); + atomic_set(&log_root_tree->log_commit[index2], 0); smp_mb(); if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) wake_up(&log_root_tree->log_commit_wait[index2]); out: + /* See above. */ + btrfs_remove_all_log_ctxs(root, index1, ret); + + /* See above. */ + smp_wmb(); atomic_set(&root->log_commit[index1], 0); + smp_mb(); if (waitqueue_active(&root->log_commit_wait[index1])) wake_up(&root->log_commit_wait[index1]); @@ -4076,7 +4132,8 @@ out: */ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct dentry *parent, int exists_only) + struct dentry *parent, int exists_only, + struct btrfs_log_ctx *ctx) { int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; struct super_block *sb; @@ -4113,7 +4170,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_no_trans; } - ret = start_log_trans(trans, root); + ret = start_log_trans(trans, root, ctx); if (ret) goto end_no_trans; @@ -4163,6 +4220,9 @@ end_trans: root->fs_info->last_trans_log_full_commit = trans->transid; ret = 1; } + + if (ret) + btrfs_remove_log_ctx(root, ctx); btrfs_end_log_trans(root); end_no_trans: return ret; @@ -4175,12 +4235,14 @@ end_no_trans: * data on disk. */ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct dentry *dentry) + struct btrfs_root *root, struct dentry *dentry, + struct btrfs_log_ctx *ctx) { struct dentry *parent = dget_parent(dentry); int ret; - ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); + ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, + 0, ctx); dput(parent); return ret; @@ -4417,6 +4479,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, root->fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, inode, parent, 1); + return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1d4ae0d15a70..59c1edb31d19 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -22,14 +22,26 @@ /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ #define BTRFS_NO_LOG_SYNC 256 +struct btrfs_log_ctx { + int log_ret; + struct list_head list; +}; + +static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) +{ + ctx->log_ret = 0; + INIT_LIST_HEAD(&ctx->list); +} + int btrfs_sync_log(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, struct btrfs_log_ctx *ctx); int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct dentry *dentry); + struct btrfs_root *root, struct dentry *dentry, + struct btrfs_log_ctx *ctx); int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, -- 2.39.5