git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
ext4/jbd2: don't wait (forever) for stale tid caused by wraparound
author Theodore Ts'o <tytso@mit.edu>
Thu, 4 Apr 2013 02:02:52 +0000 (22:02 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 8 May 2013 03:08:24 +0000 (20:08 -0700)
commit d76a3a77113db020d9bb1e894822869410450bd9 upstream.

In the case where an inode has a very stale transaction id (tid) in
i_datasync_tid or i_sync_tid, it's possible that after a very large
(2**31) number of transactions, the tid number space might wrap,
causing tid_geq()'s calculations to fail.

Commit deeeaf13 "jbd2: fix fsync() tid wraparound bug", later modified
by commit e7b04ac0 "jbd2: don't wake kjournald unnecessarily",
attempted to fix this problem, but it only avoided kjournald spinning
forever by fixing the logic in jbd2_log_start_commit().

Unfortunately, in the codepaths in fs/ext4/fsync.c and fs/ext4/inode.c
that might call jbd2_log_start_commit() with a stale tid, those
functions will subsequently call jbd2_log_wait_commit() with the same
stale tid, and then wait for a very long time.  To fix this, we
replace the calls to jbd2_log_start_commit() and
jbd2_log_wait_commit() with a call to a new function,
jbd2_complete_transaction(), which will correctly handle stale tids.

As a bonus, jbd2_complete_transaction() will avoid locking
j_state_lock for writing unless a commit needs to be started.  This
should have a small (but probably not measurable) improvement for
ext4's scalability.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Reported-by: George Barnett <gbarnett@atlassian.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/ext4/fsync.c
fs/ext4/inode.c
fs/jbd2/journal.c
include/linux/jbd2.h

index 3278e64e57b61ac51a41db3ceebeecd21003985a..e0ba8a408def07583b9cd04a57259e3c56183057 100644 (file)
@@ -166,8 +166,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (journal->j_flags & JBD2_BARRIER &&
            !jbd2_trans_will_send_data_barrier(journal, commit_tid))
                needs_barrier = true;
-       jbd2_log_start_commit(journal, commit_tid);
-       ret = jbd2_log_wait_commit(journal, commit_tid);
+       ret = jbd2_complete_transaction(journal, commit_tid);
        if (needs_barrier) {
                err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
                if (!ret)
index 22c5c67ab4d1d49422eda0351038128c65568500..0dbc84a13385daed24b621c3bff6ced2a73a01ce 100644 (file)
@@ -216,8 +216,7 @@ void ext4_evict_inode(struct inode *inode)
                        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
                        tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
 
-                       jbd2_log_start_commit(journal, commit_tid);
-                       jbd2_log_wait_commit(journal, commit_tid);
+                       jbd2_complete_transaction(journal, commit_tid);
                        filemap_write_and_wait(&inode->i_data);
                }
                truncate_inode_pages(&inode->i_data, 0);
index dbf41f9452db602efdebb778f86cf544b28d0399..42f8cf6cd5da8ebc216ff8636d3ee389a0a94db0 100644 (file)
@@ -697,6 +697,37 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
        return err;
 }
 
+/*
+ * When this function returns the transaction corresponding to tid
+ * will be completed.  If the transaction is currently running, start
+ * committing that transaction before waiting for it to complete.  If
+ * the transaction id is stale, it is by definition already completed,
+ * so just return 0 (success).
+ */
+int jbd2_complete_transaction(journal_t *journal, tid_t tid)
+{
+       int     need_to_wait = 1;
+
+       read_lock(&journal->j_state_lock);
+       if (journal->j_running_transaction &&
+           journal->j_running_transaction->t_tid == tid) {
+               if (journal->j_commit_request != tid) {
+                       /* commit of tid not yet requested, so request it */
+                       read_unlock(&journal->j_state_lock);
+                       jbd2_log_start_commit(journal, tid);
+                       goto wait_commit;
+               }
+       } else if (!(journal->j_committing_transaction &&
+                    journal->j_committing_transaction->t_tid == tid))
+               need_to_wait = 0;
+       read_unlock(&journal->j_state_lock);
+       if (!need_to_wait)
+               return 0;
+wait_commit:
+       return jbd2_log_wait_commit(journal, tid);
+}
+EXPORT_SYMBOL(jbd2_complete_transaction);
+
 /*
  * Log buffer allocation routines:
  */
index e30b66346942a90a4c79cdc5a0362b3899db0521..1ac5255d57cabaca498c7d135bae4396d66778de 100644 (file)
@@ -1210,6 +1210,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
 int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
 int jbd2_journal_force_commit_nested(journal_t *journal);
 int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
+int jbd2_complete_transaction(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);