Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

[karo-tx-linux.git] / fs / ext4 / super.c
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 056474b7b8e0a364f0dff94912b47f0000692118..8553dfb310afd7ac2209d186125287e99a867f61 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
   * journal_end calls result in the superblock being marked dirty, so
   * that sync() will call the filesystem's write_super callback if
   * appropriate.
+ *
+ * To avoid holding j_barrier in userspace when a user calls freeze(),
+ * ext4 uses s_frozen, which lives in an upper layer, to prevent a new
+ * handle from being started.
   */
  handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
  {
         journal_t *journal;
+       handle_t  *handle;
  
         if (sb->s_flags & MS_RDONLY)
                 return ERR_PTR(-EROFS);
  
-       vfs_check_frozen(sb, SB_FREEZE_TRANS);
-       /* Special case here: if the journal has aborted behind our
-        * backs (eg. EIO in the commit thread), then we still need to
-        * take the FS itself readonly cleanly. */
         journal = EXT4_SB(sb)->s_journal;
-       if (journal) {
-               if (is_journal_aborted(journal)) {
-                       ext4_abort(sb, "Detected aborted journal");
-                       return ERR_PTR(-EROFS);
-               }
-               return jbd2_journal_start(journal, nblocks);
+       handle = ext4_journal_current_handle();
+
+       /*
+        * If a handle has been started, it should be allowed to
+        * finish, otherwise deadlock could happen between freeze
+        * and others (e.g. truncate) due to the restart of the
+        * journal handle if the filesystem is frozen and active
+        * handles are not stopped.
+        */
+       if (!handle)
+               vfs_check_frozen(sb, SB_FREEZE_TRANS);
+
+       if (!journal)
+               return ext4_get_nojournal();
+       /*
+        * Special case here: if the journal has aborted behind our
+        * backs (eg. EIO in the commit thread), then we still need to
+        * take the FS itself readonly cleanly.
+        */
+       if (is_journal_aborted(journal)) {
+               ext4_abort(sb, "Detected aborted journal");
+               return ERR_PTR(-EROFS);
         }
-       return ext4_get_nojournal();
+       return jbd2_journal_start(journal, nblocks);
  }
  
  /*
@@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
         mutex_unlock(&ext4_li_info->li_list_mtx);
  
         sbi->s_li_request = elr;
+       /*
+        * set elr to NULL here since it has been inserted to
+        * the request_list and the removal and free of it is
+        * handled by ext4_clear_request_list from now on.
+        */
+       elr = NULL;
  
         if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
                 ret = ext4_run_lazyinit_thread();
@@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
         spin_lock_init(&sbi->s_next_gen_lock);
  
+       init_timer(&sbi->s_err_report);
+       sbi->s_err_report.function = print_daily_error_info;
+       sbi->s_err_report.data = (unsigned long) sb;
+
         err = percpu_counter_init(&sbi->s_freeblocks_counter,
                         ext4_count_free_blocks(sb));
         if (!err) {
@@ -3646,9 +3673,6 @@ no_journal:
                  "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
                  *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
  
-       init_timer(&sbi->s_err_report);
-       sbi->s_err_report.function = print_daily_error_info;
-       sbi->s_err_report.data = (unsigned long) sb;
         if (es->s_error_count)
                 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
  
@@ -3672,6 +3696,7 @@ failed_mount_wq:
                 sbi->s_journal = NULL;
         }
  failed_mount3:
+       del_timer(&sbi->s_err_report);
         if (sbi->s_flex_groups) {
                 if (is_vmalloc_addr(sbi->s_flex_groups))
                         vfree(sbi->s_flex_groups);
@@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
  /*
   * LVM calls this function before a (read-only) snapshot is created.  This
   * gives us a chance to flush the journal completely and mark the fs clean.
+ *
+ * Note that this function alone cannot bring a filesystem into a clean
+ * state, because ext4 prevents a new handle from being started based on
+ * @sb->s_frozen, which lives in an upper layer.  It thus needs help from
+ * the upper layer.
   */
  static int ext4_freeze(struct super_block *sb)
  {
@@ -4614,11 +4644,24 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
  
  static int ext4_quota_off(struct super_block *sb, int type)
  {
+       struct inode *inode = sb_dqopt(sb)->files[type];
+       handle_t *handle;
+
         /* Force all delayed allocation blocks to be allocated.
          * Caller already holds s_umount sem */
         if (test_opt(sb, DELALLOC))
                 sync_filesystem(sb);
  
+       /* Update modification times of quota files when userspace can
+        * start looking at them */
+       handle = ext4_journal_start(inode, 1);
+       if (IS_ERR(handle))
+               goto out;
+       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+
+out:
         return dquot_quota_off(sb, type);
  }
  
@@ -4714,9 +4757,8 @@ out:
         if (inode->i_size < off + len) {
                 i_size_write(inode, off + len);
                 EXT4_I(inode)->i_disksize = inode->i_size;
+               ext4_mark_inode_dirty(handle, inode);
         }
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       ext4_mark_inode_dirty(handle, inode);
         mutex_unlock(&inode->i_mutex);
         return len;
  }