Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 7 Aug 2010 20:03:53 +0000 (13:03 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 7 Aug 2010 20:03:53 +0000 (13:03 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 7 Aug 2010 20:03:53 +0000 (13:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 7 Aug 2010 20:03:53 +0000 (13:03 -0700)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c

index feaf498feaa68ee709ea216de5ef87b85d209bcd..5e2ed4504eadd85401d9f86d5e3784ecd2e2e7c1 100644 (file)
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -204,6 +204,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
                                 return error;
                         else {
                                 inode->i_mode = mode;
+                               inode->i_ctime = ext4_current_time(inode);
                                 ext4_mark_inode_dirty(handle, inode);
                                 if (error == 0)
                                         acl = NULL;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c

index 95b7594c76f909d5b029e9d80001f90ef99e45a4..bd30799a43ed0c527a80b024c356eb55e457f953 100644 (file)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -377,14 +377,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
         ext4_grpblk_t bit;
         unsigned int i;
         struct ext4_group_desc *desc;
-       struct ext4_super_block *es;
-       struct ext4_sb_info *sbi;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
         int err = 0, ret, blk_free_count;
         ext4_grpblk_t blocks_freed;
         struct ext4_group_info *grp;
  
-       sbi = EXT4_SB(sb);
-       es = sbi->s_es;
         ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
  
         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -477,7 +474,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
         ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
         if (!err)
                 err = ret;
-       sb->s_dirt = 1;
  
  error_return:
         brelse(bitmap_bh);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c

index 5b6973fbf1bdde32bb1c2a4b6ff73a9d2df9d692..3db5084db9bd06c2d492cd76dd2a014e7bb07bf6 100644 (file)
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -229,16 +229,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
  
         if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
             (start_blk + count < start_blk) ||
-           (start_blk + count > ext4_blocks_count(sbi->s_es)))
+           (start_blk + count > ext4_blocks_count(sbi->s_es))) {
+               sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                 return 0;
+       }
         while (n) {
                 entry = rb_entry(n, struct ext4_system_zone, node);
                 if (start_blk + count - 1 < entry->start_blk)
                         n = n->rb_left;
                 else if (start_blk >= (entry->start_blk + entry->count))
                         n = n->rb_right;
-               else
+               else {
+                       sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                         return 0;
+               }
         }
         return 1;
  }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c

index ea5e6cb7e2a5e345b514d10b5f39a54ec021dbfe..374510f72baaaa02b3f8768ee4b5d8261ab1080c 100644 (file)
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -61,10 +61,11 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
  }
  
  
-int ext4_check_dir_entry(const char *function, struct inode *dir,
-                        struct ext4_dir_entry_2 *de,
-                        struct buffer_head *bh,
-                        unsigned int offset)
+int __ext4_check_dir_entry(const char *function, unsigned int line,
+                          struct inode *dir,
+                          struct ext4_dir_entry_2 *de,
+                          struct buffer_head *bh,
+                          unsigned int offset)
  {
         const char *error_msg = NULL;
         const int rlen = ext4_rec_len_from_disk(de->rec_len,
@@ -83,11 +84,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
                 error_msg = "inode out of bounds";
  
         if (error_msg != NULL)
-               ext4_error_inode(function, dir,
-                       "bad entry in directory: %s - block=%llu"
+               ext4_error_inode(dir, function, line, bh->b_blocknr,
+                       "bad entry in directory: %s - "
                         "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
-                       error_msg, (unsigned long long) bh->b_blocknr,
-                       (unsigned) (offset%bh->b_size), offset,
+                       error_msg, (unsigned) (offset%bh->b_size), offset,
                         le32_to_cpu(de->inode),
                         rlen, de->name_len);
         return error_msg == NULL ? 1 : 0;
@@ -121,7 +121,8 @@ static int ext4_readdir(struct file *filp,
                  * We don't set the inode dirty flag since it's not
                  * critical that it get flushed back to the disk.
                  */
-               ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX);
+               ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
+                                     EXT4_INODE_INDEX);
         }
         stored = 0;
         offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -193,7 +194,7 @@ revalidate:
                 while (!error && filp->f_pos < inode->i_size
                        && offset < sb->s_blocksize) {
                         de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
-                       if (!ext4_check_dir_entry("ext4_readdir", inode, de,
+                       if (!ext4_check_dir_entry(inode, de,
                                                   bh, offset)) {
                                 /*
                                  * On error, skip the f_pos to the next block
@@ -343,7 +344,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
         struct dir_private_info *info;
         int len;
  
-       info = (struct dir_private_info *) dir_file->private_data;
+       info = dir_file->private_data;
         p = &info->root.rb_node;
  
         /* Create and allocate the fname structure */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 19a4de57128ad7e493ec7843050d1fccc7bda8a4..e03841d9f30b2254f41572d6bceaf4945afa7cf3 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -57,10 +57,13 @@
  #endif
  
  #define EXT4_ERROR_INODE(inode, fmt, a...) \
-       ext4_error_inode(__func__, (inode), (fmt), ## a)
+       ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
+
+#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...)                        \
+       ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
  
  #define EXT4_ERROR_FILE(file, fmt, a...)       \
-       ext4_error_file(__func__, (file), (fmt), ## a)
+       ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
  
  /* data type for block offset of block group */
  typedef int ext4_grpblk_t;
@@ -167,13 +170,15 @@ struct mpage_da_data {
  };
  #define        EXT4_IO_UNWRITTEN       0x1
  typedef struct ext4_io_end {
-       struct list_head        list;           /* per-file finished AIO list */
+       struct list_head        list;           /* per-file finished IO list */
         struct inode            *inode;         /* file being written to */
         unsigned int            flag;           /* unwritten or not */
         struct page             *page;          /* page struct for buffer write */
         loff_t                  offset;         /* offset in the file */
         ssize_t                 size;           /* size of the extent */
         struct work_struct      work;           /* data work queue */
+       struct kiocb            *iocb;          /* iocb struct for AIO */
+       int                     result;         /* error value for AIO */
  } ext4_io_end_t;
  
  /*
@@ -460,7 +465,7 @@ struct ext4_new_group_data {
  };
  
  /*
- * Flags used by ext4_get_blocks()
+ * Flags used by ext4_map_blocks()
   */
         /* Allocate any needed blocks and/or convert an unitialized
            extent to be an initialized ext4 */
@@ -873,7 +878,6 @@ struct ext4_inode_info {
  #define EXT4_MOUNT_POSIX_ACL           0x08000 /* POSIX Access Control Lists */
  #define EXT4_MOUNT_NO_AUTO_DA_ALLOC    0x10000 /* No auto delalloc mapping */
  #define EXT4_MOUNT_BARRIER             0x20000 /* Use block barriers */
-#define EXT4_MOUNT_NOBH                        0x40000 /* No bufferheads */
  #define EXT4_MOUNT_QUOTA               0x80000 /* Some quota option set */
  #define EXT4_MOUNT_USRQUOTA            0x100000 /* "old" user quota */
  #define EXT4_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
@@ -982,7 +986,7 @@ struct ext4_super_block {
         __le32  s_last_orphan;          /* start of list of inodes to delete */
         __le32  s_hash_seed[4];         /* HTREE hash seed */
         __u8    s_def_hash_version;     /* Default hash version to use */
-       __u8    s_reserved_char_pad;
+       __u8    s_jnl_backup_type;
         __le16  s_desc_size;            /* size of group descriptor */
  /*100*/        __le32  s_default_mount_opts;
         __le32  s_first_meta_bg;        /* First metablock block group */
@@ -1000,12 +1004,34 @@ struct ext4_super_block {
         __le64  s_mmp_block;            /* Block for multi-mount protection */
         __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
         __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
-       __u8    s_reserved_char_pad2;
+       __u8    s_reserved_char_pad;
         __le16  s_reserved_pad;
         __le64  s_kbytes_written;       /* nr of lifetime kilobytes written */
-       __u32   s_reserved[160];        /* Padding to the end of the block */
+       __le32  s_snapshot_inum;        /* Inode number of active snapshot */
+       __le32  s_snapshot_id;          /* sequential ID of active snapshot */
+       __le64  s_snapshot_r_blocks_count; /* reserved blocks for active
+                                             snapshot's future use */
+       __le32  s_snapshot_list;        /* inode number of the head of the
+                                          on-disk snapshot list */
+#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
+       __le32  s_error_count;          /* number of fs errors */
+       __le32  s_first_error_time;     /* first time an error happened */
+       __le32  s_first_error_ino;      /* inode involved in first error */
+       __le64  s_first_error_block;    /* block involved of first error */
+       __u8    s_first_error_func[32]; /* function where the error happened */
+       __le32  s_first_error_line;     /* line number where error happened */
+       __le32  s_last_error_time;      /* most recent time of an error */
+       __le32  s_last_error_ino;       /* inode involved in last error */
+       __le32  s_last_error_line;      /* line number where error happened */
+       __le64  s_last_error_block;     /* block involved of last error */
+       __u8    s_last_error_func[32];  /* function where the error happened */
+#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
+       __u8    s_mount_opts[64];
+       __le32  s_reserved[112];        /* Padding to the end of the block */
  };
  
+#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
+
  #ifdef __KERNEL__
  
  /*
@@ -1143,6 +1169,9 @@ struct ext4_sb_info {
  
         /* workqueue for dio unwritten */
         struct workqueue_struct *dio_unwritten_wq;
+
+       /* timer for periodic error stats printing */
+       struct timer_list s_err_report;
  };
  
  static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1313,6 +1342,10 @@ EXT4_INODE_BIT_FNS(state, state_flags)
  #define EXT4_DEFM_JMODE_DATA   0x0020
  #define EXT4_DEFM_JMODE_ORDERED        0x0040
  #define EXT4_DEFM_JMODE_WBACK  0x0060
+#define EXT4_DEFM_NOBARRIER    0x0100
+#define EXT4_DEFM_BLOCK_VALIDITY 0x0200
+#define EXT4_DEFM_DISCARD      0x0400
+#define EXT4_DEFM_NODELALLOC   0x0800
  
  /*
   * Default journal batch times
@@ -1378,6 +1411,43 @@ struct ext4_dir_entry_2 {
                                          ~EXT4_DIR_ROUND)
  #define EXT4_MAX_REC_LEN               ((1<<16)-1)
  
+/*
+ * If we ever get support for fs block sizes > page_size, we'll need
+ * to remove the #if statements in the next two functions...
+ */
+static inline unsigned int
+ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
+{
+       unsigned len = le16_to_cpu(dlen);
+
+#if (PAGE_CACHE_SIZE >= 65536)
+       if (len == EXT4_MAX_REC_LEN || len == 0)
+               return blocksize;
+       return (len & 65532) | ((len & 3) << 16);
+#else
+       return len;
+#endif
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
+{
+       if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
+               BUG();
+#if (PAGE_CACHE_SIZE >= 65536)
+       if (len < 65536)
+               return cpu_to_le16(len);
+       if (len == blocksize) {
+               if (blocksize == 65536)
+                       return cpu_to_le16(EXT4_MAX_REC_LEN);
+               else
+                       return cpu_to_le16(0);
+       }
+       return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
+#else
+       return cpu_to_le16(len);
+#endif
+}
+
  /*
   * Hash Tree Directory indexing
   * (c) Daniel Phillips, 2001
@@ -1510,9 +1580,11 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
                 ext4_init_block_bitmap(sb, NULL, group, desc)
  
  /* dir.c */
-extern int ext4_check_dir_entry(const char *, struct inode *,
-                               struct ext4_dir_entry_2 *,
-                               struct buffer_head *, unsigned int);
+extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
+                                 struct ext4_dir_entry_2 *,
+                                 struct buffer_head *, unsigned int);
+#define ext4_check_dir_entry(dir, de, bh, offset) \
+       __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
  extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
                                     __u32 minor_hash,
                                     struct ext4_dir_entry_2 *dirent);
@@ -1601,8 +1673,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
  extern int ext4_ext_migrate(struct inode *);
  
  /* namei.c */
-extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
-extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
  extern int ext4_orphan_add(handle_t *, struct inode *);
  extern int ext4_orphan_del(handle_t *, struct inode *);
  extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -1616,25 +1686,38 @@ extern int ext4_group_extend(struct super_block *sb,
                                 ext4_fsblk_t n_blocks_count);
  
  /* super.c */
-extern void __ext4_error(struct super_block *, const char *, const char *, ...)
-       __attribute__ ((format (printf, 3, 4)));
-#define ext4_error(sb, message...)     __ext4_error(sb, __func__, ## message)
-extern void ext4_error_inode(const char *, struct inode *, const char *, ...)
-       __attribute__ ((format (printf, 3, 4)));
-extern void ext4_error_file(const char *, struct file *, const char *, ...)
-       __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_std_error(struct super_block *, const char *, int);
-extern void ext4_abort(struct super_block *, const char *, const char *, ...)
-       __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_warning(struct super_block *, const char *,
+extern void __ext4_error(struct super_block *, const char *, unsigned int,
+                        const char *, ...)
+       __attribute__ ((format (printf, 4, 5)));
+#define ext4_error(sb, message...)     __ext4_error(sb, __func__,      \
+                                                    __LINE__, ## message)
+extern void ext4_error_inode(struct inode *, const char *, unsigned int,
+                            ext4_fsblk_t, const char *, ...)
+       __attribute__ ((format (printf, 5, 6)));
+extern void ext4_error_file(struct file *, const char *, unsigned int,
+                           const char *, ...)
+       __attribute__ ((format (printf, 4, 5)));
+extern void __ext4_std_error(struct super_block *, const char *,
+                            unsigned int, int);
+extern void __ext4_abort(struct super_block *, const char *, unsigned int,
+                      const char *, ...)
+       __attribute__ ((format (printf, 4, 5)));
+#define ext4_abort(sb, message...)     __ext4_abort(sb, __func__, \
+                                                      __LINE__, ## message)
+extern void __ext4_warning(struct super_block *, const char *, unsigned int,
                           const char *, ...)
-       __attribute__ ((format (printf, 3, 4)));
-#define ext4_warning(sb, message...)   __ext4_warning(sb, __func__, ## message)
+       __attribute__ ((format (printf, 4, 5)));
+#define ext4_warning(sb, message...)   __ext4_warning(sb, __func__, \
+                                                      __LINE__, ## message)
  extern void ext4_msg(struct super_block *, const char *, const char *, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
-                               const char *, const char *, ...)
-       __attribute__ ((format (printf, 4, 5)));
+extern void __ext4_grp_locked_error(const char *, unsigned int, \
+                                   struct super_block *, ext4_group_t, \
+                                   unsigned long, ext4_fsblk_t, \
+                                   const char *, ...)
+       __attribute__ ((format (printf, 7, 8)));
+#define ext4_grp_locked_error(sb, grp, message...) \
+       __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
  extern void ext4_update_dynamic_rev(struct super_block *sb);
  extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
                                         __u32 compat);
@@ -1768,7 +1851,7 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
  #define ext4_std_error(sb, errno)                              \
  do {                                                           \
         if ((errno))                                            \
-               __ext4_std_error((sb), __func__, (errno));      \
+               __ext4_std_error((sb), __func__, __LINE__, (errno));    \
  } while (0)
  
  #ifdef CONFIG_SMP
@@ -1860,6 +1943,12 @@ static inline void ext4_unlock_group(struct super_block *sb,
         spin_unlock(ext4_group_lock_ptr(sb, group));
  }
  
+static inline void ext4_mark_super_dirty(struct super_block *sb)
+{
+       if (EXT4_SB(sb)->s_journal == NULL)
+               sb->s_dirt =1;
+}
+
  /*
   * Inodes and files operations
   */
@@ -1905,9 +1994,6 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                           ssize_t len);
  extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
                            struct ext4_map_blocks *map, int flags);
-extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
-                          sector_t block, unsigned int max_blocks,
-                          struct buffer_head *bh, int flags);
  extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                         __u64 start, __u64 len);
  /* move_extent.c */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c

index 53d2764d71caee518c5a3639c65d5708456ed25c..6e272ef6ba96c4938cc357ef498c760bde67d39e 100644 (file)
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,29 +6,29 @@
  
  #include <trace/events/ext4.h>
  
-int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
-                               struct buffer_head *bh)
+int __ext4_journal_get_undo_access(const char *where, unsigned int line,
+                                  handle_t *handle, struct buffer_head *bh)
  {
         int err = 0;
  
         if (ext4_handle_valid(handle)) {
                 err = jbd2_journal_get_undo_access(handle, bh);
                 if (err)
-                       ext4_journal_abort_handle(where, __func__, bh,
+                       ext4_journal_abort_handle(where, line, __func__, bh,
                                                   handle, err);
         }
         return err;
  }
  
-int __ext4_journal_get_write_access(const char *where, handle_t *handle,
-                               struct buffer_head *bh)
+int __ext4_journal_get_write_access(const char *where, unsigned int line,
+                                   handle_t *handle, struct buffer_head *bh)
  {
         int err = 0;
  
         if (ext4_handle_valid(handle)) {
                 err = jbd2_journal_get_write_access(handle, bh);
                 if (err)
-                       ext4_journal_abort_handle(where, __func__, bh,
+                       ext4_journal_abort_handle(where, line, __func__, bh,
                                                   handle, err);
         }
         return err;
@@ -46,9 +46,9 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
   * If the handle isn't valid we're not journaling, but we still need to
   * call into ext4_journal_revoke() to put the buffer head.
   */
-int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
-                 struct inode *inode, struct buffer_head *bh,
-                 ext4_fsblk_t blocknr)
+int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
+                 int is_metadata, struct inode *inode,
+                 struct buffer_head *bh, ext4_fsblk_t blocknr)
  {
         int err;
  
@@ -79,8 +79,8 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
                         BUFFER_TRACE(bh, "call jbd2_journal_forget");
                         err = jbd2_journal_forget(handle, bh);
                         if (err)
-                               ext4_journal_abort_handle(where, __func__, bh,
-                                                         handle, err);
+                               ext4_journal_abort_handle(where, line, __func__,
+                                                         bh, handle, err);
                         return err;
                 }
                 return 0;
@@ -92,15 +92,16 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
         BUFFER_TRACE(bh, "call jbd2_journal_revoke");
         err = jbd2_journal_revoke(handle, blocknr, bh);
         if (err) {
-               ext4_journal_abort_handle(where, __func__, bh, handle, err);
-               ext4_abort(inode->i_sb, __func__,
+               ext4_journal_abort_handle(where, line, __func__,
+                                         bh, handle, err);
+               __ext4_abort(inode->i_sb, where, line,
                            "error %d when attempting revoke", err);
         }
         BUFFER_TRACE(bh, "exit");
         return err;
  }
  
-int __ext4_journal_get_create_access(const char *where,
+int __ext4_journal_get_create_access(const char *where, unsigned int line,
                                 handle_t *handle, struct buffer_head *bh)
  {
         int err = 0;
@@ -108,22 +109,23 @@ int __ext4_journal_get_create_access(const char *where,
         if (ext4_handle_valid(handle)) {
                 err = jbd2_journal_get_create_access(handle, bh);
                 if (err)
-                       ext4_journal_abort_handle(where, __func__, bh,
-                                                 handle, err);
+                       ext4_journal_abort_handle(where, line, __func__,
+                                                 bh, handle, err);
         }
         return err;
  }
  
-int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
-                                struct inode *inode, struct buffer_head *bh)
+int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
+                                handle_t *handle, struct inode *inode,
+                                struct buffer_head *bh)
  {
         int err = 0;
  
         if (ext4_handle_valid(handle)) {
                 err = jbd2_journal_dirty_metadata(handle, bh);
                 if (err)
-                       ext4_journal_abort_handle(where, __func__, bh,
-                                                 handle, err);
+                       ext4_journal_abort_handle(where, line, __func__,
+                                                 bh, handle, err);
         } else {
                 if (inode)
                         mark_buffer_dirty_inode(bh, inode);
@@ -132,14 +134,33 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
                 if (inode && inode_needs_sync(inode)) {
                         sync_dirty_buffer(bh);
                         if (buffer_req(bh) && !buffer_uptodate(bh)) {
-                               ext4_error(inode->i_sb,
-                                          "IO error syncing inode, "
-                                          "inode=%lu, block=%llu",
-                                          inode->i_ino,
-                                          (unsigned long long) bh->b_blocknr);
+                               struct ext4_super_block *es;
+
+                               es = EXT4_SB(inode->i_sb)->s_es;
+                               es->s_last_error_block =
+                                       cpu_to_le64(bh->b_blocknr);
+                               ext4_error_inode(inode, where, line,
+                                                bh->b_blocknr,
+                                       "IO error syncing itable block");
                                 err = -EIO;
                         }
                 }
         }
         return err;
  }
+
+int __ext4_handle_dirty_super(const char *where, unsigned int line,
+                             handle_t *handle, struct super_block *sb)
+{
+       struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
+       int err = 0;
+
+       if (ext4_handle_valid(handle)) {
+               err = jbd2_journal_dirty_metadata(handle, bh);
+               if (err)
+                       ext4_journal_abort_handle(where, line, __func__,
+                                                 bh, handle, err);
+       } else
+               sb->s_dirt = 1;
+       return err;
+}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h

index dade0c024797f7fed6082ea4dff6de927d698afd..b0bd792c58c5f075bf5853ad19738fc78c98f959 100644 (file)
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -122,39 +122,47 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
  /*
   * Wrapper functions with which ext4 calls into JBD.
   */
-void ext4_journal_abort_handle(const char *caller, const char *err_fn,
+void ext4_journal_abort_handle(const char *caller, unsigned int line,
+                              const char *err_fn,
                 struct buffer_head *bh, handle_t *handle, int err);
  
-int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
-                               struct buffer_head *bh);
+int __ext4_journal_get_undo_access(const char *where, unsigned int line,
+                                  handle_t *handle, struct buffer_head *bh);
  
-int __ext4_journal_get_write_access(const char *where, handle_t *handle,
-                               struct buffer_head *bh);
+int __ext4_journal_get_write_access(const char *where, unsigned int line,
+                                   handle_t *handle, struct buffer_head *bh);
  
-int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
-                 struct inode *inode, struct buffer_head *bh,
-                 ext4_fsblk_t blocknr);
+int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
+                 int is_metadata, struct inode *inode,
+                 struct buffer_head *bh, ext4_fsblk_t blocknr);
  
-int __ext4_journal_get_create_access(const char *where,
+int __ext4_journal_get_create_access(const char *where, unsigned int line,
                                 handle_t *handle, struct buffer_head *bh);
  
-int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
-                                struct inode *inode, struct buffer_head *bh);
+int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
+                                handle_t *handle, struct inode *inode,
+                                struct buffer_head *bh);
+
+int __ext4_handle_dirty_super(const char *where, unsigned int line,
+                             handle_t *handle, struct super_block *sb);
  
  #define ext4_journal_get_undo_access(handle, bh) \
-       __ext4_journal_get_undo_access(__func__, (handle), (bh))
+       __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
  #define ext4_journal_get_write_access(handle, bh) \
-       __ext4_journal_get_write_access(__func__, (handle), (bh))
+       __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
  #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
-       __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
-                     (block_nr))
+       __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
+                     (bh), (block_nr))
  #define ext4_journal_get_create_access(handle, bh) \
-       __ext4_journal_get_create_access(__func__, (handle), (bh))
+       __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
  #define ext4_handle_dirty_metadata(handle, inode, bh) \
-       __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
+       __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
+                                    (bh))
+#define ext4_handle_dirty_super(handle, sb) \
+       __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
  
  handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
-int __ext4_journal_stop(const char *where, handle_t *handle);
+int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
  
  #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
  
@@ -207,7 +215,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
  }
  
  #define ext4_journal_stop(handle) \
-       __ext4_journal_stop(__func__, (handle))
+       __ext4_journal_stop(__func__, __LINE__, (handle))
  
  static inline handle_t *ext4_journal_current_handle(void)
  {
@@ -308,17 +316,15 @@ static inline int ext4_should_writeback_data(struct inode *inode)
   * This function controls whether or not we should try to go down the
   * dioread_nolock code paths, which makes it safe to avoid taking
   * i_mutex for direct I/O reads.  This only works for extent-based
- * files, and it doesn't work for nobh or if data journaling is
- * enabled, since the dioread_nolock code uses b_private to pass
- * information back to the I/O completion handler, and this conflicts
- * with the jbd's use of b_private.
+ * files, and it doesn't work if data journaling is enabled, since the
+ * dioread_nolock code uses b_private to pass information back to the
+ * I/O completion handler, and this conflicts with the jbd's use of
+ * b_private.
   */
  static inline int ext4_should_dioread_nolock(struct inode *inode)
  {
         if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
                 return 0;
-       if (test_opt(inode->i_sb, NOBH))
-               return 0;
         if (!S_ISREG(inode->i_mode))
                 return 0;
         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c

index bf029c7d5518d1352d276fe4d26206be3d3007be..06328d3e5717fd368ce7872fe0aa3db879cf6817 100644 (file)
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -401,9 +401,9 @@ static int ext4_valid_extent_entries(struct inode *inode,
         return 1;
  }
  
-static int __ext4_ext_check(const char *function, struct inode *inode,
-                                       struct ext4_extent_header *eh,
-                                       int depth)
+static int __ext4_ext_check(const char *function, unsigned int line,
+                           struct inode *inode, struct ext4_extent_header *eh,
+                           int depth)
  {
         const char *error_msg;
         int max = 0;
@@ -436,7 +436,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
         return 0;
  
  corrupted:
-       ext4_error_inode(function, inode,
+       ext4_error_inode(inode, function, line, 0,
                         "bad header/extent: %s - magic %x, "
                         "entries %u, max %u(%u), depth %u(%u)",
                         error_msg, le16_to_cpu(eh->eh_magic),
@@ -447,7 +447,7 @@ corrupted:
  }
  
  #define ext4_ext_check(inode, eh, depth)       \
-       __ext4_ext_check(__func__, inode, eh, depth)
+       __ext4_ext_check(__func__, __LINE__, inode, eh, depth)
  
  int ext4_ext_check_inode(struct inode *inode)
  {
@@ -1083,7 +1083,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
  {
         struct ext4_ext_path *curp = path;
         struct ext4_extent_header *neh;
-       struct ext4_extent_idx *fidx;
         struct buffer_head *bh;
         ext4_fsblk_t newblock;
         int err = 0;
@@ -1144,10 +1143,10 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
         ext4_idx_store_pblock(curp->p_idx, newblock);
  
         neh = ext_inode_hdr(inode);
-       fidx = EXT_FIRST_INDEX(neh);
         ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
                   le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
-                 le32_to_cpu(fidx->ei_block), idx_pblock(fidx));
+                 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
+                 idx_pblock(EXT_FIRST_INDEX(neh)));
  
         neh->eh_depth = cpu_to_le16(path->p_depth + 1);
         err = ext4_ext_dirty(handle, inode, curp);
@@ -2954,7 +2953,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
         struct ext4_extent *ex1 = NULL;
         struct ext4_extent *ex2 = NULL;
         struct ext4_extent *ex3 = NULL;
-       struct ext4_extent_header *eh;
         ext4_lblk_t ee_block, eof_block;
         unsigned int allocated, ee_len, depth;
         ext4_fsblk_t newblock;
@@ -2971,7 +2969,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                 eof_block = map->m_lblk + map->m_len;
  
         depth = ext_depth(inode);
-       eh = path[depth].p_hdr;
         ex = path[depth].p_ext;
         ee_block = le32_to_cpu(ex->ee_block);
         ee_len = ext4_ext_get_actual_len(ex);
@@ -3058,7 +3055,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                         err = PTR_ERR(path);
                         goto out;
                 }
-               eh = path[depth].p_hdr;
                 ex = path[depth].p_ext;
                 if (ex2 != &newex)
                         ex2 = ex;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index 5313ae4cda2d2149d58efc5f228e4973f1e8f33f..ee92b66d45589c4ec97d6b7b2e5d93d871df5320 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -70,7 +70,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
                 size_t length = iov_length(iov, nr_segs);
  
-               if (pos > sbi->s_bitmap_maxbytes)
+               if ((pos > sbi->s_bitmap_maxbytes ||
+                   (pos == sbi->s_bitmap_maxbytes && length > 0)))
                         return -EFBIG;
  
                 if (pos + length > sbi->s_bitmap_maxbytes) {
@@ -123,7 +124,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                 if (!IS_ERR(cp)) {
                         memcpy(sbi->s_es->s_last_mounted, cp,
                                sizeof(sbi->s_es->s_last_mounted));
-                       sb->s_dirt = 1;
+                       ext4_mark_super_dirty(sb);
                 }
         }
         return dquot_file_open(inode, filp);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c

index 25c4b3173fd935f1550ce89116344a72f9f02374..ac377505ed57a2aa4b319cd868ffcd90db29dc5d 100644 (file)
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -279,7 +279,7 @@ out:
                 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
                 if (!fatal)
                         fatal = err;
-               sb->s_dirt = 1;
+               ext4_mark_super_dirty(sb);
         } else
                 ext4_error(sb, "bit already cleared for inode %lu", ino);
  
@@ -965,7 +965,7 @@ got:
         percpu_counter_dec(&sbi->s_freeinodes_counter);
         if (S_ISDIR(mode))
                 percpu_counter_inc(&sbi->s_dirs_counter);
-       sb->s_dirt = 1;
+       ext4_mark_super_dirty(sb);
  
         if (sbi->s_log_groups_per_flex) {
                 flex_group = ext4_flex_group(sbi, group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 0afc8c1d8cf3597bf075ecb731a16accb67687ad..a0ab3754d0d61a26aa366a68b9e5704b292b8924 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -221,6 +221,7 @@ void ext4_delete_inode(struct inode *inode)
                                      "couldn't extend journal (err %d)", err);
                 stop_handle:
                         ext4_journal_stop(handle);
+                       ext4_orphan_del(NULL, inode);
                         goto no_delete;
                 }
         }
@@ -337,9 +338,11 @@ static int ext4_block_to_path(struct inode *inode,
         return n;
  }
  
-static int __ext4_check_blockref(const char *function, struct inode *inode,
+static int __ext4_check_blockref(const char *function, unsigned int line,
+                                struct inode *inode,
                                  __le32 *p, unsigned int max)
  {
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
         __le32 *bref = p;
         unsigned int blk;
  
@@ -348,8 +351,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
                 if (blk &&
                     unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                     blk, 1))) {
-                       ext4_error_inode(function, inode,
-                                        "invalid block reference %u", blk);
+                       es->s_last_error_block = cpu_to_le64(blk);
+                       ext4_error_inode(inode, function, line, blk,
+                                        "invalid block");
                         return -EIO;
                 }
         }
@@ -358,11 +362,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
  
  
  #define ext4_check_indirect_blockref(inode, bh)                         \
-       __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data,  \
+       __ext4_check_blockref(__func__, __LINE__, inode,                \
+                             (__le32 *)(bh)->b_data,                   \
                               EXT4_ADDR_PER_BLOCK((inode)->i_sb))
  
  #define ext4_check_inode_blockref(inode)                                \
-       __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data,   \
+       __ext4_check_blockref(__func__, __LINE__, inode,                \
+                             EXT4_I(inode)->i_data,                    \
                               EXT4_NDIR_BLOCKS)
  
  /**
@@ -1128,20 +1134,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
                 ext4_discard_preallocations(inode);
  }
  
-static int check_block_validity(struct inode *inode, const char *func,
+static int __check_block_validity(struct inode *inode, const char *func,
+                               unsigned int line,
                                 struct ext4_map_blocks *map)
  {
         if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
                                    map->m_len)) {
-               ext4_error_inode(func, inode,
-                          "lblock %lu mapped to illegal pblock %llu "
-                          "(length %d)", (unsigned long) map->m_lblk,
-                                map->m_pblk, map->m_len);
+               ext4_error_inode(inode, func, line, map->m_pblk,
+                                "lblock %lu mapped to illegal pblock "
+                                "(length %d)", (unsigned long) map->m_lblk,
+                                map->m_len);
                 return -EIO;
         }
         return 0;
  }
  
+#define check_block_validity(inode, map)       \
+       __check_block_validity((inode), __func__, __LINE__, (map))
+
  /*
   * Return the number of contiguous dirty pages in a given inode
   * starting at page frame idx.
@@ -1244,7 +1254,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         up_read((&EXT4_I(inode)->i_data_sem));
  
         if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-               int ret = check_block_validity(inode, __func__, map);
+               int ret = check_block_validity(inode, map);
                 if (ret != 0)
                         return ret;
         }
@@ -1324,9 +1334,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
  
         up_write((&EXT4_I(inode)->i_data_sem));
         if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-               int ret = check_block_validity(inode,
-                                              "ext4_map_blocks_after_alloc",
-                                              map);
+               int ret = check_block_validity(inode, map);
                 if (ret != 0)
                         return ret;
         }
@@ -1519,9 +1527,25 @@ static int walk_page_buffers(handle_t *handle,
  static int do_journal_get_write_access(handle_t *handle,
                                        struct buffer_head *bh)
  {
+       int dirty = buffer_dirty(bh);
+       int ret;
+
         if (!buffer_mapped(bh) || buffer_freed(bh))
                 return 0;
-       return ext4_journal_get_write_access(handle, bh);
+       /*
+        * __block_prepare_write() could have dirtied some buffers. Clean
+        * the dirty bit as jbd2_journal_get_write_access() could complain
+        * otherwise about fs integrity issues. Setting of the dirty bit
+        * by __block_prepare_write() isn't a real problem here as we clear
+        * the bit before releasing a page lock and thus writeback cannot
+        * ever write the buffer.
+        */
+       if (dirty)
+               clear_buffer_dirty(bh);
+       ret = ext4_journal_get_write_access(handle, bh);
+       if (!ret && dirty)
+               ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+       return ret;
  }
  
  /*
@@ -2194,7 +2218,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         BUG_ON(!handle);
  
         /*
-        * Call ext4_get_blocks() to allocate any delayed allocation
+        * Call ext4_map_blocks() to allocate any delayed allocation
          * blocks, or to convert an uninitialized extent to be
          * initialized (in the case where we have written into
          * one or more preallocated blocks).
@@ -2203,7 +2227,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
          * indicate that we are on the delayed allocation path.  This
          * affects functions in many different parts of the allocation
          * call path.  This flag exists primarily because we don't
-        * want to change *many* call functions, so ext4_get_blocks()
+        * want to change *many* call functions, so ext4_map_blocks()
          * will set the magic i_delalloc_reserved_flag once the
          * inode's allocation semaphore is taken.
          *
@@ -2221,6 +2245,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
  
         blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
         if (blks < 0) {
+               struct super_block *sb = mpd->inode->i_sb;
+
                 err = blks;
                 /*
                  * If get block returns with error we simply
@@ -2231,7 +2257,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                         return 0;
  
                 if (err == -ENOSPC &&
-                   ext4_count_free_blocks(mpd->inode->i_sb)) {
+                   ext4_count_free_blocks(sb)) {
                         mpd->retval = err;
                         return 0;
                 }
@@ -2243,16 +2269,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                  * writepage and writepages will again try to write
                  * the same.
                  */
-               ext4_msg(mpd->inode->i_sb, KERN_CRIT,
-                        "delayed block allocation failed for inode %lu at "
-                        "logical offset %llu with max blocks %zd with "
-                        "error %d", mpd->inode->i_ino,
-                        (unsigned long long) next,
-                        mpd->b_size >> mpd->inode->i_blkbits, err);
-               printk(KERN_CRIT "This should not happen!!  "
-                      "Data will be lost\n");
-               if (err == -ENOSPC) {
-                       ext4_print_free_blocks(mpd->inode);
+               if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+                       ext4_msg(sb, KERN_CRIT,
+                                "delayed block allocation failed for inode %lu "
+                                "at logical offset %llu with max blocks %zd "
+                                "with error %d", mpd->inode->i_ino,
+                                (unsigned long long) next,
+                                mpd->b_size >> mpd->inode->i_blkbits, err);
+                       ext4_msg(sb, KERN_CRIT,
+                               "This should not happen!! Data will be lost\n");
+                       if (err == -ENOSPC)
+                               ext4_print_free_blocks(mpd->inode);
                 }
                 /* invalidate all the pages */
                 ext4_da_block_invalidatepages(mpd, next,
@@ -2320,7 +2347,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
          * XXX Don't go larger than mballoc is willing to allocate
          * This is a stopgap solution.  We eventually need to fold
          * mpage_da_submit_io() into this function and then call
-        * ext4_get_blocks() multiple times in a loop
+        * ext4_map_blocks() multiple times in a loop
          */
         if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
                 goto flush_it;
@@ -2553,18 +2580,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
  /*
   * This function is used as a standard get_block_t calback function
   * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_prepare_write(), nobh_writepage(), and
- * block_write_full_page().  These functions should only try to map a
- * single block at a time.
+ * callback function for block_prepare_write() and block_write_full_page().
+ * These functions should only try to map a single block at a time.
   *
   * Since this function doesn't do block allocations even if the caller
   * requests it by passing in create=1, it is critically important that
   * any caller checks to make sure that any buffer heads are returned
   * by this function are either all already mapped or marked for
- * delayed allocation before calling nobh_writepage() or
- * block_write_full_page().  Otherwise, b_blocknr could be left
- * unitialized, and the page write functions will be taken by
- * surprise.
+ * delayed allocation before calling block_write_full_page().  Otherwise,
+ * b_blocknr could be left uninitialized, and the page write functions will
+ * be taken by surprise.
   */
  static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
@@ -2749,9 +2774,7 @@ static int ext4_writepage(struct page *page,
                 return __ext4_journalled_writepage(page, len);
         }
  
-       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_writepage(page, noalloc_get_block_write, wbc);
-       else if (page_bufs && buffer_uninit(page_bufs)) {
+       if (page_bufs && buffer_uninit(page_bufs)) {
                 ext4_set_bh_endio(page_bufs, inode);
                 ret = block_write_full_page_endio(page, noalloc_get_block_write,
                                             wbc, ext4_end_io_buffer_write);
@@ -3146,13 +3169,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
         int ret, retries = 0;
         struct page *page;
         pgoff_t index;
-       unsigned from, to;
         struct inode *inode = mapping->host;
         handle_t *handle;
  
         index = pos >> PAGE_CACHE_SHIFT;
-       from = pos & (PAGE_CACHE_SIZE - 1);
-       to = from + len;
  
         if (ext4_nonda_switch(inode->i_sb)) {
                 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -3668,6 +3688,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
                 return ret;
         }
  
+       if (io->iocb)
+               aio_complete(io->iocb, io->result, 0);
         /* clear the DIO AIO unwritten flag */
         io->flag = 0;
         return ret;
@@ -3767,6 +3789,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
                 io->offset = 0;
                 io->size = 0;
                 io->page = NULL;
+               io->iocb = NULL;
+               io->result = 0;
                 INIT_WORK(&io->work, ext4_end_io_work);
                 INIT_LIST_HEAD(&io->list);
         }
@@ -3796,12 +3820,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
         if (io_end->flag != EXT4_IO_UNWRITTEN){
                 ext4_free_io_end(io_end);
                 iocb->private = NULL;
-               goto out;
+out:
+               if (is_async)
+                       aio_complete(iocb, ret, 0);
+               return;
         }
  
         io_end->offset = offset;
         io_end->size = size;
-       io_end->flag = EXT4_IO_UNWRITTEN;
+       if (is_async) {
+               io_end->iocb = iocb;
+               io_end->result = ret;
+       }
         wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
  
         /* queue the work to convert unwritten extents to written */
@@ -3813,9 +3843,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
         list_add_tail(&io_end->list, &ei->i_completed_io_list);
         spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
         iocb->private = NULL;
-out:
-       if (is_async)
-               aio_complete(iocb, ret, 0);
  }
  
  static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -3941,7 +3968,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                                 return -ENOMEM;
                         /*
                          * we save the io structure for current async
-                        * direct IO, so that later ext4_get_blocks()
+                        * direct IO, so that later ext4_map_blocks()
                          * could flag the io structure whether there
                          * is a unwritten extents needs to be converted
                          * when IO is completed.
@@ -4132,17 +4159,6 @@ int ext4_block_truncate_page(handle_t *handle,
         length = blocksize - (offset & (blocksize - 1));
         iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
  
-       /*
-        * For "nobh" option,  we can only work if we don't need to
-        * read-in the page - otherwise we create buffers to do the IO.
-        */
-       if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
-            ext4_should_writeback_data(inode) && PageUptodate(page)) {
-               zero_user(page, offset, length);
-               set_page_dirty(page);
-               goto unlock;
-       }
-
         if (!page_has_buffers(page))
                 create_empty_buffers(page, blocksize, 0);
  
@@ -4492,9 +4508,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                          * (should be rare).
                          */
                         if (!bh) {
-                               EXT4_ERROR_INODE(inode,
-                                                "Read failure block=%llu",
-                                                (unsigned long long) nr);
+                               EXT4_ERROR_INODE_BLOCK(inode, nr,
+                                                      "Read failure");
                                 continue;
                         }
  
@@ -4505,27 +4520,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                         (__le32 *) bh->b_data + addr_per_block,
                                         depth);
  
-                       /*
-                        * We've probably journalled the indirect block several
-                        * times during the truncate.  But it's no longer
-                        * needed and we now drop it from the transaction via
-                        * jbd2_journal_revoke().
-                        *
-                        * That's easy if it's exclusively part of this
-                        * transaction.  But if it's part of the committing
-                        * transaction then jbd2_journal_forget() will simply
-                        * brelse() it.  That means that if the underlying
-                        * block is reallocated in ext4_get_block(),
-                        * unmap_underlying_metadata() will find this block
-                        * and will try to get rid of it.  damn, damn.
-                        *
-                        * If this block has already been committed to the
-                        * journal, a revoke record will be written.  And
-                        * revoke records must be emitted *before* clearing
-                        * this block's bit in the bitmaps.
-                        */
-                       ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
-
                         /*
                          * Everything below this this pointer has been
                          * released.  Now let this top-of-subtree go.
@@ -4550,8 +4544,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                             blocks_for_truncate(inode));
                         }
  
+                       /*
+                        * The forget flag here is critical because if
+                        * we are journaling (and not doing data
+                        * journaling), we have to make sure a revoke
+                        * record is written to prevent the journal
+                        * replay from overwriting the (former)
+                        * indirect block if it gets reallocated as a
+                        * data block.  This must happen in the same
+                        * transaction where the data blocks are
+                        * actually freed.
+                        */
                         ext4_free_blocks(handle, inode, 0, nr, 1,
-                                        EXT4_FREE_BLOCKS_METADATA);
+                                        EXT4_FREE_BLOCKS_METADATA|
+                                        EXT4_FREE_BLOCKS_FORGET);
  
                         if (parent_bh) {
                                 /*
@@ -4809,8 +4815,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
  
         bh = sb_getblk(sb, block);
         if (!bh) {
-               EXT4_ERROR_INODE(inode, "unable to read inode block - "
-                                "block %llu", block);
+               EXT4_ERROR_INODE_BLOCK(inode, block,
+                                      "unable to read itable block");
                 return -EIO;
         }
         if (!buffer_uptodate(bh)) {
@@ -4908,8 +4914,8 @@ make_io:
                 submit_bh(READ_META, bh);
                 wait_on_buffer(bh);
                 if (!buffer_uptodate(bh)) {
-                       EXT4_ERROR_INODE(inode, "unable to read inode "
-                                        "block %llu", block);
+                       EXT4_ERROR_INODE_BLOCK(inode, block,
+                                              "unable to read itable block");
                         brelse(bh);
                         return -EIO;
                 }
@@ -4980,7 +4986,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
                 /* we are using combined 48 bit field */
                 i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
                                         le32_to_cpu(raw_inode->i_blocks_lo);
-               if (ei->i_flags & EXT4_HUGE_FILE_FL) {
+               if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
                         /* i_blocks represent file system block size */
                         return i_blocks  << (inode->i_blkbits - 9);
                 } else {
@@ -5076,7 +5082,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 transaction_t *transaction;
                 tid_t tid;
  
-               spin_lock(&journal->j_state_lock);
+               read_lock(&journal->j_state_lock);
                 if (journal->j_running_transaction)
                         transaction = journal->j_running_transaction;
                 else
@@ -5085,7 +5091,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                         tid = transaction->t_tid;
                 else
                         tid = journal->j_commit_sequence;
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                 ei->i_sync_tid = tid;
                 ei->i_datasync_tid = tid;
         }
@@ -5130,7 +5136,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                                  ei->i_file_acl);
                 ret = -EIO;
                 goto bad_inode;
-       } else if (ei->i_flags & EXT4_EXTENTS_FL) {
+       } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
                     (S_ISLNK(inode->i_mode) &&
                      !ext4_inode_is_fast_symlink(inode)))
@@ -5410,9 +5416,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
                 if (wbc->sync_mode == WB_SYNC_ALL)
                         sync_dirty_buffer(iloc.bh);
                 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-                       EXT4_ERROR_INODE(inode,
-                               "IO error syncing inode (block=%llu)",
-                               (unsigned long long) iloc.bh->b_blocknr);
+                       EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
+                                        "IO error syncing inode");
                         err = -EIO;
                 }
                 brelse(iloc.bh);
@@ -5487,10 +5492,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  
-                       if (attr->ia_size > sbi->s_bitmap_maxbytes) {
-                               error = -EFBIG;
-                               goto err_out;
-                       }
+                       if (attr->ia_size > sbi->s_bitmap_maxbytes)
+                               return -EFBIG;
                 }
         }
  
@@ -5692,7 +5695,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
   * Calculate the journal credits for a chunk of data modification.
   *
   * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
+ * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
   *
   * journal buffers for data blocks are not included here, as DIO
   * and fallocate do no need to journal data buffers.
@@ -5758,7 +5761,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
  {
         struct ext4_inode *raw_inode;
         struct ext4_xattr_ibody_header *header;
-       struct ext4_xattr_entry *entry;
  
         if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
                 return 0;
@@ -5766,7 +5768,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
         raw_inode = ext4_raw_inode(&iloc);
  
         header = IHDR(inode, raw_inode);
-       entry = IFIRST(header);
  
         /* No extended attributes present */
         if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index 0e83dfd351d516d30aa37a9fc44f3ade471ec670..4b4ad4b7ce57ccb9ed8e2c773865323f1c0c646a 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -446,10 +446,11 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
                         blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
                         blocknr += first + i;
                         ext4_grp_locked_error(sb, e4b->bd_group,
-                                  __func__, "double-free of inode"
-                                  " %lu's block %llu(bit %u in group %u)",
-                                  inode ? inode->i_ino : 0, blocknr,
-                                  first + i, e4b->bd_group);
+                                             inode ? inode->i_ino : 0,
+                                             blocknr,
+                                             "freeing block already freed "
+                                             "(bit %u)",
+                                             first + i);
                 }
                 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
         }
@@ -712,9 +713,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
         grp->bb_fragments = fragments;
  
         if (free != grp->bb_free) {
-               ext4_grp_locked_error(sb, group,  __func__,
-                       "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
-                       group, free, grp->bb_free);
+               ext4_grp_locked_error(sb, group, 0, 0,
+                                     "%u blocks in bitmap, %u in gd",
+                                     free, grp->bb_free);
                 /*
                  * If we intent to continue, we consider group descritor
                  * corrupt and update bb_free using bitmap value
@@ -1296,10 +1297,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                         blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
                         blocknr += block;
                         ext4_grp_locked_error(sb, e4b->bd_group,
-                                  __func__, "double-free of inode"
-                                  " %lu's block %llu(bit %u in group %u)",
-                                  inode ? inode->i_ino : 0, blocknr, block,
-                                  e4b->bd_group);
+                                             inode ? inode->i_ino : 0,
+                                             blocknr,
+                                             "freeing already freed block "
+                                             "(bit %u)", block);
                 }
                 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
                 e4b->bd_info->bb_counters[order]++;
@@ -1788,8 +1789,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                          * free blocks even though group info says we
                          * we have free blocks
                          */
-                       ext4_grp_locked_error(sb, e4b->bd_group,
-                                       __func__, "%d free blocks as per "
+                       ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
+                                       "%d free blocks as per "
                                         "group info. But bitmap says 0",
                                         free);
                         break;
@@ -1798,8 +1799,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
                 BUG_ON(ex.fe_len <= 0);
                 if (free < ex.fe_len) {
-                       ext4_grp_locked_error(sb, e4b->bd_group,
-                                       __func__, "%d free blocks as per "
+                       ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
+                                       "%d free blocks as per "
                                         "group info. But got %d blocks",
                                         free, ex.fe_len);
                         /*
@@ -1821,8 +1822,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
  
  /*
   * This is a special case for storages like raid5
- * we try to find stripe-aligned chunks for stripe-size requests
- * XXX should do so at least for multiples of stripe size as well
+ * we try to find stripe-aligned chunks for stripe-size-multiple requests
   */
  static noinline_for_stack
  void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
@@ -1999,7 +1999,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
         ext4_group_t ngroups, group, i;
         int cr;
         int err = 0;
-       int bsbits;
         struct ext4_sb_info *sbi;
         struct super_block *sb;
         struct ext4_buddy e4b;
@@ -2041,8 +2040,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
                         ac->ac_2order = i - 1;
         }
  
-       bsbits = ac->ac_sb->s_blocksize_bits;
-
         /* if stream allocation is enabled, use global goal */
         if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
                 /* TBD: may be hot point */
@@ -2094,8 +2091,8 @@ repeat:
                         ac->ac_groups_scanned++;
                         if (cr == 0)
                                 ext4_mb_simple_scan_group(ac, &e4b);
-                       else if (cr == 1 &&
-                                       ac->ac_g_ex.fe_len == sbi->s_stripe)
+                       else if (cr == 1 && sbi->s_stripe &&
+                                       !(ac->ac_g_ex.fe_len % sbi->s_stripe))
                                 ext4_mb_scan_aligned(ac, &e4b);
                         else
                                 ext4_mb_complex_scan_group(ac, &e4b);
@@ -2221,7 +2218,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
  
         rc = seq_open(file, &ext4_mb_seq_groups_ops);
         if (rc == 0) {
-               struct seq_file *m = (struct seq_file *)file->private_data;
+               struct seq_file *m = file->private_data;
                 m->private = sb;
         }
         return rc;
@@ -2560,6 +2557,22 @@ int ext4_mb_release(struct super_block *sb)
         return 0;
  }
  
+static inline void ext4_issue_discard(struct super_block *sb,
+               ext4_group_t block_group, ext4_grpblk_t block, int count)
+{
+       int ret;
+       ext4_fsblk_t discard_block;
+
+       discard_block = block + ext4_group_first_block_no(sb, block_group);
+       trace_ext4_discard_blocks(sb,
+                       (unsigned long long) discard_block, count);
+       ret = sb_issue_discard(sb, discard_block, count);
+       if (ret == -EOPNOTSUPP) {       /* failures are negative errnos */
+               ext4_warning(sb, "discard not supported, disabling");
+               clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+       }
+}
+
  /*
   * This function is called by the jbd2 layer once the commit has finished,
   * so we know we can free the blocks that were released with that commit.
@@ -2579,22 +2592,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
                          entry->count, entry->group, entry);
  
-               if (test_opt(sb, DISCARD)) {
-                       int ret;
-                       ext4_fsblk_t discard_block;
-
-                       discard_block = entry->start_blk +
-                               ext4_group_first_block_no(sb, entry->group);
-                       trace_ext4_discard_blocks(sb,
-                                       (unsigned long long)discard_block,
-                                       entry->count);
-                       ret = sb_issue_discard(sb, discard_block, entry->count);
-                       if (ret == EOPNOTSUPP) {
-                               ext4_warning(sb,
-                                       "discard not supported, disabling");
-                               clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
-                       }
-               }
+               if (test_opt(sb, DISCARD))
+                       ext4_issue_discard(sb, entry->group,
+                                       entry->start_blk, entry->count);
  
                 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
                 /* we expect to find existing buddy because it's pinned */
@@ -2712,7 +2712,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                 handle_t *handle, unsigned int reserv_blks)
  {
         struct buffer_head *bitmap_bh = NULL;
-       struct ext4_super_block *es;
         struct ext4_group_desc *gdp;
         struct buffer_head *gdp_bh;
         struct ext4_sb_info *sbi;
@@ -2725,8 +2724,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
  
         sb = ac->ac_sb;
         sbi = EXT4_SB(sb);
-       es = sbi->s_es;
-
  
         err = -EIO;
         bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2812,7 +2809,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
         err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
  
  out_err:
-       sb->s_dirt = 1;
+       ext4_mark_super_dirty(sb);
         brelse(bitmap_bh);
         return err;
  }
@@ -2850,7 +2847,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
         int bsbits, max;
         ext4_lblk_t end;
         loff_t size, orig_size, start_off;
-       ext4_lblk_t start, orig_start;
+       ext4_lblk_t start;
         struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
         struct ext4_prealloc_space *pa;
  
@@ -2881,6 +2878,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
         size = size << bsbits;
         if (size < i_size_read(ac->ac_inode))
                 size = i_size_read(ac->ac_inode);
+       orig_size = size;
  
         /* max size of free chunks */
         max = 2 << bsbits;
@@ -2922,8 +2920,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
                 size      = ac->ac_o_ex.fe_len << bsbits;
         }
-       orig_size = size = size >> bsbits;
-       orig_start = start = start_off >> bsbits;
+       size = size >> bsbits;
+       start = start_off >> bsbits;
  
         /* don't cover already allocated blocks in selected range */
         if (ar->pleft && start <= ar->lleft) {
@@ -3547,7 +3545,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
         ext4_group_t group;
         ext4_grpblk_t bit;
         unsigned long long grp_blk_start;
-       sector_t start;
         int err = 0;
         int free = 0;
  
@@ -3567,10 +3564,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                 if (bit >= end)
                         break;
                 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
-               start = ext4_group_first_block_no(sb, group) + bit;
                 mb_debug(1, "    free preallocated %u/%u in group %u\n",
-                               (unsigned) start, (unsigned) next - bit,
-                               (unsigned) group);
+                        (unsigned) ext4_group_first_block_no(sb, group) + bit,
+                        (unsigned) next - bit, (unsigned) group);
                 free += next - bit;
  
                 if (ac) {
@@ -3581,7 +3577,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                         trace_ext4_mballoc_discard(ac);
                 }
  
-               trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
+               trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
                                                next - bit);
                 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
                 bit = next + 1;
@@ -3591,8 +3587,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                         pa, (unsigned long) pa->pa_lstart,
                         (unsigned long) pa->pa_pstart,
                         (unsigned long) pa->pa_len);
-               ext4_grp_locked_error(sb, group,
-                                       __func__, "free %u, pa_free %u",
+               ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
                                         free, pa->pa_free);
                 /*
                  * pa is already deleted so we use the value obtained
@@ -3613,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
         ext4_group_t group;
         ext4_grpblk_t bit;
  
-       trace_ext4_mb_release_group_pa(ac, pa);
+       trace_ext4_mb_release_group_pa(sb, ac, pa);
         BUG_ON(pa->pa_deleted == 0);
         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
         BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
         struct super_block *sb = ac->ac_sb;
         ext4_group_t ngroups, i;
  
+       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+               return;
+
         printk(KERN_ERR "EXT4-fs: Can't allocate:"
                         " Allocation context details:\n");
         printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
@@ -4255,7 +4253,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
   * to usual allocation
   */
  ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
-                                struct ext4_allocation_request *ar, int *errp)
+                               struct ext4_allocation_request *ar, int *errp)
  {
         int freed;
         struct ext4_allocation_context *ac = NULL;
@@ -4299,7 +4297,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                 inquota = ar->len;
                 if (ar->len == 0) {
                         *errp = -EDQUOT;
-                       goto out3;
+                       goto out;
                 }
         }
  
@@ -4307,13 +4305,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
         if (!ac) {
                 ar->len = 0;
                 *errp = -ENOMEM;
-               goto out1;
+               goto out;
         }
  
         *errp = ext4_mb_initialize_context(ac, ar);
         if (*errp) {
                 ar->len = 0;
-               goto out2;
+               goto out;
         }
  
         ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
@@ -4322,7 +4320,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                 ext4_mb_normalize_request(ac, ar);
  repeat:
                 /* allocate space in core */
-               ext4_mb_regular_allocator(ac);
+               *errp = ext4_mb_regular_allocator(ac);
+               if (*errp)
+                       goto errout;
  
                 /* as we've just preallocated more space than
                  * user requested orinally, we store allocated
@@ -4333,7 +4333,7 @@ repeat:
         }
         if (likely(ac->ac_status == AC_STATUS_FOUND)) {
                 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
-               if (*errp ==  -EAGAIN) {
+               if (*errp == -EAGAIN) {
                         /*
                          * drop the reference that we took
                          * in ext4_mb_use_best_found
@@ -4344,12 +4344,10 @@ repeat:
                         ac->ac_b_ex.fe_len = 0;
                         ac->ac_status = AC_STATUS_CONTINUE;
                         goto repeat;
-               } else if (*errp) {
+               } else if (*errp)
+               errout:
                         ext4_discard_allocated_blocks(ac);
-                       ac->ac_b_ex.fe_len = 0;
-                       ar->len = 0;
-                       ext4_mb_show_ac(ac);
-               } else {
+               else {
                         block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
                         ar->len = ac->ac_b_ex.fe_len;
                 }
@@ -4358,19 +4356,19 @@ repeat:
                 if (freed)
                         goto repeat;
                 *errp = -ENOSPC;
+       }
+
+       if (*errp) {
                 ac->ac_b_ex.fe_len = 0;
                 ar->len = 0;
                 ext4_mb_show_ac(ac);
         }
-
         ext4_mb_release_context(ac);
-
-out2:
-       kmem_cache_free(ext4_ac_cachep, ac);
-out1:
+out:
+       if (ac)
+               kmem_cache_free(ext4_ac_cachep, ac);
         if (inquota && ar->len < inquota)
                 dquot_free_block(ar->inode, inquota - ar->len);
-out3:
         if (!ar->len) {
                 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
                         /* release all the reserved blocks if non delalloc */
@@ -4402,6 +4400,7 @@ static noinline_for_stack int
  ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
                       struct ext4_free_data *new_entry)
  {
+       ext4_group_t group = e4b->bd_group;
         ext4_grpblk_t block;
         struct ext4_free_data *entry;
         struct ext4_group_info *db = e4b->bd_info;
@@ -4434,9 +4433,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
                 else if (block >= (entry->start_blk + entry->count))
                         n = &(*n)->rb_right;
                 else {
-                       ext4_grp_locked_error(sb, e4b->bd_group, __func__,
-                                       "Double free of blocks %d (%d %d)",
-                                       block, entry->start_blk, entry->count);
+                       ext4_grp_locked_error(sb, group, 0,
+                               ext4_group_first_block_no(sb, group) + block,
+                               "Block already on to-be-freed list");
                         return 0;
                 }
         }
@@ -4494,7 +4493,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
         struct super_block *sb = inode->i_sb;
         struct ext4_allocation_context *ac = NULL;
         struct ext4_group_desc *gdp;
-       struct ext4_super_block *es;
         unsigned long freed = 0;
         unsigned int overflow;
         ext4_grpblk_t bit;
@@ -4513,7 +4511,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
         }
  
         sbi = EXT4_SB(sb);
-       es = EXT4_SB(sb)->s_es;
         if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
             !ext4_data_block_valid(sbi, block, count)) {
                 ext4_error(sb, "Freeing blocks not in datazone - "
@@ -4647,6 +4644,8 @@ do_more:
                 mb_clear_bits(bitmap_bh->b_data, bit, count);
                 mb_free_blocks(inode, &e4b, bit, count);
                 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
+               if (test_opt(sb, DISCARD))
+                       ext4_issue_discard(sb, block_group, bit, count);
         }
  
         ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4680,7 +4679,7 @@ do_more:
                 put_bh(bitmap_bh);
                 goto do_more;
         }
-       sb->s_dirt = 1;
+       ext4_mark_super_dirty(sb);
  error_return:
         if (freed)
                 dquot_free_block(inode, freed);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c

index 6f3a27ec30bfb1b9951a9053ae6721e135762f5d..1765c2c50a9b9b6c699c134ea44423700d59883c 100644 (file)
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
          * We have the extent map build with the tmp inode.
          * Now copy the i_data across
          */
-       ei->i_flags |= EXT4_EXTENTS_FL;
+       ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
         memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
  
         /*
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c

index 52abfa12762a6020c20ee9b62f62227fd16c078f..5f1ed9fc913c207d5bcc99f3ce63413a4a931ad8 100644 (file)
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -148,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
   */
  static int
  mext_check_null_inode(struct inode *inode1, struct inode *inode2,
-               const char *function)
+                     const char *function, unsigned int line)
  {
         int ret = 0;
  
         if (inode1 == NULL) {
-               __ext4_error(inode2->i_sb, function,
+               __ext4_error(inode2->i_sb, function, line,
                         "Both inodes should not be NULL: "
                         "inode1 NULL inode2 %lu", inode2->i_ino);
                 ret = -EIO;
         } else if (inode2 == NULL) {
-               __ext4_error(inode1->i_sb, function,
+               __ext4_error(inode1->i_sb, function, line,
                         "Both inodes should not be NULL: "
                         "inode1 %lu inode2 NULL", inode1->i_ino);
                 ret = -EIO;
@@ -1084,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
  
         BUG_ON(inode1 == NULL && inode2 == NULL);
  
-       ret = mext_check_null_inode(inode1, inode2, __func__);
+       ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
         if (ret < 0)
                 goto out;
  
@@ -1121,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
  
         BUG_ON(inode1 == NULL && inode2 == NULL);
  
-       ret = mext_check_null_inode(inode1, inode2, __func__);
+       ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
         if (ret < 0)
                 goto out;
  
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index a43e6617b35119e6bb46815e4f73aa2a518a4e30..314c0d3b3fa9aece3015a113b6d348b7bc2be4aa 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
  static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                              struct inode *inode);
  
-unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
-{
-       unsigned len = le16_to_cpu(dlen);
-
-       if (len == EXT4_MAX_REC_LEN || len == 0)
-               return blocksize;
-       return (len & 65532) | ((len & 3) << 16);
-}
-
-__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
-{
-       if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
-               BUG();
-       if (len < 65536)
-               return cpu_to_le16(len);
-       if (len == blocksize) {
-               if (blocksize == 65536)
-                       return cpu_to_le16(EXT4_MAX_REC_LEN);
-               else
-                       return cpu_to_le16(0);
-       }
-       return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
-}
-
  /*
   * p is at least 6 bytes before the end of page
   */
@@ -605,7 +581,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
                                            dir->i_sb->s_blocksize -
                                            EXT4_DIR_REC_LEN(0));
         for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
-               if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+               if (!ext4_check_dir_entry(dir, de, bh,
                                         (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
                                                 +((char *)de - bh->b_data))) {
                         /* On error, skip the f_pos to the next block. */
@@ -844,8 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
                 if ((char *) de + namelen <= dlimit &&
                     ext4_match (namelen, name, de)) {
                         /* found a match - just to be sure, do a full check */
-                       if (!ext4_check_dir_entry("ext4_find_entry",
-                                                 dir, de, bh, offset))
+                       if (!ext4_check_dir_entry(dir, de, bh, offset))
                                 return -1;
                         *res_dir = de;
                         return 1;
@@ -1019,7 +994,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
                         int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
                                   + ((char *) de - bh->b_data);
  
-                       if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) {
+                       if (!ext4_check_dir_entry(dir, de, bh, off)) {
                                 brelse(bh);
                                 *err = ERR_BAD_DX_DIR;
                                 goto errout;
@@ -1088,7 +1063,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
  struct dentry *ext4_get_parent(struct dentry *child)
  {
         __u32 ino;
-       struct inode *inode;
         static const struct qstr dotdot = {
                 .name = "..",
                 .len = 2,
@@ -1097,7 +1071,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
         struct buffer_head *bh;
  
         bh = ext4_find_entry(child->d_inode, &dotdot, &de);
-       inode = NULL;
         if (!bh)
                 return ERR_PTR(-ENOENT);
         ino = le32_to_cpu(de->inode);
@@ -1305,8 +1278,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                 de = (struct ext4_dir_entry_2 *)bh->b_data;
                 top = bh->b_data + blocksize - reclen;
                 while ((char *) de <= top) {
-                       if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
-                                                 bh, offset))
+                       if (!ext4_check_dir_entry(dir, de, bh, offset))
                                 return -EIO;
                         if (ext4_match(namelen, name, de))
                                 return -EEXIST;
@@ -1673,7 +1645,7 @@ static int ext4_delete_entry(handle_t *handle,
         pde = NULL;
         de = (struct ext4_dir_entry_2 *) bh->b_data;
         while (i < bh->b_size) {
-               if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i))
+               if (!ext4_check_dir_entry(dir, de, bh, i))
                         return -EIO;
                 if (de == de_del)  {
                         BUFFER_TRACE(bh, "get_write_access");
@@ -1956,7 +1928,7 @@ static int empty_dir(struct inode *inode)
                         }
                         de = (struct ext4_dir_entry_2 *) bh->b_data;
                 }
-               if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) {
+               if (!ext4_check_dir_entry(inode, de, bh, offset)) {
                         de = (struct ext4_dir_entry_2 *)(bh->b_data +
                                                          sb->s_blocksize);
                         offset = (offset | (sb->s_blocksize - 1)) + 1;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c

index 6df797eb9aeb60ab1504298f266edb0977d10be9..ca5c8aa00a2fe10a621348913a0c85908c6e7f6e 100644 (file)
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -921,8 +921,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
                            &sbi->s_flex_groups[flex_group].free_inodes);
         }
  
-       ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
-       sb->s_dirt = 1;
+       ext4_handle_dirty_super(handle, sb);
  
  exit_journal:
         mutex_unlock(&sbi->s_resize_lock);
@@ -953,7 +952,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                       ext4_fsblk_t n_blocks_count)
  {
         ext4_fsblk_t o_blocks_count;
-       ext4_group_t o_groups_count;
         ext4_grpblk_t last;
         ext4_grpblk_t add;
         struct buffer_head *bh;
@@ -965,7 +963,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
          * yet: we're going to revalidate es->s_blocks_count after
          * taking the s_resize_lock below. */
         o_blocks_count = ext4_blocks_count(es);
-       o_groups_count = EXT4_SB(sb)->s_groups_count;
  
         if (test_opt(sb, DEBUG))
                 printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
@@ -1045,13 +1042,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                 goto exit_put;
         }
         ext4_blocks_count_set(es, o_blocks_count + add);
-       ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
-       sb->s_dirt = 1;
         mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
         ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
                    o_blocks_count + add);
         /* We add the blocks to the bitmap and set the group need init bit */
         ext4_add_groupblocks(handle, sb, o_blocks_count, add);
+       ext4_handle_dirty_super(handle, sb);
         ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
                    o_blocks_count + add);
         if ((err = ext4_journal_stop(handle)))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index e72d3235b2fdbd8836896e4cd60dc81d155da2ff..8d65575f8c8c38a32c3ab358a1cb9a49a2760dc6 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -241,14 +241,14 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
         if (sb->s_flags & MS_RDONLY)
                 return ERR_PTR(-EROFS);
  
-       vfs_check_frozen(sb, SB_FREEZE_WRITE);
+       vfs_check_frozen(sb, SB_FREEZE_TRANS);
         /* Special case here: if the journal has aborted behind our
          * backs (eg. EIO in the commit thread), then we still need to
          * take the FS itself readonly cleanly. */
         journal = EXT4_SB(sb)->s_journal;
         if (journal) {
                 if (is_journal_aborted(journal)) {
-                       ext4_abort(sb, __func__, "Detected aborted journal");
+                       ext4_abort(sb, "Detected aborted journal");
                         return ERR_PTR(-EROFS);
                 }
                 return jbd2_journal_start(journal, nblocks);
@@ -262,7 +262,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
   * that sync() will call the filesystem's write_super callback if
   * appropriate.
   */
-int __ext4_journal_stop(const char *where, handle_t *handle)
+int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
  {
         struct super_block *sb;
         int err;
@@ -279,12 +279,13 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
         if (!err)
                 err = rc;
         if (err)
-               __ext4_std_error(sb, where, err);
+               __ext4_std_error(sb, where, line, err);
         return err;
  }
  
-void ext4_journal_abort_handle(const char *caller, const char *err_fn,
-               struct buffer_head *bh, handle_t *handle, int err)
+void ext4_journal_abort_handle(const char *caller, unsigned int line,
+                              const char *err_fn, struct buffer_head *bh,
+                              handle_t *handle, int err)
  {
         char nbuf[16];
         const char *errstr = ext4_decode_error(NULL, err, nbuf);
@@ -300,12 +301,47 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
         if (is_handle_aborted(handle))
                 return;
  
-       printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
-              caller, errstr, err_fn);
+       printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
+              caller, line, errstr, err_fn);
  
         jbd2_journal_abort_handle(handle);
  }
  
+static void __save_error_info(struct super_block *sb, const char *func,
+                           unsigned int line)
+{
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+       es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+       es->s_last_error_time = cpu_to_le32(get_seconds());
+       strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+       es->s_last_error_line = cpu_to_le32(line);
+       if (!es->s_first_error_time) {
+               es->s_first_error_time = es->s_last_error_time;
+               strncpy(es->s_first_error_func, func,
+                       sizeof(es->s_first_error_func));
+               es->s_first_error_line = cpu_to_le32(line);
+               es->s_first_error_ino = es->s_last_error_ino;
+               es->s_first_error_block = es->s_last_error_block;
+       }
+       /*
+        * Start the daily error reporting function if it hasn't been
+        * started already
+        */
+       if (!es->s_error_count)
+               mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
+       es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
+}
+
+static void save_error_info(struct super_block *sb, const char *func,
+                           unsigned int line)
+{
+       __save_error_info(sb, func, line);
+       ext4_commit_super(sb, 1);
+}
+
+
  /* Deal with the reporting of failure conditions on a filesystem such as
   * inconsistencies detected or read IO failures.
   *
@@ -323,11 +359,6 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
  
  static void ext4_handle_error(struct super_block *sb)
  {
-       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
-       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-       es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-
         if (sb->s_flags & MS_RDONLY)
                 return;
  
@@ -342,19 +373,19 @@ static void ext4_handle_error(struct super_block *sb)
                 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                 sb->s_flags |= MS_RDONLY;
         }
-       ext4_commit_super(sb, 1);
         if (test_opt(sb, ERRORS_PANIC))
                 panic("EXT4-fs (device %s): panic forced after error\n",
                         sb->s_id);
  }
  
  void __ext4_error(struct super_block *sb, const char *function,
-               const char *fmt, ...)
+                 unsigned int line, const char *fmt, ...)
  {
         va_list args;
  
         va_start(args, fmt);
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
+              sb->s_id, function, line, current->comm);
         vprintk(fmt, args);
         printk("\n");
         va_end(args);
@@ -362,14 +393,22 @@ void __ext4_error(struct super_block *sb, const char *function,
         ext4_handle_error(sb);
  }
  
-void ext4_error_inode(const char *function, struct inode *inode,
+void ext4_error_inode(struct inode *inode, const char *function,
+                     unsigned int line, ext4_fsblk_t block,
                       const char *fmt, ...)
  {
         va_list args;
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
  
+       es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+       es->s_last_error_block = cpu_to_le64(block);
+       save_error_info(inode->i_sb, function, line);
         va_start(args, fmt);
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ",
-              inode->i_sb->s_id, function, inode->i_ino, current->comm);
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+              inode->i_sb->s_id, function, line, inode->i_ino);
+       if (block)
+               printk("block %llu: ", block);
+       printk("comm %s: ", current->comm);
         vprintk(fmt, args);
         printk("\n");
         va_end(args);
@@ -377,20 +416,26 @@ void ext4_error_inode(const char *function, struct inode *inode,
         ext4_handle_error(inode->i_sb);
  }
  
-void ext4_error_file(const char *function, struct file *file,
-                    const char *fmt, ...)
+void ext4_error_file(struct file *file, const char *function,
+                    unsigned int line, const char *fmt, ...)
  {
         va_list args;
+       struct ext4_super_block *es;
         struct inode *inode = file->f_dentry->d_inode;
         char pathname[80], *path;
  
+       es = EXT4_SB(inode->i_sb)->s_es;
+       es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+       save_error_info(inode->i_sb, function, line);
         va_start(args, fmt);
         path = d_path(&(file->f_path), pathname, sizeof(pathname));
         if (!path)
                 path = "(unknown)";
         printk(KERN_CRIT
-              "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ",
-              inode->i_sb->s_id, function, inode->i_ino, current->comm, path);
+              "EXT4-fs error (device %s): %s:%d: inode #%lu "
+              "(comm %s path %s): ",
+              inode->i_sb->s_id, function, line, inode->i_ino,
+              current->comm, path);
         vprintk(fmt, args);
         printk("\n");
         va_end(args);
@@ -435,7 +480,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
  /* __ext4_std_error decodes expected errors from journaling functions
   * automatically and invokes the appropriate error response.  */
  
-void __ext4_std_error(struct super_block *sb, const char *function, int errno)
+void __ext4_std_error(struct super_block *sb, const char *function,
+                     unsigned int line, int errno)
  {
         char nbuf[16];
         const char *errstr;
@@ -448,8 +494,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
                 return;
  
         errstr = ext4_decode_error(sb, errno, nbuf);
-       printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
-              sb->s_id, function, errstr);
+       printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+              sb->s_id, function, line, errstr);
+       save_error_info(sb, function, line);
  
         ext4_handle_error(sb);
  }
@@ -464,29 +511,29 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
   * case we take the easy way out and panic immediately.
   */
  
-void ext4_abort(struct super_block *sb, const char *function,
-               const char *fmt, ...)
+void __ext4_abort(struct super_block *sb, const char *function,
+               unsigned int line, const char *fmt, ...)
  {
         va_list args;
  
+       save_error_info(sb, function, line);
         va_start(args, fmt);
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
+              function, line);
         vprintk(fmt, args);
         printk("\n");
         va_end(args);
  
+       if ((sb->s_flags & MS_RDONLY) == 0) {
+               ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+               sb->s_flags |= MS_RDONLY;
+               EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               if (EXT4_SB(sb)->s_journal)
+                       jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+               save_error_info(sb, function, line);
+       }
         if (test_opt(sb, ERRORS_PANIC))
                 panic("EXT4-fs panic from previous error\n");
-
-       if (sb->s_flags & MS_RDONLY)
-               return;
-
-       ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
-       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-       sb->s_flags |= MS_RDONLY;
-       EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
-       if (EXT4_SB(sb)->s_journal)
-               jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
  }
  
  void ext4_msg (struct super_block * sb, const char *prefix,
@@ -502,38 +549,47 @@ void ext4_msg (struct super_block * sb, const char *prefix,
  }
  
  void __ext4_warning(struct super_block *sb, const char *function,
-                 const char *fmt, ...)
+                   unsigned int line, const char *fmt, ...)
  {
         va_list args;
  
         va_start(args, fmt);
-       printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
-              sb->s_id, function);
+       printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
+              sb->s_id, function, line);
         vprintk(fmt, args);
         printk("\n");
         va_end(args);
  }
  
-void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
-                          const char *function, const char *fmt, ...)
+void __ext4_grp_locked_error(const char *function, unsigned int line,
+                            struct super_block *sb, ext4_group_t grp,
+                            unsigned long ino, ext4_fsblk_t block,
+                            const char *fmt, ...)
  __releases(bitlock)
  __acquires(bitlock)
  {
         va_list args;
         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  
+       es->s_last_error_ino = cpu_to_le32(ino);
+       es->s_last_error_block = cpu_to_le64(block);
+       __save_error_info(sb, function, line);
         va_start(args, fmt);
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+              sb->s_id, function, line, grp);
+       if (ino)
+               printk("inode %lu: ", ino);
+       if (block)
+               printk("block %llu:", (unsigned long long) block);
         vprintk(fmt, args);
         printk("\n");
         va_end(args);
  
         if (test_opt(sb, ERRORS_CONT)) {
-               EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-               es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
                 ext4_commit_super(sb, 0);
                 return;
         }
+
         ext4_unlock_group(sb, grp);
         ext4_handle_error(sb);
         /*
@@ -660,8 +716,7 @@ static void ext4_put_super(struct super_block *sb)
                 err = jbd2_journal_destroy(sbi->s_journal);
                 sbi->s_journal = NULL;
                 if (err < 0)
-                       ext4_abort(sb, __func__,
-                                  "Couldn't clean up the journal");
+                       ext4_abort(sb, "Couldn't clean up the journal");
         }
  
         ext4_release_system_zone(sb);
@@ -946,14 +1001,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                 seq_puts(seq, ",journal_async_commit");
         else if (test_opt(sb, JOURNAL_CHECKSUM))
                 seq_puts(seq, ",journal_checksum");
-       if (test_opt(sb, NOBH))
-               seq_puts(seq, ",nobh");
         if (test_opt(sb, I_VERSION))
                 seq_puts(seq, ",i_version");
-       if (!test_opt(sb, DELALLOC))
+       if (!test_opt(sb, DELALLOC) &&
+           !(def_mount_opts & EXT4_DEFM_NODELALLOC))
                 seq_puts(seq, ",nodelalloc");
  
-
         if (sbi->s_stripe)
                 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
         /*
@@ -977,7 +1030,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
         if (test_opt(sb, NO_AUTO_DA_ALLOC))
                 seq_puts(seq, ",noauto_da_alloc");
  
-       if (test_opt(sb, DISCARD))
+       if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
                 seq_puts(seq, ",discard");
  
         if (test_opt(sb, NOLOAD))
@@ -986,6 +1039,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
         if (test_opt(sb, DIOREAD_NOLOCK))
                 seq_puts(seq, ",dioread_nolock");
  
+       if (test_opt(sb, BLOCK_VALIDITY) &&
+           !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
+               seq_puts(seq, ",block_validity");
+
         ext4_show_quota_options(seq, sb);
  
         return 0;
@@ -1065,6 +1122,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
  static int ext4_write_info(struct super_block *sb, int type);
  static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                                 char *path);
+static int ext4_quota_off(struct super_block *sb, int type);
  static int ext4_quota_on_mount(struct super_block *sb, int type);
  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                                size_t len, loff_t off);
@@ -1086,7 +1144,7 @@ static const struct dquot_operations ext4_quota_operations = {
  
  static const struct quotactl_ops ext4_qctl_operations = {
         .quota_on       = ext4_quota_on,
-       .quota_off      = dquot_quota_off,
+       .quota_off      = ext4_quota_off,
         .quota_sync     = dquot_quota_sync,
         .get_info       = dquot_get_dqinfo,
         .set_info       = dquot_set_dqinfo,
@@ -1624,10 +1682,12 @@ set_qf_format:
                         *n_blocks_count = option;
                         break;
                 case Opt_nobh:
-                       set_opt(sbi->s_mount_opt, NOBH);
+                       ext4_msg(sb, KERN_WARNING,
+                                "Ignoring deprecated nobh option");
                         break;
                 case Opt_bh:
-                       clear_opt(sbi->s_mount_opt, NOBH);
+                       ext4_msg(sb, KERN_WARNING,
+                                "Ignoring deprecated bh option");
                         break;
                 case Opt_i_version:
                         set_opt(sbi->s_mount_opt, I_VERSION);
@@ -2249,6 +2309,8 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a,
  {
         struct super_block *sb = sbi->s_buddy_cache->i_sb;
  
+       if (!sb->s_bdev->bd_part)
+               return snprintf(buf, PAGE_SIZE, "0\n");
         return snprintf(buf, PAGE_SIZE, "%lu\n",
                         (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
                          sbi->s_sectors_written_start) >> 1);
@@ -2259,6 +2321,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
  {
         struct super_block *sb = sbi->s_buddy_cache->i_sb;
  
+       if (!sb->s_bdev->bd_part)
+               return snprintf(buf, PAGE_SIZE, "0\n");
         return snprintf(buf, PAGE_SIZE, "%llu\n",
                         (unsigned long long)(sbi->s_kbytes_written +
                         ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
@@ -2431,6 +2495,53 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
         return 1;
  }
  
+/*
+ * Timer callback for sbi->s_err_report (arg is the super_block): logs the
+ * error count and the first/last error records, then re-arms itself for
+ * 24 hours later.  KERN_CONT pieces extend the preceding printk line.
+ */
+static void print_daily_error_info(unsigned long arg)
+{
+       struct super_block *sb = (struct super_block *) arg;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+
+       if (es->s_error_count)
+               ext4_msg(sb, KERN_NOTICE, "error count: %u",
+                        le32_to_cpu(es->s_error_count));
+       /* %.*s: the fixed-size function name may lack a terminating NUL */
+       if (es->s_first_error_time) {
+               printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%u",
+                      sb->s_id, le32_to_cpu(es->s_first_error_time),
+                      (int) sizeof(es->s_first_error_func),
+                      es->s_first_error_func,
+                      le32_to_cpu(es->s_first_error_line));
+               if (es->s_first_error_ino)
+                       printk(KERN_CONT ": inode %u",
+                              le32_to_cpu(es->s_first_error_ino));
+               if (es->s_first_error_block)
+                       printk(KERN_CONT ": block %llu", (unsigned long long)
+                              le64_to_cpu(es->s_first_error_block));
+               printk(KERN_CONT "\n");
+       }
+       if (es->s_last_error_time) {
+               printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%u",
+                      sb->s_id, le32_to_cpu(es->s_last_error_time),
+                      (int) sizeof(es->s_last_error_func),
+                      es->s_last_error_func,
+                      le32_to_cpu(es->s_last_error_line));
+               if (es->s_last_error_ino)
+                       printk(KERN_CONT ": inode %u",
+                              le32_to_cpu(es->s_last_error_ino));
+               if (es->s_last_error_block)
+                       printk(KERN_CONT ": block %llu", (unsigned long long)
+                              le64_to_cpu(es->s_last_error_block));
+               printk(KERN_CONT "\n");
+       }
+       /* Re-arm unconditionally, even if nothing was printed above */
+       mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
+}
+
  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                 __releases(kernel_lock)
                                 __acquires(kernel_lock)
@@ -2448,7 +2559,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         struct inode *root;
         char *cp;
         const char *descr;
-       int ret = -EINVAL;
+       int ret = -ENOMEM;
         int blocksize;
         unsigned int db_count;
         unsigned int i;
@@ -2459,13 +2570,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
  
         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
         if (!sbi)
-               return -ENOMEM;
+               goto out_free_orig;
  
         sbi->s_blockgroup_lock =
                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
         if (!sbi->s_blockgroup_lock) {
                 kfree(sbi);
-               return -ENOMEM;
+               goto out_free_orig;
         }
         sb->s_fs_info = sbi;
         sbi->s_mount_opt = 0;
@@ -2473,8 +2584,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         sbi->s_resgid = EXT4_DEF_RESGID;
         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
         sbi->s_sb_block = sb_block;
-       sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
-                                                     sectors[1]);
+       if (sb->s_bdev->bd_part)
+               sbi->s_sectors_written_start =
+                       part_stat_read(sb->s_bdev->bd_part, sectors[1]);
  
         unlock_kernel();
  
@@ -2482,6 +2594,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         for (cp = sb->s_id; (cp = strchr(cp, '/'));)
                 *cp = '!';
  
+       ret = -EINVAL;
         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
         if (!blocksize) {
                 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
@@ -2546,6 +2659,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 set_opt(sbi->s_mount_opt, ERRORS_CONT);
         else
                 set_opt(sbi->s_mount_opt, ERRORS_RO);
+       if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
+               set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+       if (def_mount_opts & EXT4_DEFM_DISCARD)
+               set_opt(sbi->s_mount_opt, DISCARD);
  
         sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
         sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -2553,15 +2670,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
  
-       set_opt(sbi->s_mount_opt, BARRIER);
+       if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
+               set_opt(sbi->s_mount_opt, BARRIER);
  
         /*
          * enable delayed allocation by default
          * Use -o nodelalloc to turn it off
          */
-       if (!IS_EXT3_SB(sb))
+       if (!IS_EXT3_SB(sb) &&
+           ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
                 set_opt(sbi->s_mount_opt, DELALLOC);
  
+       if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
+                          &journal_devnum, &journal_ioprio, NULL, 0)) {
+               ext4_msg(sb, KERN_WARNING,
+                        "failed to parse options in superblock: %s",
+                        sbi->s_es->s_mount_opts);
+       }
         if (!parse_options((char *) data, sb, &journal_devnum,
                            &journal_ioprio, NULL, 0))
                 goto failed_mount;
@@ -2912,18 +3037,7 @@ no_journal:
                 ext4_msg(sb, KERN_ERR, "insufficient memory");
                 goto failed_mount_wq;
         }
-       if (test_opt(sb, NOBH)) {
-               if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
-                       ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
-                               "its supported only with writeback mode");
-                       clear_opt(sbi->s_mount_opt, NOBH);
-               }
-               if (test_opt(sb, DIOREAD_NOLOCK)) {
-                       ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
-                               "not supported with nobh mode");
-                       goto failed_mount_wq;
-               }
-       }
+
         EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
         if (!EXT4_SB(sb)->dio_unwritten_wq) {
                 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3043,7 +3157,14 @@ no_journal:
                 descr = "out journal";
  
         ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
-               "Opts: %s", descr, orig_data);
+                "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+                *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+
+       init_timer(&sbi->s_err_report);
+       sbi->s_err_report.function = print_daily_error_info;
+       sbi->s_err_report.data = (unsigned long) sb;
+       if (es->s_error_count)
+               mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
  
         lock_kernel();
         kfree(orig_data);
@@ -3093,6 +3214,7 @@ out_fail:
         kfree(sbi->s_blockgroup_lock);
         kfree(sbi);
         lock_kernel();
+out_free_orig:
         kfree(orig_data);
         return ret;
  }
@@ -3110,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
         journal->j_min_batch_time = sbi->s_min_batch_time;
         journal->j_max_batch_time = sbi->s_max_batch_time;
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         if (test_opt(sb, BARRIER))
                 journal->j_flags |= JBD2_BARRIER;
         else
@@ -3119,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
         else
                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  }
  
  static journal_t *ext4_get_journal(struct super_block *sb,
@@ -3327,8 +3449,17 @@ static int ext4_load_journal(struct super_block *sb,
  
         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
                 err = jbd2_journal_wipe(journal, !really_read_only);
-       if (!err)
+       if (!err) {
+               char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+               if (save)
+                       memcpy(save, ((char *) es) +
+                              EXT4_S_ERR_START, EXT4_S_ERR_LEN);
                 err = jbd2_journal_load(journal);
+               if (save)
+                       memcpy(((char *) es) + EXT4_S_ERR_START,
+                              save, EXT4_S_ERR_LEN);
+               kfree(save);
+       }
  
         if (err) {
                 ext4_msg(sb, KERN_ERR, "error loading journal");
@@ -3384,10 +3515,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
          */
         if (!(sb->s_flags & MS_RDONLY))
                 es->s_wtime = cpu_to_le32(get_seconds());
-       es->s_kbytes_written =
-               cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
+       if (sb->s_bdev->bd_part)
+               es->s_kbytes_written =
+                       cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
+       else
+               es->s_kbytes_written =
+                       cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
         ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
                                         &EXT4_SB(sb)->s_freeblocks_counter));
         es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
@@ -3491,7 +3626,7 @@ int ext4_force_commit(struct super_block *sb)
  
         journal = EXT4_SB(sb)->s_journal;
         if (journal) {
-               vfs_check_frozen(sb, SB_FREEZE_WRITE);
+               vfs_check_frozen(sb, SB_FREEZE_TRANS);
                 ret = ext4_journal_force_commit(journal);
         }
  
@@ -3616,7 +3751,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
         }
  
         if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
-               ext4_abort(sb, __func__, "Abort forced by user");
+               ext4_abort(sb, "Abort forced by user");
  
         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3981,6 +4116,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
         return err;
  }
  
+static int ext4_quota_off(struct super_block *sb, int type)
+{
+       if (!test_opt(sb, DELALLOC))
+               return dquot_quota_off(sb, type);
+
+       /* Delalloc is on: sync first so delayed blocks get allocated */
+       down_read(&sb->s_umount);
+       sync_filesystem(sb);    /* return value is not checked */
+       up_read(&sb->s_umount);
+       return dquot_quota_off(sb, type);
+}
+
  /* Read data from quotafile - avoid pagecache and such because we cannot afford
   * acquiring the locks... As quota files are never truncated and quota code
   * itself serializes the operations (and noone else should touch the files)
@@ -4030,7 +4177,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
         int err = 0;
         int offset = off & (sb->s_blocksize - 1);
-       int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
         struct buffer_head *bh;
         handle_t *handle = journal_current_handle();
  
@@ -4055,24 +4201,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
         bh = ext4_bread(handle, inode, blk, 1, &err);
         if (!bh)
                 goto out;
-       if (journal_quota) {
-               err = ext4_journal_get_write_access(handle, bh);
-               if (err) {
-                       brelse(bh);
-                       goto out;
-               }
+       err = ext4_journal_get_write_access(handle, bh);
+       if (err) {
+               brelse(bh);
+               goto out;
         }
         lock_buffer(bh);
         memcpy(bh->b_data+offset, data, len);
         flush_dcache_page(bh->b_page);
         unlock_buffer(bh);
-       if (journal_quota)
-               err = ext4_handle_dirty_metadata(handle, NULL, bh);
-       else {
-               /* Always do at least ordered writes for quotas */
-               err = ext4_jbd2_file_inode(handle, inode);
-               mark_buffer_dirty(bh);
-       }
+       err = ext4_handle_dirty_metadata(handle, NULL, bh);
         brelse(bh);
  out:
         if (err) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c

index 04338009793abcfbfa384d1c0f9528cc3236adc6..a6f314249574d1605337aa3cb4844f1cc188e0a0 100644 (file)
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -458,8 +458,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
  
         if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
                 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
-               sb->s_dirt = 1;
-               ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+               ext4_handle_dirty_super(handle, sb);
         }
  }
  
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c

index 076d1cc44f95858f16c4c66d3a9e4c2475bd1741..1c23a0f4e8a35021a3c291fa3407b976fdbb34a3 100644 (file)
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
  void __jbd2_log_wait_for_space(journal_t *journal)
  {
         int nblocks, space_left;
-       assert_spin_locked(&journal->j_state_lock);
+       /* assert_spin_locked(&journal->j_state_lock); */
  
         nblocks = jbd_space_needed(journal);
         while (__jbd2_log_space_left(journal) < nblocks) {
                 if (journal->j_flags & JBD2_ABORT)
                         return;
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 mutex_lock(&journal->j_checkpoint_mutex);
  
                 /*
@@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
                  * filesystem, so abort the journal and leave a stack
                  * trace for forensic evidence.
                  */
-               spin_lock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
                 spin_lock(&journal->j_list_lock);
                 nblocks = jbd_space_needed(journal);
                 space_left = __jbd2_log_space_left(journal);
@@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
                         if (journal->j_committing_transaction)
                                 tid = journal->j_committing_transaction->t_tid;
                         spin_unlock(&journal->j_list_lock);
-                       spin_unlock(&journal->j_state_lock);
+                       write_unlock(&journal->j_state_lock);
                         if (chkpt) {
                                 jbd2_log_do_checkpoint(journal);
                         } else if (jbd2_cleanup_journal_tail(journal) == 0) {
@@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
                                 WARN_ON(1);
                                 jbd2_journal_abort(journal, 0);
                         }
-                       spin_lock(&journal->j_state_lock);
+                       write_lock(&journal->j_state_lock);
                 } else {
                         spin_unlock(&journal->j_list_lock);
                 }
@@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
          * next transaction ID we will write, and where it will
          * start. */
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         spin_lock(&journal->j_list_lock);
         transaction = journal->j_checkpoint_transactions;
         if (transaction) {
@@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
         /* If the oldest pinned transaction is at the tail of the log
             already then there's not much we can do right now. */
         if (journal->j_tail_sequence == first_tid) {
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 return 1;
         }
  
@@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
         journal->j_free += freed;
         journal->j_tail_sequence = first_tid;
         journal->j_tail = blocknr;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  
         /*
          * If there is an external journal, we need to make sure that
@@ -775,7 +775,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
         J_ASSERT(transaction->t_log_list == NULL);
         J_ASSERT(transaction->t_checkpoint_list == NULL);
         J_ASSERT(transaction->t_checkpoint_io_list == NULL);
-       J_ASSERT(transaction->t_updates == 0);
+       J_ASSERT(atomic_read(&transaction->t_updates) == 0);
         J_ASSERT(journal->j_committing_transaction != transaction);
         J_ASSERT(journal->j_running_transaction != transaction);
  
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c

index 75716d3d2be0351fde4a5953360a5a0c2774f4ae..f52e5e8049f195ec461bfb8781584722b5da2562 100644 (file)
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -150,11 +150,11 @@ static int journal_submit_commit_record(journal_t *journal,
          */
         if (ret == -EOPNOTSUPP && barrier_done) {
                 printk(KERN_WARNING
-                      "JBD: barrier-based sync failed on %s - "
-                      "disabling barriers\n", journal->j_devname);
-               spin_lock(&journal->j_state_lock);
+                      "JBD2: Disabling barriers on %s, "
+                      "not supported by device\n", journal->j_devname);
+               write_lock(&journal->j_state_lock);
                 journal->j_flags &= ~JBD2_BARRIER;
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
  
                 /* And try again, without the barrier */
                 lock_buffer(bh);
@@ -180,11 +180,11 @@ retry:
         wait_on_buffer(bh);
         if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
                 printk(KERN_WARNING
-                      "JBD2: wait_on_commit_record: sync failed on %s - "
-                      "disabling barriers\n", journal->j_devname);
-               spin_lock(&journal->j_state_lock);
+                      "JBD2: %s: disabling barries on %s - not supported "
+                      "by device\n", __func__, journal->j_devname);
+               write_lock(&journal->j_state_lock);
                 journal->j_flags &= ~JBD2_BARRIER;
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
  
                 lock_buffer(bh);
                 clear_buffer_dirty(bh);
@@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         jbd_debug(1, "JBD: starting commit of transaction %d\n",
                         commit_transaction->t_tid);
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         commit_transaction->t_state = T_LOCKED;
  
         /*
@@ -417,23 +417,23 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                                               stats.run.rs_locked);
  
         spin_lock(&commit_transaction->t_handle_lock);
-       while (commit_transaction->t_updates) {
+       while (atomic_read(&commit_transaction->t_updates)) {
                 DEFINE_WAIT(wait);
  
                 prepare_to_wait(&journal->j_wait_updates, &wait,
                                         TASK_UNINTERRUPTIBLE);
-               if (commit_transaction->t_updates) {
+               if (atomic_read(&commit_transaction->t_updates)) {
                         spin_unlock(&commit_transaction->t_handle_lock);
-                       spin_unlock(&journal->j_state_lock);
+                       write_unlock(&journal->j_state_lock);
                         schedule();
-                       spin_lock(&journal->j_state_lock);
+                       write_lock(&journal->j_state_lock);
                         spin_lock(&commit_transaction->t_handle_lock);
                 }
                 finish_wait(&journal->j_wait_updates, &wait);
         }
         spin_unlock(&commit_transaction->t_handle_lock);
  
-       J_ASSERT (commit_transaction->t_outstanding_credits <=
+       J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
                         journal->j_max_transaction_buffers);
  
         /*
@@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         start_time = ktime_get();
         commit_transaction->t_log_start = journal->j_head;
         wake_up(&journal->j_wait_transaction_locked);
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  
         jbd_debug (3, "JBD: commit phase 2\n");
  
@@ -519,19 +519,20 @@ void jbd2_journal_commit_transaction(journal_t *journal)
          * transaction!  Now comes the tricky part: we need to write out
          * metadata.  Loop over the transaction's entire buffer list:
          */
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         commit_transaction->t_state = T_COMMIT;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  
         trace_jbd2_commit_logging(journal, commit_transaction);
         stats.run.rs_logging = jiffies;
         stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
                                                stats.run.rs_logging);
-       stats.run.rs_blocks = commit_transaction->t_outstanding_credits;
+       stats.run.rs_blocks =
+               atomic_read(&commit_transaction->t_outstanding_credits);
         stats.run.rs_blocks_logged = 0;
  
         J_ASSERT(commit_transaction->t_nr_buffers <=
-                commit_transaction->t_outstanding_credits);
+                atomic_read(&commit_transaction->t_outstanding_credits));
  
         err = 0;
         descriptor = NULL;
@@ -616,7 +617,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                  * the free space in the log, but this counter is changed
                  * by jbd2_journal_next_log_block() also.
                  */
-               commit_transaction->t_outstanding_credits--;
+               atomic_dec(&commit_transaction->t_outstanding_credits);
  
                 /* Bump b_count to prevent truncate from stumbling over
                     the shadowed buffer!  @@@ This can go if we ever get
@@ -977,7 +978,7 @@ restart_loop:
          * __jbd2_journal_drop_transaction(). Otherwise we could race with
          * other checkpointing code processing the transaction...
          */
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         spin_lock(&journal->j_list_lock);
         /*
          * Now recheck if some buffers did not get attached to the transaction
@@ -985,7 +986,7 @@ restart_loop:
          */
         if (commit_transaction->t_forget) {
                 spin_unlock(&journal->j_list_lock);
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 goto restart_loop;
         }
  
@@ -1003,7 +1004,8 @@ restart_loop:
          * File the transaction statistics
          */
         stats.ts_tid = commit_transaction->t_tid;
-       stats.run.rs_handle_count = commit_transaction->t_handle_count;
+       stats.run.rs_handle_count =
+               atomic_read(&commit_transaction->t_handle_count);
         trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
                              commit_transaction->t_tid, &stats.run);
  
@@ -1037,7 +1039,7 @@ restart_loop:
                                 journal->j_average_commit_time*3) / 4;
         else
                 journal->j_average_commit_time = commit_time;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  
         if (commit_transaction->t_checkpoint_list == NULL &&
             commit_transaction->t_checkpoint_io_list == NULL) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c

index 036880895bfc8c2e99c42f6fd900819315bd508a..ad5866aaf0f9aa88cc114fb915e1888f393ffdbb 100644 (file)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -41,6 +41,7 @@
  #include <linux/hash.h>
  #include <linux/log2.h>
  #include <linux/vmalloc.h>
+#include <linux/backing-dev.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/jbd2.h>
@@ -48,8 +49,6 @@
  #include <asm/uaccess.h>
  #include <asm/page.h>
  
-EXPORT_SYMBOL(jbd2_journal_start);
-EXPORT_SYMBOL(jbd2_journal_restart);
  EXPORT_SYMBOL(jbd2_journal_extend);
  EXPORT_SYMBOL(jbd2_journal_stop);
  EXPORT_SYMBOL(jbd2_journal_lock_updates);
@@ -143,7 +142,7 @@ static int kjournald2(void *arg)
         /*
          * And now, wait forever for commit wakeup events.
          */
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
  
  loop:
         if (journal->j_flags & JBD2_UNMOUNT)
@@ -154,10 +153,10 @@ loop:
  
         if (journal->j_commit_sequence != journal->j_commit_request) {
                 jbd_debug(1, "OK, requests differ\n");
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 del_timer_sync(&journal->j_commit_timer);
                 jbd2_journal_commit_transaction(journal);
-               spin_lock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
                 goto loop;
         }
  
@@ -169,9 +168,9 @@ loop:
                  * be already stopped.
                  */
                 jbd_debug(1, "Now suspending kjournald2\n");
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 refrigerator();
-               spin_lock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
         } else {
                 /*
                  * We assume on resume that commits are already there,
@@ -191,9 +190,9 @@ loop:
                 if (journal->j_flags & JBD2_UNMOUNT)
                         should_sleep = 0;
                 if (should_sleep) {
-                       spin_unlock(&journal->j_state_lock);
+                       write_unlock(&journal->j_state_lock);
                         schedule();
-                       spin_lock(&journal->j_state_lock);
+                       write_lock(&journal->j_state_lock);
                 }
                 finish_wait(&journal->j_wait_commit, &wait);
         }
@@ -211,7 +210,7 @@ loop:
         goto loop;
  
  end_loop:
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         del_timer_sync(&journal->j_commit_timer);
         journal->j_task = NULL;
         wake_up(&journal->j_wait_done_commit);
@@ -234,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
  
  static void journal_kill_thread(journal_t *journal)
  {
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         journal->j_flags |= JBD2_UNMOUNT;
  
         while (journal->j_task) {
                 wake_up(&journal->j_wait_commit);
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
-               spin_lock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
         }
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  }
  
  /*
@@ -310,7 +309,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
          */
         J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
  
-       new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+retry_alloc:
+       new_bh = alloc_buffer_head(GFP_NOFS);
+       if (!new_bh) {
+               /*
+                * Failure is not an option, but __GFP_NOFAIL is going
+                * away; so we retry ourselves here.
+                */
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+               goto retry_alloc;
+       }
+
         /* keep subsequent assertions sane */
         new_bh->b_state = 0;
         init_buffer(new_bh, NULL, NULL);
@@ -442,7 +451,7 @@ int __jbd2_log_space_left(journal_t *journal)
  {
         int left = journal->j_free;
  
-       assert_spin_locked(&journal->j_state_lock);
+       /* j_state_lock is now an rwlock; assert_spin_locked() no longer applies */
  
         /*
          * Be pessimistic here about the number of those free blocks which
@@ -487,9 +496,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
  {
         int ret;
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         ret = __jbd2_log_start_commit(journal, tid);
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         return ret;
  }
  
@@ -508,7 +517,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
         transaction_t *transaction = NULL;
         tid_t tid;
  
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
         if (journal->j_running_transaction && !current->journal_info) {
                 transaction = journal->j_running_transaction;
                 __jbd2_log_start_commit(journal, transaction->t_tid);
@@ -516,12 +525,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
                 transaction = journal->j_committing_transaction;
  
         if (!transaction) {
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                 return 0;       /* Nothing to retry */
         }
  
         tid = transaction->t_tid;
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
         jbd2_log_wait_commit(journal, tid);
         return 1;
  }
@@ -535,7 +544,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
  {
         int ret = 0;
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         if (journal->j_running_transaction) {
                 tid_t tid = journal->j_running_transaction->t_tid;
  
@@ -554,7 +563,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
                         *ptid = journal->j_committing_transaction->t_tid;
                 ret = 1;
         }
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         return ret;
  }
  
@@ -566,26 +575,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
  {
         int err = 0;
  
+       read_lock(&journal->j_state_lock);
  #ifdef CONFIG_JBD2_DEBUG
-       spin_lock(&journal->j_state_lock);
         if (!tid_geq(journal->j_commit_request, tid)) {
                 printk(KERN_EMERG
                        "%s: error: j_commit_request=%d, tid=%d\n",
                        __func__, journal->j_commit_request, tid);
         }
-       spin_unlock(&journal->j_state_lock);
  #endif
-       spin_lock(&journal->j_state_lock);
         while (tid_gt(tid, journal->j_commit_sequence)) {
                 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
                                   tid, journal->j_commit_sequence);
                 wake_up(&journal->j_wait_commit);
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                 wait_event(journal->j_wait_done_commit,
                                 !tid_gt(tid, journal->j_commit_sequence));
-               spin_lock(&journal->j_state_lock);
+               read_lock(&journal->j_state_lock);
         }
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
  
         if (unlikely(is_journal_aborted(journal))) {
                 printk(KERN_EMERG "journal commit I/O error\n");
@@ -602,7 +609,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
  {
         unsigned long blocknr;
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         J_ASSERT(journal->j_free > 1);
  
         blocknr = journal->j_head;
@@ -610,7 +617,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
         journal->j_free--;
         if (journal->j_head == journal->j_last)
                 journal->j_head = journal->j_first;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         return jbd2_journal_bmap(journal, blocknr, retp);
  }
  
@@ -830,7 +837,7 @@ static journal_t * journal_init_common (void)
         mutex_init(&journal->j_checkpoint_mutex);
         spin_lock_init(&journal->j_revoke_lock);
         spin_lock_init(&journal->j_list_lock);
-       spin_lock_init(&journal->j_state_lock);
+       rwlock_init(&journal->j_state_lock);
  
         journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
         journal->j_min_batch_time = 0;
@@ -1096,14 +1103,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
                 set_buffer_uptodate(bh);
         }
  
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
         jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
                   journal->j_tail, journal->j_tail_sequence, journal->j_errno);
  
         sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
         sb->s_start    = cpu_to_be32(journal->j_tail);
         sb->s_errno    = cpu_to_be32(journal->j_errno);
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
  
         BUFFER_TRACE(bh, "marking dirty");
         mark_buffer_dirty(bh);
@@ -1124,12 +1131,12 @@ out:
          * any future commit will have to be careful to update the
          * superblock again to re-record the true start of the log. */
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         if (sb->s_start)
                 journal->j_flags &= ~JBD2_FLUSHED;
         else
                 journal->j_flags |= JBD2_FLUSHED;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  }
  
  /*
@@ -1391,13 +1398,9 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
  int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
                                       unsigned long ro, unsigned long incompat)
  {
-       journal_superblock_t *sb;
-
         if (!compat && !ro && !incompat)
                 return 1;
  
-       sb = journal->j_superblock;
-
         /* We can support any known requested features iff the
          * superblock is in version 2.  Otherwise we fail to support any
          * extended sb features. */
@@ -1545,7 +1548,7 @@ int jbd2_journal_flush(journal_t *journal)
         transaction_t *transaction = NULL;
         unsigned long old_tail;
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
  
         /* Force everything buffered to the log... */
         if (journal->j_running_transaction) {
@@ -1558,10 +1561,10 @@ int jbd2_journal_flush(journal_t *journal)
         if (transaction) {
                 tid_t tid = transaction->t_tid;
  
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 jbd2_log_wait_commit(journal, tid);
         } else {
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
         }
  
         /* ...and flush everything in the log out to disk. */
@@ -1585,12 +1588,12 @@ int jbd2_journal_flush(journal_t *journal)
          * the magic code for a fully-recovered superblock.  Any future
          * commits of data to the journal will restore the current
          * s_start value. */
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         old_tail = journal->j_tail;
         journal->j_tail = 0;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         jbd2_journal_update_superblock(journal, 1);
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         journal->j_tail = old_tail;
  
         J_ASSERT(!journal->j_running_transaction);
@@ -1598,7 +1601,7 @@ int jbd2_journal_flush(journal_t *journal)
         J_ASSERT(!journal->j_checkpoint_transactions);
         J_ASSERT(journal->j_head == journal->j_tail);
         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         return 0;
  }
  
@@ -1617,7 +1620,6 @@ int jbd2_journal_flush(journal_t *journal)
  
  int jbd2_journal_wipe(journal_t *journal, int write)
  {
-       journal_superblock_t *sb;
         int err = 0;
  
         J_ASSERT (!(journal->j_flags & JBD2_LOADED));
@@ -1626,8 +1628,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
         if (err)
                 return err;
  
-       sb = journal->j_superblock;
-
         if (!journal->j_tail)
                 goto no_recovery;
  
@@ -1665,12 +1665,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
         printk(KERN_ERR "Aborting journal on device %s.\n",
                journal->j_devname);
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         journal->j_flags |= JBD2_ABORT;
         transaction = journal->j_running_transaction;
         if (transaction)
                 __jbd2_log_start_commit(journal, transaction->t_tid);
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  }
  
  /* Soft abort: record the abort error status in the journal superblock,
@@ -1755,12 +1755,12 @@ int jbd2_journal_errno(journal_t *journal)
  {
         int err;
  
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
         if (journal->j_flags & JBD2_ABORT)
                 err = -EROFS;
         else
                 err = journal->j_errno;
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
         return err;
  }
  
@@ -1775,12 +1775,12 @@ int jbd2_journal_clear_err(journal_t *journal)
  {
         int err = 0;
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         if (journal->j_flags & JBD2_ABORT)
                 err = -EROFS;
         else
                 journal->j_errno = 0;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         return err;
  }
  
@@ -1793,10 +1793,10 @@ int jbd2_journal_clear_err(journal_t *journal)
   */
  void jbd2_journal_ack_err(journal_t *journal)
  {
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         if (journal->j_errno)
                 journal->j_flags |= JBD2_ACK_ERR;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  }
  
  int jbd2_journal_blocks_per_page(struct inode *inode)
@@ -2201,8 +2201,6 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
  void jbd2_journal_release_jbd_inode(journal_t *journal,
                                     struct jbd2_inode *jinode)
  {
-       int writeout = 0;
-
         if (!journal)
                 return;
  restart:
@@ -2219,9 +2217,6 @@ restart:
                 goto restart;
         }
  
-       /* Do we need to wait for data writeback? */
-       if (journal->j_committing_transaction == jinode->i_transaction)
-               writeout = 1;
         if (jinode->i_transaction) {
                 list_del(&jinode->i_list);
                 jinode->i_transaction = NULL;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c

index 049281b7cb8966ab236f8085c65f5230f891aaae..2bc4d5f116f190048782825738cb19485376227c 100644 (file)
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -285,12 +285,10 @@ int jbd2_journal_recover(journal_t *journal)
  int jbd2_journal_skip_recovery(journal_t *journal)
  {
         int                     err;
-       journal_superblock_t *  sb;
  
         struct recovery_info    info;
  
         memset (&info, 0, sizeof(info));
-       sb = journal->j_superblock;
  
         err = do_one_pass(journal, &info, PASS_SCAN);
  
@@ -299,7 +297,8 @@ int jbd2_journal_skip_recovery(journal_t *journal)
                 ++journal->j_transaction_sequence;
         } else {
  #ifdef CONFIG_JBD2_DEBUG
-               int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
+               int dropped = info.end_transaction - 
+                       be32_to_cpu(journal->j_superblock->s_sequence);
  #endif
                 jbd_debug(1,
                           "JBD: ignoring %d transaction%s from the journal.\n",
@@ -365,11 +364,6 @@ static int do_one_pass(journal_t *journal,
         int                     tag_bytes = journal_tag_bytes(journal);
         __u32                   crc32_sum = ~0; /* Transactional Checksums */
  
-       /* Precompute the maximum metadata descriptors in a descriptor block */
-       int                     MAX_BLOCKS_PER_DESC;
-       MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
-                              / tag_bytes);
-
         /*
          * First thing is to establish what we expect to find in the log
          * (in terms of transaction IDs), and where (in terms of log
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c

index b8e0806681bb0f4acf63964fa3f72ae00e1f8900..d95cc9d0401dda527b6c202ca2b3da11f73eb992 100644 (file)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -26,6 +26,8 @@
  #include <linux/mm.h>
  #include <linux/highmem.h>
  #include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
  
  static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
  
@@ -53,6 +55,9 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
         transaction->t_tid = journal->j_transaction_sequence++;
         transaction->t_expires = jiffies + journal->j_commit_interval;
         spin_lock_init(&transaction->t_handle_lock);
+       atomic_set(&transaction->t_updates, 0);
+       atomic_set(&transaction->t_outstanding_credits, 0);
+       atomic_set(&transaction->t_handle_count, 0);
         INIT_LIST_HEAD(&transaction->t_inode_list);
         INIT_LIST_HEAD(&transaction->t_private_list);
  
@@ -83,65 +88,75 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
   * transaction's buffer credits.
   */
  
-static int start_this_handle(journal_t *journal, handle_t *handle)
+static int start_this_handle(journal_t *journal, handle_t *handle,
+                            int gfp_mask)
  {
         transaction_t *transaction;
         int needed;
         int nblocks = handle->h_buffer_credits;
         transaction_t *new_transaction = NULL;
-       int ret = 0;
         unsigned long ts = jiffies;
  
         if (nblocks > journal->j_max_transaction_buffers) {
                 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
                        current->comm, nblocks,
                        journal->j_max_transaction_buffers);
-               ret = -ENOSPC;
-               goto out;
+               return -ENOSPC;
         }
  
  alloc_transaction:
         if (!journal->j_running_transaction) {
-               new_transaction = kzalloc(sizeof(*new_transaction),
-                                               GFP_NOFS|__GFP_NOFAIL);
+               new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
                 if (!new_transaction) {
-                       ret = -ENOMEM;
-                       goto out;
+                       /*
+                        * If __GFP_FS is not present, then we may be
+                        * being called from inside the fs writeback
+                        * layer, so we MUST NOT fail.  Since
+                        * __GFP_NOFAIL is going away, we will arrange
+                        * to retry the allocation ourselves.
+                        */
+                       if ((gfp_mask & __GFP_FS) == 0) {
+                               congestion_wait(BLK_RW_ASYNC, HZ/50);
+                               goto alloc_transaction;
+                       }
+                       return -ENOMEM;
                 }
         }
  
         jbd_debug(3, "New handle %p going live.\n", handle);
  
-repeat:
-
         /*
          * We need to hold j_state_lock until t_updates has been incremented,
          * for proper journal barrier handling
          */
-       spin_lock(&journal->j_state_lock);
-repeat_locked:
+repeat:
+       read_lock(&journal->j_state_lock);
         if (is_journal_aborted(journal) ||
             (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
-               spin_unlock(&journal->j_state_lock);
-               ret = -EROFS;
-               goto out;
+               read_unlock(&journal->j_state_lock);
+               kfree(new_transaction);
+               return -EROFS;
         }
  
         /* Wait on the journal's transaction barrier if necessary */
         if (journal->j_barrier_count) {
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                 wait_event(journal->j_wait_transaction_locked,
                                 journal->j_barrier_count == 0);
                 goto repeat;
         }
  
         if (!journal->j_running_transaction) {
-               if (!new_transaction) {
-                       spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
+               if (!new_transaction)
                         goto alloc_transaction;
+               write_lock(&journal->j_state_lock);
+               if (!journal->j_running_transaction) {
+                       jbd2_get_transaction(journal, new_transaction);
+                       new_transaction = NULL;
                 }
-               jbd2_get_transaction(journal, new_transaction);
-               new_transaction = NULL;
+               write_unlock(&journal->j_state_lock);
+               goto repeat;
         }
  
         transaction = journal->j_running_transaction;
@@ -155,7 +170,7 @@ repeat_locked:
  
                 prepare_to_wait(&journal->j_wait_transaction_locked,
                                         &wait, TASK_UNINTERRUPTIBLE);
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                 schedule();
                 finish_wait(&journal->j_wait_transaction_locked, &wait);
                 goto repeat;
@@ -166,8 +181,8 @@ repeat_locked:
          * buffers requested by this operation, we need to stall pending a log
          * checkpoint to free some more log space.
          */
-       spin_lock(&transaction->t_handle_lock);
-       needed = transaction->t_outstanding_credits + nblocks;
+       needed = atomic_add_return(nblocks,
+                                  &transaction->t_outstanding_credits);
  
         if (needed > journal->j_max_transaction_buffers) {
                 /*
@@ -178,11 +193,11 @@ repeat_locked:
                 DEFINE_WAIT(wait);
  
                 jbd_debug(2, "Handle %p starting new commit...\n", handle);
-               spin_unlock(&transaction->t_handle_lock);
+               atomic_sub(nblocks, &transaction->t_outstanding_credits);
                 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
                                 TASK_UNINTERRUPTIBLE);
                 __jbd2_log_start_commit(journal, transaction->t_tid);
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                 schedule();
                 finish_wait(&journal->j_wait_transaction_locked, &wait);
                 goto repeat;
@@ -215,35 +230,48 @@ repeat_locked:
          */
         if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
                 jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
-               spin_unlock(&transaction->t_handle_lock);
-               __jbd2_log_wait_for_space(journal);
-               goto repeat_locked;
+               atomic_sub(nblocks, &transaction->t_outstanding_credits);
+               read_unlock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
+               if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
+                       __jbd2_log_wait_for_space(journal);
+               write_unlock(&journal->j_state_lock);
+               goto repeat;
         }
  
         /* OK, account for the buffers that this operation expects to
-        * use and add the handle to the running transaction. */
-
-       if (time_after(transaction->t_start, ts)) {
+        * use and add the handle to the running transaction.
+        *
+        * In order for t_max_wait to be reliable, it must be
+        * protected by a lock.  But doing so will mean that
+        * start_this_handle() cannot be run in parallel on SMP
+        * systems, which limits our scalability.  So we only update
+        * t_max_wait when debugging is enabled.  We may want to use
+        * a separate flag, eventually, so we can enable this
+        * independently of debugging.
+        */
+#ifdef CONFIG_JBD2_DEBUG
+       if (jbd2_journal_enable_debug &&
+           time_after(transaction->t_start, ts)) {
                 ts = jbd2_time_diff(ts, transaction->t_start);
+               spin_lock(&transaction->t_handle_lock);
                 if (ts > transaction->t_max_wait)
                         transaction->t_max_wait = ts;
+               spin_unlock(&transaction->t_handle_lock);
         }
-
+#endif
         handle->h_transaction = transaction;
-       transaction->t_outstanding_credits += nblocks;
-       transaction->t_updates++;
-       transaction->t_handle_count++;
+       atomic_inc(&transaction->t_updates);
+       atomic_inc(&transaction->t_handle_count);
         jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
-                 handle, nblocks, transaction->t_outstanding_credits,
+                 handle, nblocks,
+                 atomic_read(&transaction->t_outstanding_credits),
                   __jbd2_log_space_left(journal));
-       spin_unlock(&transaction->t_handle_lock);
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
  
         lock_map_acquire(&handle->h_lockdep_map);
-out:
-       if (unlikely(new_transaction))          /* It's usually NULL */
-               kfree(new_transaction);
-       return ret;
+       kfree(new_transaction);
+       return 0;
  }
  
  static struct lock_class_key jbd2_handle_key;
@@ -278,7 +306,7 @@ static handle_t *new_handle(int nblocks)
   *
   * Return a pointer to a newly allocated handle, or NULL on failure
   */
-handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
  {
         handle_t *handle = journal_current_handle();
         int err;
@@ -298,7 +326,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
  
         current->journal_info = handle;
  
-       err = start_this_handle(journal, handle);
+       err = start_this_handle(journal, handle, gfp_mask);
         if (err < 0) {
                 jbd2_free_handle(handle);
                 current->journal_info = NULL;
@@ -308,6 +336,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
  out:
         return handle;
  }
+EXPORT_SYMBOL(jbd2__journal_start);
+
+
+handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+{
+       return jbd2__journal_start(journal, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_start);
+
  
  /**
   * int jbd2_journal_extend() - extend buffer credits.
@@ -342,7 +379,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
  
         result = 1;
  
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
  
         /* Don't extend a locked-down transaction! */
         if (handle->h_transaction->t_state != T_RUNNING) {
@@ -352,7 +389,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
         }
  
         spin_lock(&transaction->t_handle_lock);
-       wanted = transaction->t_outstanding_credits + nblocks;
+       wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
  
         if (wanted > journal->j_max_transaction_buffers) {
                 jbd_debug(3, "denied handle %p %d blocks: "
@@ -367,14 +404,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
         }
  
         handle->h_buffer_credits += nblocks;
-       transaction->t_outstanding_credits += nblocks;
+       atomic_add(nblocks, &transaction->t_outstanding_credits);
         result = 0;
  
         jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
  unlock:
         spin_unlock(&transaction->t_handle_lock);
  error_out:
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
  out:
         return result;
  }
@@ -394,8 +431,7 @@ out:
   * transaction capabable of guaranteeing the requested number of
   * credits.
   */
-
-int jbd2_journal_restart(handle_t *handle, int nblocks)
+int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
  {
         transaction_t *transaction = handle->h_transaction;
         journal_t *journal = transaction->t_journal;
@@ -410,29 +446,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
          * First unlink the handle from its current transaction, and start the
          * commit on that.
          */
-       J_ASSERT(transaction->t_updates > 0);
+       J_ASSERT(atomic_read(&transaction->t_updates) > 0);
         J_ASSERT(journal_current_handle() == handle);
  
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
         spin_lock(&transaction->t_handle_lock);
-       transaction->t_outstanding_credits -= handle->h_buffer_credits;
-       transaction->t_updates--;
-
-       if (!transaction->t_updates)
+       atomic_sub(handle->h_buffer_credits,
+                  &transaction->t_outstanding_credits);
+       if (atomic_dec_and_test(&transaction->t_updates))
                 wake_up(&journal->j_wait_updates);
         spin_unlock(&transaction->t_handle_lock);
  
         jbd_debug(2, "restarting handle %p\n", handle);
         __jbd2_log_start_commit(journal, transaction->t_tid);
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
  
         lock_map_release(&handle->h_lockdep_map);
         handle->h_buffer_credits = nblocks;
-       ret = start_this_handle(journal, handle);
+       ret = start_this_handle(journal, handle, gfp_mask);
         return ret;
  }
+EXPORT_SYMBOL(jbd2__journal_restart);
  
  
+int jbd2_journal_restart(handle_t *handle, int nblocks)
+{
+       return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_restart);
+
  /**
   * void jbd2_journal_lock_updates () - establish a transaction barrier.
   * @journal:  Journal to establish a barrier on.
@@ -447,7 +489,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
  {
         DEFINE_WAIT(wait);
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         ++journal->j_barrier_count;
  
         /* Wait until there are no running updates */
@@ -458,19 +500,19 @@ void jbd2_journal_lock_updates(journal_t *journal)
                         break;
  
                 spin_lock(&transaction->t_handle_lock);
-               if (!transaction->t_updates) {
+               if (!atomic_read(&transaction->t_updates)) {
                         spin_unlock(&transaction->t_handle_lock);
                         break;
                 }
                 prepare_to_wait(&journal->j_wait_updates, &wait,
                                 TASK_UNINTERRUPTIBLE);
                 spin_unlock(&transaction->t_handle_lock);
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 schedule();
                 finish_wait(&journal->j_wait_updates, &wait);
-               spin_lock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
         }
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  
         /*
          * We have now established a barrier against other normal updates, but
@@ -494,9 +536,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
         J_ASSERT(journal->j_barrier_count != 0);
  
         mutex_unlock(&journal->j_barrier);
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         --journal->j_barrier_count;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
         wake_up(&journal->j_wait_transaction_locked);
  }
  
@@ -1238,7 +1280,8 @@ int jbd2_journal_stop(handle_t *handle)
  {
         transaction_t *transaction = handle->h_transaction;
         journal_t *journal = transaction->t_journal;
-       int err;
+       int err, wait_for_commit = 0;
+       tid_t tid;
         pid_t pid;
  
         J_ASSERT(journal_current_handle() == handle);
@@ -1246,7 +1289,7 @@ int jbd2_journal_stop(handle_t *handle)
         if (is_handle_aborted(handle))
                 err = -EIO;
         else {
-               J_ASSERT(transaction->t_updates > 0);
+               J_ASSERT(atomic_read(&transaction->t_updates) > 0);
                 err = 0;
         }
  
@@ -1291,9 +1334,9 @@ int jbd2_journal_stop(handle_t *handle)
  
                 journal->j_last_sync_writer = pid;
  
-               spin_lock(&journal->j_state_lock);
+               read_lock(&journal->j_state_lock);
                 commit_time = journal->j_average_commit_time;
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
  
                 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
                                                    transaction->t_start_time));
@@ -1314,14 +1357,8 @@ int jbd2_journal_stop(handle_t *handle)
         if (handle->h_sync)
                 transaction->t_synchronous_commit = 1;
         current->journal_info = NULL;
-       spin_lock(&transaction->t_handle_lock);
-       transaction->t_outstanding_credits -= handle->h_buffer_credits;
-       transaction->t_updates--;
-       if (!transaction->t_updates) {
-               wake_up(&journal->j_wait_updates);
-               if (journal->j_barrier_count)
-                       wake_up(&journal->j_wait_transaction_locked);
-       }
+       atomic_sub(handle->h_buffer_credits,
+                  &transaction->t_outstanding_credits);
  
         /*
          * If the handle is marked SYNC, we need to set another commit
@@ -1330,15 +1367,13 @@ int jbd2_journal_stop(handle_t *handle)
          * transaction is too old now.
          */
         if (handle->h_sync ||
-                       transaction->t_outstanding_credits >
-                               journal->j_max_transaction_buffers ||
-                       time_after_eq(jiffies, transaction->t_expires)) {
+           (atomic_read(&transaction->t_outstanding_credits) >
+            journal->j_max_transaction_buffers) ||
+           time_after_eq(jiffies, transaction->t_expires)) {
                 /* Do this even for aborted journals: an abort still
                  * completes the commit thread, it just doesn't write
                  * anything to disk. */
-               tid_t tid = transaction->t_tid;
  
-               spin_unlock(&transaction->t_handle_lock);
                 jbd_debug(2, "transaction too old, requesting commit for "
                                         "handle %p\n", handle);
                 /* This is non-blocking */
@@ -1349,11 +1384,25 @@ int jbd2_journal_stop(handle_t *handle)
                  * to wait for the commit to complete.
                  */
                 if (handle->h_sync && !(current->flags & PF_MEMALLOC))
-                       err = jbd2_log_wait_commit(journal, tid);
-       } else {
-               spin_unlock(&transaction->t_handle_lock);
+                       wait_for_commit = 1;
         }
  
+       /*
+        * Once we drop t_updates, if it goes to zero the transaction
+        * could start committing on us and eventually disappear.  So
+        * once we do this, we must not dereference transaction
+        * pointer again.
+        */
+       tid = transaction->t_tid;
+       if (atomic_dec_and_test(&transaction->t_updates)) {
+               wake_up(&journal->j_wait_updates);
+               if (journal->j_barrier_count)
+                       wake_up(&journal->j_wait_transaction_locked);
+       }
+
+       if (wait_for_commit)
+               err = jbd2_log_wait_commit(journal, tid);
+
         lock_map_release(&handle->h_lockdep_map);
  
         jbd2_free_handle(handle);
@@ -1719,7 +1768,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                 goto zap_buffer_unlocked;
  
         /* OK, we have data buffer in journaled mode */
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         jbd_lock_bh_state(bh);
         spin_lock(&journal->j_list_lock);
  
@@ -1772,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                         jbd2_journal_put_journal_head(jh);
                         spin_unlock(&journal->j_list_lock);
                         jbd_unlock_bh_state(bh);
-                       spin_unlock(&journal->j_state_lock);
+                       write_unlock(&journal->j_state_lock);
                         return ret;
                 } else {
                         /* There is no currently-running transaction. So the
@@ -1786,7 +1835,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                                 jbd2_journal_put_journal_head(jh);
                                 spin_unlock(&journal->j_list_lock);
                                 jbd_unlock_bh_state(bh);
-                               spin_unlock(&journal->j_state_lock);
+                               write_unlock(&journal->j_state_lock);
                                 return ret;
                         } else {
                                 /* The orphan record's transaction has
@@ -1810,7 +1859,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                 jbd2_journal_put_journal_head(jh);
                 spin_unlock(&journal->j_list_lock);
                 jbd_unlock_bh_state(bh);
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                 return 0;
         } else {
                 /* Good, the buffer belongs to the running transaction.
@@ -1829,7 +1878,7 @@ zap_buffer:
  zap_buffer_no_jh:
         spin_unlock(&journal->j_list_lock);
         jbd_unlock_bh_state(bh);
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  zap_buffer_unlocked:
         clear_buffer_dirty(bh);
         J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2136,9 +2185,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
         /* Locks are here just to force reading of recent values, it is
          * enough that the transaction was not committing before we started
          * a transaction adding the inode to orphan list */
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
         commit_trans = journal->j_committing_transaction;
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
         spin_lock(&journal->j_list_lock);
         inode_trans = jinode->i_transaction;
         spin_unlock(&journal->j_list_lock);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c

index 625de9d7088cdf2c82008b2e875094ec2b43f1d0..9b57c0350ff9337b20a9c96869e209c925246c4f 100644 (file)
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
         if (osb->osb_commit_interval)
                 commit_interval = osb->osb_commit_interval;
  
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
         journal->j_commit_interval = commit_interval;
         if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
                 journal->j_flags |= JBD2_BARRIER;
         else
                 journal->j_flags &= ~JBD2_BARRIER;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
  }
  
  int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index adf832dec3f37dd639e8aa24fe3cc29c7504a6a7..0b52924a0cb6ac5acda48540cb93d6b45a43beaf 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -601,13 +601,13 @@ struct transaction_s
          * Number of outstanding updates running on this transaction
          * [t_handle_lock]
          */
-       int                     t_updates;
+       atomic_t                t_updates;
  
         /*
          * Number of buffers reserved for use by all handles in this transaction
          * handle but not yet modified. [t_handle_lock]
          */
-       int                     t_outstanding_credits;
+       atomic_t                t_outstanding_credits;
  
         /*
          * Forward and backward links for the circular list of all transactions
@@ -629,7 +629,7 @@ struct transaction_s
         /*
          * How many handles used this transaction? [t_handle_lock]
          */
-       int t_handle_count;
+       atomic_t                t_handle_count;
  
         /*
          * This transaction is being forced and some process is
@@ -764,7 +764,7 @@ struct journal_s
         /*
          * Protect the various scalars in the journal
          */
-       spinlock_t              j_state_lock;
+       rwlock_t                j_state_lock;
  
         /*
          * Number of processes waiting to create a barrier lock [j_state_lock]
@@ -1082,7 +1082,9 @@ static inline handle_t *journal_current_handle(void)
   */
  
  extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
-extern int      jbd2_journal_restart (handle_t *, int nblocks);
+extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask);
+extern int      jbd2_journal_restart(handle_t *, int nblocks);
+extern int      jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask);
  extern int      jbd2_journal_extend (handle_t *, int nblocks);
  extern int      jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
  extern int      jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
@@ -1257,8 +1259,8 @@ static inline int jbd_space_needed(journal_t *journal)
  {
         int nblocks = journal->j_max_transaction_buffers;
         if (journal->j_committing_transaction)
-               nblocks += journal->j_committing_transaction->
-                                       t_outstanding_credits;
+               nblocks += atomic_read(&journal->j_committing_transaction->
+                                      t_outstanding_credits);
         return nblocks;
  }
  
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h

index f3865c7b4166e315da77d9c342b977b9d4338915..01e9e0076a92c68682fa513e1f4a862e291fb9ae 100644 (file)
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -395,11 +395,12 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
  );
  
  TRACE_EVENT(ext4_mb_release_inode_pa,
-       TP_PROTO(struct ext4_allocation_context *ac,
+       TP_PROTO(struct super_block *sb,
+                struct ext4_allocation_context *ac,
                  struct ext4_prealloc_space *pa,
                  unsigned long long block, unsigned int count),
  
-       TP_ARGS(ac, pa, block, count),
+       TP_ARGS(sb, ac, pa, block, count),
  
         TP_STRUCT__entry(
                 __field(        dev_t,  dev                     )
@@ -410,8 +411,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
         ),
  
         TP_fast_assign(
-               __entry->dev            = ac->ac_sb->s_dev;
-               __entry->ino            = ac->ac_inode->i_ino;
+               __entry->dev            = sb->s_dev;
+               __entry->ino            = (ac && ac->ac_inode) ? 
+                                               ac->ac_inode->i_ino : 0;
                 __entry->block          = block;
                 __entry->count          = count;
         ),
@@ -422,10 +424,11 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
  );
  
  TRACE_EVENT(ext4_mb_release_group_pa,
-       TP_PROTO(struct ext4_allocation_context *ac,
+       TP_PROTO(struct super_block *sb,
+                struct ext4_allocation_context *ac,
                  struct ext4_prealloc_space *pa),
  
-       TP_ARGS(ac, pa),
+       TP_ARGS(sb, ac, pa),
  
         TP_STRUCT__entry(
                 __field(        dev_t,  dev                     )
@@ -436,8 +439,9 @@ TRACE_EVENT(ext4_mb_release_group_pa,
         ),
  
         TP_fast_assign(
-               __entry->dev            = ac->ac_sb->s_dev;
-               __entry->ino            = ac->ac_inode->i_ino;
+               __entry->dev            = sb->s_dev;
+               __entry->ino            = (ac && ac->ac_inode) ?
+                                               ac->ac_inode->i_ino : 0;
                 __entry->pa_pstart      = pa->pa_pstart;
                 __entry->pa_len         = pa->pa_len;
         ),
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 7 Aug 2010 20:03:53 +0000 (13:03 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 7 Aug 2010 20:03:53 +0000 (13:03 -0700)
fs/ext4/acl.c		patch \| blob \| history
fs/ext4/balloc.c		patch \| blob \| history
fs/ext4/block_validity.c		patch \| blob \| history
fs/ext4/dir.c		patch \| blob \| history
fs/ext4/ext4.h		patch \| blob \| history
fs/ext4/ext4_jbd2.c		patch \| blob \| history
fs/ext4/ext4_jbd2.h		patch \| blob \| history
fs/ext4/extents.c		patch \| blob \| history
fs/ext4/file.c		patch \| blob \| history
fs/ext4/ialloc.c		patch \| blob \| history
fs/ext4/inode.c		patch \| blob \| history
fs/ext4/mballoc.c		patch \| blob \| history
fs/ext4/migrate.c		patch \| blob \| history
fs/ext4/move_extent.c		patch \| blob \| history
fs/ext4/namei.c		patch \| blob \| history
fs/ext4/resize.c		patch \| blob \| history
fs/ext4/super.c		patch \| blob \| history
fs/ext4/xattr.c		patch \| blob \| history
fs/jbd2/checkpoint.c		patch \| blob \| history
fs/jbd2/commit.c		patch \| blob \| history
fs/jbd2/journal.c		patch \| blob \| history
fs/jbd2/recovery.c		patch \| blob \| history
fs/jbd2/transaction.c		patch \| blob \| history
fs/ocfs2/journal.c		patch \| blob \| history
include/linux/jbd2.h		patch \| blob \| history
include/trace/events/ext4.h		patch \| blob \| history