]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
ext4: introduce new i_write_mutex to protect fallocate
authorNamjae Jeon <namjae.jeon@samsung.com>
Tue, 27 May 2014 16:48:35 +0000 (12:48 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 27 May 2014 16:48:35 +0000 (12:48 -0400)
Introduce new i_write_mutex to protect new writes from coming while doing
fallocate operations. Also, get rid of aio_mutex as it is covered by
i_write_mutex.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/inode.c
fs/ext4/super.c

index 1479e2ae00d28e83e8d1c175752a61b59828cd55..0519715323a60fdf7c5cbccde3ecb05b058945ec 100644 (file)
@@ -943,6 +943,9 @@ struct ext4_inode_info {
 
        /* Precomputed uuid+inum+igen checksum for seeding inode checksums */
        __u32 i_csum_seed;
+
+       /* protects fallocate operations racing with new writes */
+       struct mutex i_write_mutex;
 };
 
 /*
@@ -2805,10 +2808,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
 #define EXT4_WQ_HASH_SZ                37
 #define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
                                            EXT4_WQ_HASH_SZ])
-#define ext4_aio_mutex(v)  (&ext4__aio_mutex[((unsigned long)(v)) %\
-                                            EXT4_WQ_HASH_SZ])
 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 
 #define EXT4_RESIZING  0
 extern int ext4_resize_begin(struct super_block *sb);
index 5bbe425640dacd2af968f06466ccec010b415097..cb23a34bc48a323fdd64e47e0622317eca140e3e 100644 (file)
@@ -4741,6 +4741,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
 
+       mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
        /*
         * Write out all dirty pages to avoid race conditions
         * Then release them.
@@ -4748,8 +4750,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                ret = filemap_write_and_wait_range(mapping, offset,
                                                   offset + len - 1);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&EXT4_I(inode)->i_write_mutex);
                        return ret;
+               }
        }
 
        /*
@@ -4761,8 +4765,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        start = round_up(offset, 1 << blkbits);
        end = round_down((offset + len), 1 << blkbits);
 
-       if (start < offset || end > offset + len)
+       if (start < offset || end > offset + len) {
+               mutex_unlock(&EXT4_I(inode)->i_write_mutex);
                return -EINVAL;
+       }
        partial = (offset + len) & ((1 << blkbits) - 1);
 
        lblk = start >> blkbits;
@@ -4859,6 +4865,7 @@ out_dio:
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        mutex_unlock(&inode->i_mutex);
+       mutex_unlock(&EXT4_I(inode)->i_write_mutex);
        return ret;
 }
 
@@ -5411,11 +5418,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
        punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
 
+       mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
        /* Call ext4_force_commit to flush all data in case of data=journal. */
        if (ext4_should_journal_data(inode)) {
                ret = ext4_force_commit(inode->i_sb);
                if (ret)
-                       return ret;
+                       goto out_i_write_mutex;
        }
 
        /*
@@ -5428,7 +5437,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
                                           LLONG_MAX);
        if (ret)
-               return ret;
+               goto out_i_write_mutex;
 
        /* Take mutex lock */
        mutex_lock(&inode->i_mutex);
@@ -5501,5 +5510,7 @@ out_dio:
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        mutex_unlock(&inode->i_mutex);
+out_i_write_mutex:
+       mutex_unlock(&EXT4_I(inode)->i_write_mutex);
        return ret;
 }
index 4e8bc284ec0e96296e8bbcf68423b9ea9ee8c921..e5cd87fa45973262cffd69bc88f943f23750cf0d 100644 (file)
@@ -97,7 +97,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(iocb->ki_filp);
-       struct mutex *aio_mutex = NULL;
+       bool unaligned_direct_aio = false;
        struct blk_plug plug;
        int o_direct = file->f_flags & O_DIRECT;
        int overwrite = 0;
@@ -106,6 +106,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 
        BUG_ON(iocb->ki_pos != pos);
 
+       mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
        /*
         * Unaligned direct AIO must be serialized; see comment above
         * In the case of O_APPEND, assume that we must always serialize
@@ -115,8 +117,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
            !is_sync_kiocb(iocb) &&
            (file->f_flags & O_APPEND ||
             ext4_unaligned_aio(inode, iov, nr_segs, pos))) {
-               aio_mutex = ext4_aio_mutex(inode);
-               mutex_lock(aio_mutex);
+               unaligned_direct_aio = true;
                ext4_unwritten_wait(inode);
        }
 
@@ -134,8 +135,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                if ((pos > sbi->s_bitmap_maxbytes) ||
                    (pos == sbi->s_bitmap_maxbytes && length > 0)) {
                        mutex_unlock(&inode->i_mutex);
-                       ret = -EFBIG;
-                       goto errout;
+                       mutex_unlock(&EXT4_I(inode)->i_write_mutex);
+                       return -EFBIG;
                }
 
                if (pos + length > sbi->s_bitmap_maxbytes) {
@@ -150,8 +151,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                iocb->private = &overwrite;
 
                /* check whether we do a DIO overwrite or not */
-               if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
-                   !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
+               if (ext4_should_dioread_nolock(inode) &&
+                   !unaligned_direct_aio && !file->f_mapping->nrpages &&
+                   pos + length <= i_size_read(inode)) {
                        struct ext4_map_blocks map;
                        unsigned int blkbits = inode->i_blkbits;
                        int err, len;
@@ -181,6 +183,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 
        ret = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
+       if (!unaligned_direct_aio)
+               mutex_unlock(&EXT4_I(inode)->i_write_mutex);
 
        if (ret > 0) {
                ssize_t err;
@@ -192,9 +196,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
        if (o_direct)
                blk_finish_plug(&plug);
 
-errout:
-       if (aio_mutex)
-               mutex_unlock(aio_mutex);
+       if (unaligned_direct_aio)
+               mutex_unlock(&EXT4_I(inode)->i_write_mutex);
        return ret;
 }
 
index 645de3eaf2c94fc8270cf8ef03d294a2add1a978..55f999a91f0b9f27a541e42789220e7472c07fcb 100644 (file)
@@ -3534,6 +3534,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        trace_ext4_punch_hole(inode, offset, length, 0);
 
+       mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
        /*
         * Write out all dirty pages to avoid race conditions
         * Then release them.
@@ -3541,8 +3543,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                ret = filemap_write_and_wait_range(mapping, offset,
                                                   offset + length - 1);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&EXT4_I(inode)->i_write_mutex);
                        return ret;
+               }
        }
 
        mutex_lock(&inode->i_mutex);
@@ -3643,6 +3647,7 @@ out_dio:
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        mutex_unlock(&inode->i_mutex);
+       mutex_unlock(&EXT4_I(inode)->i_write_mutex);
        return ret;
 }
 
index b9b9aabfb4d2403e67565ef1b49992e3ae565407..7667a5b3c8e41f1fb5acc2c1e66e77fa3aaae143 100644 (file)
@@ -904,6 +904,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        atomic_set(&ei->i_ioend_count, 0);
        atomic_set(&ei->i_unwritten, 0);
        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
+       mutex_init(&ei->i_write_mutex);
 
        return &ei->vfs_inode;
 }
@@ -5516,7 +5517,6 @@ static void ext4_exit_feat_adverts(void)
 
 /* Shared across all ext4 file systems */
 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 
 static int __init ext4_init_fs(void)
 {
@@ -5529,7 +5529,6 @@ static int __init ext4_init_fs(void)
        ext4_check_flag_values();
 
        for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
-               mutex_init(&ext4__aio_mutex[i]);
                init_waitqueue_head(&ext4__ioend_wq[i]);
        }