git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/ext4/inode.c
ext4: fix sleep inside spinlock issue with quota and dealloc (#14739)
[karo-tx-linux.git] / fs / ext4 / inode.c
index 2c8caa51addb40c3120d9d3c97d91a559a70814e..e233879ebbcb0595651ced91b3ecafc8fc492685 100644 (file)
@@ -1021,10 +1021,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        if (!err)
                err = ext4_splice_branch(handle, inode, iblock,
                                         partial, indirect_blks, count);
-       else
+       if (err)
                goto cleanup;
 
        set_buffer_new(bh_result);
+
+       ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
        map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
        if (count > blocks_to_boundary)
@@ -1043,17 +1045,12 @@ out:
        return err;
 }
 
-qsize_t ext4_get_reserved_space(struct inode *inode)
+#ifdef CONFIG_QUOTA
+qsize_t *ext4_get_reserved_space(struct inode *inode)
 {
-       unsigned long long total;
-
-       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-       total = EXT4_I(inode)->i_reserved_data_blocks +
-               EXT4_I(inode)->i_reserved_meta_blocks;
-       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
-       return total;
+       return &EXT4_I(inode)->i_reserved_quota;
 }
+#endif /* CONFIG_QUOTA */
 /*
  * Calculate the number of metadata blocks need to reserve
  * to allocate @blocks for non extent file based file
@@ -1534,6 +1531,16 @@ static int do_journal_get_write_access(handle_t *handle,
        return ext4_journal_get_write_access(handle, bh);
 }
 
+/*
+ * Clean up after a failed write: drop pagecache from inode->i_size onward so
+ * those buffers get unmapped, then call ext4_truncate() to free unused blocks.
+ */
+static void ext4_truncate_failed_write(struct inode *inode)
+{
+       truncate_inode_pages(inode->i_mapping, inode->i_size);
+       ext4_truncate(inode);
+}
+
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
                            loff_t pos, unsigned len, unsigned flags,
                            struct page **pagep, void **fsdata)
@@ -1599,7 +1606,7 @@ retry:
 
                ext4_journal_stop(handle);
                if (pos + len > inode->i_size) {
-                       ext4_truncate(inode);
+                       ext4_truncate_failed_write(inode);
                        /*
                         * If truncate failed early the inode might
                         * still be on the orphan list; we need to
@@ -1709,7 +1716,7 @@ static int ext4_ordered_write_end(struct file *file,
                ret = ret2;
 
        if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                /*
                 * If truncate failed early the inode might still be
                 * on the orphan list; we need to make sure the inode
@@ -1751,7 +1758,7 @@ static int ext4_writeback_write_end(struct file *file,
                ret = ret2;
 
        if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                /*
                 * If truncate failed early the inode might still be
                 * on the orphan list; we need to make sure the inode
@@ -1814,7 +1821,7 @@ static int ext4_journalled_write_end(struct file *file,
        if (!ret)
                ret = ret2;
        if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                /*
                 * If truncate failed early the inode might still be
                 * on the orphan list; we need to make sure the inode
@@ -1846,19 +1853,17 @@ repeat:
 
        md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
        total = md_needed + nrblocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
        /*
         * Make quota reservation here to prevent quota overflow
         * later. Real quota accounting is done at pages writeout
         * time.
         */
-       if (vfs_dq_reserve_block(inode, total)) {
-               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+       if (vfs_dq_reserve_block(inode, total))
                return -EDQUOT;
-       }
 
        if (ext4_claim_free_blocks(sbi, total)) {
-               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
                vfs_dq_release_reservation_block(inode, total);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
@@ -1866,10 +1871,11 @@ repeat:
                }
                return -ENOSPC;
        }
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-       EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
-
+       EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
        return 0;       /* success */
 }
 
@@ -2788,7 +2794,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
         * number of contiguous block. So we will limit
         * number of contiguous block to a sane value
         */
-       if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
            (max_blocks > EXT4_MAX_TRANS_DATA))
                max_blocks = EXT4_MAX_TRANS_DATA;
 
@@ -3091,7 +3097,7 @@ retry:
                 * i_size_read because we hold i_mutex.
                 */
                if (pos + len > inode->i_size)
-                       ext4_truncate(inode);
+                       ext4_truncate_failed_write(inode);
        }
 
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -4120,6 +4126,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
                              __le32 *last)
 {
        __le32 *p;
+       int     is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
+
        if (try_to_extend_transaction(handle, inode)) {
                if (bh) {
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4150,11 +4158,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
 
                        *p = 0;
                        tbh = sb_find_get_block(inode->i_sb, nr);
-                       ext4_forget(handle, 0, inode, tbh, nr);
+                       ext4_forget(handle, is_metadata, inode, tbh, nr);
                }
        }
 
-       ext4_free_blocks(handle, inode, block_to_free, count, 0);
+       ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
 }
 
 /**
@@ -4781,8 +4789,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        struct ext4_iloc iloc;
        struct ext4_inode *raw_inode;
        struct ext4_inode_info *ei;
-       struct buffer_head *bh;
        struct inode *inode;
+       journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
        int block;
 
@@ -4793,11 +4801,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                return inode;
 
        ei = EXT4_I(inode);
+       iloc.bh = 0;
 
        ret = __ext4_get_inode_loc(inode, &iloc, 0);
        if (ret < 0)
                goto bad_inode;
-       bh = iloc.bh;
        raw_inode = ext4_raw_inode(&iloc);
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4820,7 +4828,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                if (inode->i_mode == 0 ||
                    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
                        /* this inode is deleted */
-                       brelse(bh);
                        ret = -ESTALE;
                        goto bad_inode;
                }
@@ -4837,6 +4844,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
        inode->i_size = ext4_isize(raw_inode);
        ei->i_disksize = inode->i_size;
+#ifdef CONFIG_QUOTA
+       ei->i_reserved_quota = 0;
+#endif
        inode->i_generation = le32_to_cpu(raw_inode->i_generation);
        ei->i_block_group = iloc.block_group;
        ei->i_last_alloc_group = ~0;
@@ -4848,11 +4858,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_data[block] = raw_inode->i_block[block];
        INIT_LIST_HEAD(&ei->i_orphan);
 
+       /*
+        * Set transaction id's of transactions that have to be committed
+        * to finish f[data]sync. We set them to currently running transaction
+        * as we cannot be sure that the inode or some of its metadata isn't
+        * part of the transaction - the inode could have been reclaimed and
+        * now it is reread from disk.
+        */
+       if (journal) {
+               transaction_t *transaction;
+               tid_t tid;
+
+               spin_lock(&journal->j_state_lock);
+               if (journal->j_running_transaction)
+                       transaction = journal->j_running_transaction;
+               else
+                       transaction = journal->j_committing_transaction;
+               if (transaction)
+                       tid = transaction->t_tid;
+               else
+                       tid = journal->j_commit_sequence;
+               spin_unlock(&journal->j_state_lock);
+               ei->i_sync_tid = tid;
+               ei->i_datasync_tid = tid;
+       }
+
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
                    EXT4_INODE_SIZE(inode->i_sb)) {
-                       brelse(bh);
                        ret = -EIO;
                        goto bad_inode;
                }
@@ -4884,10 +4918,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 
        ret = 0;
        if (ei->i_file_acl &&
-           ((ei->i_file_acl <
-             (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
-              EXT4_SB(sb)->s_gdb_count)) ||
-            (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
+           !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
                ext4_error(sb, __func__,
                           "bad extended attribute block %llu in inode #%lu",
                           ei->i_file_acl, inode->i_ino);
@@ -4905,10 +4936,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                /* Validate block references which are part of inode */
                ret = ext4_check_inode_blockref(inode);
        }
-       if (ret) {
-               brelse(bh);
+       if (ret)
                goto bad_inode;
-       }
 
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
@@ -4936,7 +4965,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        init_special_inode(inode, inode->i_mode,
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
        } else {
-               brelse(bh);
                ret = -EIO;
                ext4_error(inode->i_sb, __func__,
                           "bogus i_mode (%o) for inode=%lu",
@@ -4949,6 +4977,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        return inode;
 
 bad_inode:
+       brelse(iloc.bh);
        iget_failed(inode);
        return ERR_PTR(ret);
 }
@@ -5108,6 +5137,7 @@ static int ext4_do_update_inode(handle_t *handle,
                err = rc;
        ei->i_state &= ~EXT4_STATE_NEW;
 
+       ext4_update_inode_fsync_trans(handle, inode, 0);
 out_brelse:
        brelse(bh);
        ext4_std_error(inode->i_sb, err);
@@ -5227,8 +5257,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
                /* (user+group)*(old+new) structure, inode write (sb,
                 * inode block, ? - but truncate inode update has it) */
-               handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
-                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
                if (IS_ERR(handle)) {
                        error = PTR_ERR(handle);
                        goto err_out;