Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7b7462a0e13e11e7131f2b148d1323a3de5c996..8a064734e6eb3ed06461e9954d036da6ff1e8147 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -148,6 +148,9 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
         int ea_blocks = EXT4_I(inode)->i_file_acl ?
                EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
 
+       if (ext4_has_inline_data(inode))
+               return 0;
+
        return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
 }
 
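Annotation: the whole updated predicate, assembled from the hunk above. An inline-data symlink keeps its target in the inode body via the inline-data feature, so it also has i_blocks == 0 and would otherwise be misclassified as a fast symlink whose target is read directly from i_block.

	static int ext4_inode_is_fast_symlink(struct inode *inode)
	{
		int ea_blocks = EXT4_I(inode)->i_file_acl ?
			EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;

		/* inline-data symlinks also have i_blocks == 0, but their
		 * target does not live in i_block, so they must never be
		 * treated as fast symlinks */
		if (ext4_has_inline_data(inode))
			return 0;

		return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
	}
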
@@ -443,7 +446,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
         * could be converted.
         */
        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read((&EXT4_I(inode)->i_data_sem));
+               down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
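Annotation: this hunk, and the matching down_read()/down_write() hunks below, are purely cosmetic; they drop a redundant pair of parentheses around the rwsem argument. The generated code is identical:

	down_read((&EXT4_I(inode)->i_data_sem));	/* before: extra parens */
	down_read(&EXT4_I(inode)->i_data_sem);		/* after: same semantics */
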
@@ -489,8 +492,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
  * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
  * based files
  *
- * On success, it returns the number of blocks being mapped or allocate.
- * if create==0 and the blocks are pre-allocated and uninitialized block,
+ * On success, it returns the number of blocks being mapped or allocated.
+ * if create==0 and the blocks are pre-allocated and unwritten block,
  * the result buffer head is unmapped. If the create ==1, it will make sure
  * the buffer head is mapped.
  *
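Annotation: the corrected comment spells out the create == 0 contract. A minimal lookup-only caller, as a sketch (the struct fields and the EXT4_MAP_MAPPED flag are the real ones used throughout this file; read_block() is a hypothetical consumer, and error handling is elided):

	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = lblk;				/* logical block to look up */
	map.m_len = 1;
	ret = ext4_map_blocks(NULL, inode, &map, 0);	/* create == 0: no allocation */
	if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
		read_block(map.m_pblk);			/* hypothetical consumer */
	/* a hit on a pre-allocated, unwritten extent comes back without
	 * EXT4_MAP_MAPPED set, so stale on-disk contents are never exposed */
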
@@ -555,7 +558,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * file system block.
         */
        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read((&EXT4_I(inode)->i_data_sem));
+               down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -622,12 +625,12 @@ found:
        map->m_flags &= ~EXT4_MAP_FLAGS;
 
        /*
-        * New blocks allocate and/or writing to uninitialized extent
+        * New blocks allocate and/or writing to unwritten extent
         * will possibly result in updating i_data, so we take
         * the write lock of i_data_sem, and call get_blocks()
         * with create == 1 flag.
         */
-       down_write((&EXT4_I(inode)->i_data_sem));
+       down_write(&EXT4_I(inode)->i_data_sem);
 
        /*
         * if the caller is from delayed allocation writeout path
@@ -922,6 +925,7 @@ int do_journal_get_write_access(handle_t *handle,
         */
        if (dirty)
                clear_buffer_dirty(bh);
+       BUFFER_TRACE(bh, "get write access");
        ret = ext4_journal_get_write_access(handle, bh);
        if (!ret && dirty)
                ret = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -1540,7 +1544,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                ext4_es_lru_add(inode);
                if (ext4_es_is_hole(&es)) {
                        retval = 0;
-                       down_read((&EXT4_I(inode)->i_data_sem));
+                       down_read(&EXT4_I(inode)->i_data_sem);
                        goto add_delayed;
                }
 
@@ -1577,7 +1581,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
-       down_read((&EXT4_I(inode)->i_data_sem));
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_has_inline_data(inode)) {
                /*
                 * We will soon create blocks for this page, and let
@@ -1769,6 +1773,7 @@ static int __ext4_journalled_writepage(struct page *page,
        BUG_ON(!ext4_handle_valid(handle));
 
        if (inline_data) {
+               BUFFER_TRACE(inode_bh, "get write access");
                ret = ext4_journal_get_write_access(handle, inode_bh);
 
                err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
@@ -1846,6 +1851,7 @@ static int ext4_writepage(struct page *page,
        struct buffer_head *page_bufs = NULL;
        struct inode *inode = page->mapping->host;
        struct ext4_io_submit io_submit;
+       bool keep_towrite = false;
 
        trace_ext4_writepage(page);
        size = i_size_read(inode);
@@ -1876,6 +1882,7 @@ static int ext4_writepage(struct page *page,
                        unlock_page(page);
                        return 0;
                }
+               keep_towrite = true;
        }
 
        if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1892,7 +1899,7 @@ static int ext4_writepage(struct page *page,
                unlock_page(page);
                return -ENOMEM;
        }
-       ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+       ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
        ext4_io_submit(&io_submit);
        /* Drop io_end reference we got from init */
        ext4_put_io_end_defer(io_submit.io_end);
@@ -1911,7 +1918,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
        else
                len = PAGE_CACHE_SIZE;
        clear_page_dirty_for_io(page);
-       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
        if (!err)
                mpd->wbc->nr_to_write--;
        mpd->first_page++;
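Annotation: keep_towrite is new plumbing into ext4_bio_write_page(). When ext4_writepage() finds buffers it cannot write from this context (the delayed/unwritten case above), the page stays dirty, and the flag presumably tells the bio path to preserve the PAGECACHE_TAG_TOWRITE tag so that a data-integrity writeback (WB_SYNC_ALL) will revisit the page. A sketch of the consuming side, which is not part of this diff:

	/* inside ext4_bio_write_page(..., bool keep_towrite) -- sketch */
	if (keep_towrite)
		set_page_writeback_keepwrite(page);	/* keep the TOWRITE tag */
	else
		set_page_writeback(page);		/* clears the TOWRITE tag */

mpage_submit_page() passes false because it only runs once the extent is fully mapped and every buffer on the page can be written.
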
@@ -2032,7 +2039,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
  * Scan buffers corresponding to changed extent (we expect corresponding pages
  * to be already locked) and update buffer state according to new extent state.
  * We map delalloc buffers to their physical location, clear unwritten bits,
- * and mark buffers as uninit when we perform writes to uninitialized extents
+ * and mark buffers as uninit when we perform writes to unwritten extents
  * and do extent conversion after IO is finished. If the last page is not fully
  * mapped, we update @map to the next extent in the last page that needs
  * mapping. Otherwise we submit the page for IO.
@@ -2126,12 +2133,12 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
        struct inode *inode = mpd->inode;
        struct ext4_map_blocks *map = &mpd->map;
        int get_blocks_flags;
-       int err;
+       int err, dioread_nolock;
 
        trace_ext4_da_write_pages_extent(inode, map);
        /*
         * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
-        * to convert an uninitialized extent to be initialized (in the case
+        * to convert an unwritten extent to be initialized (in the case
         * where we have written into one or more preallocated blocks).  It is
         * possible that we're going to need more metadata blocks than
         * previously reserved. However we must not fail because we're in
@@ -2148,7 +2155,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
         */
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
                           EXT4_GET_BLOCKS_METADATA_NOFAIL;
-       if (ext4_should_dioread_nolock(inode))
+       dioread_nolock = ext4_should_dioread_nolock(inode);
+       if (dioread_nolock)
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (map->m_flags & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
@@ -2156,7 +2164,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
        err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
        if (err < 0)
                return err;
-       if (map->m_flags & EXT4_MAP_UNINIT) {
+       if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) {
                if (!mpd->io_submit.io_end->handle &&
                    ext4_handle_valid(handle)) {
                        mpd->io_submit.io_end->handle = handle->h_rsv_handle;
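Annotation: the old test keyed off EXT4_MAP_UNINIT, a flag this series retires in favour of EXT4_MAP_UNWRITTEN. The reserved handle is only needed when the write went into an unwritten extent that must be converted after IO completes, and that only happens on the dioread_nolock path, hence the two-part condition. The completion side eventually does something like the following sketch (not in this diff; conversion needs a transaction, which is why the handle is stashed in io_end here):

	/* at IO-completion time -- sketch */
	err = ext4_convert_unwritten_extents(handle, inode, offset, size);
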
@@ -3070,9 +3078,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
  * preallocated extents, and those write extend the file, no need to
  * fall back to buffered IO.
  *
- * For holes, we fallocate those blocks, mark them as uninitialized
+ * For holes, we fallocate those blocks, mark them as unwritten
  * If those blocks were preallocated, we mark sure they are split, but
- * still keep the range to write as uninitialized.
+ * still keep the range to write as unwritten.
  *
  * The unwritten extents will be converted to written when DIO is completed.
  * For async direct IO, since the IO may still pending when return, we
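Annotation: a userspace illustration of the scenario this comment describes. Preallocate a range (the extent stays unwritten), then direct-write into part of it; until the extent conversion at IO completion, a concurrent buffered read of the range sees zeroes, never stale disk contents. File name and sizes are illustrative; error handling is elided.

	#define _GNU_SOURCE		/* for O_DIRECT and fallocate() */
	#include <fcntl.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		void *buf;
		int fd = open("scratch", O_RDWR | O_CREAT | O_DIRECT, 0644);

		posix_memalign(&buf, 4096, 4096);	/* O_DIRECT alignment */
		memset(buf, 0xab, 4096);
		fallocate(fd, 0, 0, 1 << 20);		/* preallocated, unwritten extent */
		pwrite(fd, buf, 4096, 0);		/* DIO into the unwritten range */
		close(fd);
		return 0;
	}
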
@@ -3085,13 +3093,12 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
  *
  */
 static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
-                             const struct iovec *iov, loff_t offset,
-                             unsigned long nr_segs)
+                             struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        int overwrite = 0;
        get_block_t *get_block_func = NULL;
        int dio_flags = 0;
@@ -3100,7 +3107,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 
        /* Use the old path for reads and writes beyond i_size. */
        if (rw != WRITE || final_size > inode->i_size)
-               return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+               return ext4_ind_direct_IO(rw, iocb, iter, offset);
 
        BUG_ON(iocb->private == NULL);
 
@@ -3124,12 +3131,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
         * We could direct write to holes and fallocate.
         *
         * Allocated blocks to fill the hole are marked as
-        * uninitialized to prevent parallel buffered read to expose
+        * unwritten to prevent parallel buffered read to expose
         * the stale data before DIO complete the data IO.
         *
         * As to previously fallocated extents, ext4 get_block will
         * just simply mark the buffer mapped but still keep the
-        * extents uninitialized.
+        * extents unwritten.
         *
         * For non AIO case, we will convert those unwritten extents
         * to written after return back from blockdev_direct_IO.
@@ -3167,8 +3174,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                dio_flags = DIO_LOCKING;
        }
        ret = __blockdev_direct_IO(rw, iocb, inode,
-                                  inode->i_sb->s_bdev, iov,
-                                  offset, nr_segs,
+                                  inode->i_sb->s_bdev, iter,
+                                  offset,
                                   get_block_func,
                                   ext4_end_io_dio,
                                   NULL,
@@ -3222,11 +3229,11 @@ retake_lock:
 }
 
 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
-                             const struct iovec *iov, loff_t offset,
-                             unsigned long nr_segs)
+                             struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
        /*
@@ -3239,13 +3246,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        if (ext4_has_inline_data(inode))
                return 0;
 
-       trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+       trace_ext4_direct_IO_enter(inode, offset, count, rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+               ret = ext4_ext_direct_IO(rw, iocb, iter, offset);
        else
-               ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
-       trace_ext4_direct_IO_exit(inode, offset,
-                               iov_length(iov, nr_segs), rw, ret);
+               ret = ext4_ind_direct_IO(rw, iocb, iter, offset);
+       trace_ext4_direct_IO_exit(inode, offset, count, rw, ret);
        return ret;
 }
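Annotation: these hunks are ext4's share of the tree-wide switch of ->direct_IO from an (iov, nr_segs) pair to struct iov_iter. iov_iter_count(iter) is the drop-in replacement for iov_length(iov, nr_segs); for reference, the old helper was just:

	/* what iov_length() computed from the raw iovec array */
	static inline size_t iov_length(const struct iovec *iov,
					unsigned long nr_segs)
	{
		size_t len = 0;
		unsigned long seg;

		for (seg = 0; seg < nr_segs; seg++)
			len += iov[seg].iov_len;
		return len;
	}

The iov_iter core precomputes this total, so callers now read iov_iter_count(iter) instead of walking the segments themselves.
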
 
@@ -3440,7 +3446,7 @@ unlock:
  * This required during truncate. We need to physically zero the tail end
  * of that block so it doesn't yield old data if the file is later grown.
  */
-int ext4_block_truncate_page(handle_t *handle,
+static int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from)
 {
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
@@ -4304,12 +4310,15 @@ static int ext4_do_update_inode(handle_t *handle,
        struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct buffer_head *bh = iloc->bh;
+       struct super_block *sb = inode->i_sb;
        int err = 0, rc, block;
-       int need_datasync = 0;
+       int need_datasync = 0, set_large_file = 0;
        uid_t i_uid;
        gid_t i_gid;
 
-       /* For fields not not tracking in the in-memory inode,
+       spin_lock(&ei->i_raw_lock);
+
+       /* For fields not tracked in the in-memory inode,
         * initialise them to zero for new inodes. */
        if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
                memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@@ -4347,8 +4356,10 @@ static int ext4_do_update_inode(handle_t *handle,
        EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
 
-       if (ext4_inode_blocks_set(handle, raw_inode, ei))
+       if (ext4_inode_blocks_set(handle, raw_inode, ei)) {
+               spin_unlock(&ei->i_raw_lock);
                goto out_brelse;
+       }
        raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
        raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
        if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
@@ -4360,24 +4371,11 @@ static int ext4_do_update_inode(handle_t *handle,
                need_datasync = 1;
        }
        if (ei->i_disksize > 0x7fffffffULL) {
-               struct super_block *sb = inode->i_sb;
                if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
                                EXT4_SB(sb)->s_es->s_rev_level ==
-                               cpu_to_le32(EXT4_GOOD_OLD_REV)) {
-                       /* If this is the first large file
-                        * created, add a flag to the superblock.
-                        */
-                       err = ext4_journal_get_write_access(handle,
-                                       EXT4_SB(sb)->s_sbh);
-                       if (err)
-                               goto out_brelse;
-                       ext4_update_dynamic_rev(sb);
-                       EXT4_SET_RO_COMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
-                       ext4_handle_sync(handle);
-                       err = ext4_handle_dirty_super(handle, sb);
-               }
+                   cpu_to_le32(EXT4_GOOD_OLD_REV))
+                       set_large_file = 1;
        }
        raw_inode->i_generation = cpu_to_le32(inode->i_generation);
        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
@@ -4409,12 +4407,24 @@ static int ext4_do_update_inode(handle_t *handle,
 
        ext4_inode_csum_set(inode, raw_inode, ei);
 
+       spin_unlock(&ei->i_raw_lock);
+
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        rc = ext4_handle_dirty_metadata(handle, NULL, bh);
        if (!err)
                err = rc;
        ext4_clear_inode_state(inode, EXT4_STATE_NEW);
-
+       if (set_large_file) {
+               BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
+               err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+               if (err)
+                       goto out_brelse;
+               ext4_update_dynamic_rev(sb);
+               EXT4_SET_RO_COMPAT_FEATURE(sb,
+                                          EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
+               ext4_handle_sync(handle);
+               err = ext4_handle_dirty_super(handle, sb);
+       }
        ext4_update_inode_fsync_trans(handle, inode, need_datasync);
 out_brelse:
        brelse(bh);
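
Annotation: two things happen in ext4_do_update_inode() here. First, the copy-out into the raw on-disk inode is now serialized by the new ei->i_raw_lock spinlock. Second, the LARGE_FILE superblock update moves out of that critical section: ext4_journal_get_write_access() can block (it may wait on buffer and journal locks), which is illegal under a spinlock, so the locked region only records set_large_file and the sleeping work runs after spin_unlock(). In miniature, assuming those locking rules (variable names match the patched function; the feature-flag details are abbreviated):

	spin_lock(&ei->i_raw_lock);
	/* ... copy in-memory inode fields into the raw on-disk inode ... */
	if (ei->i_disksize > 0x7fffffffULL)
		set_large_file = 1;		/* just remember: cannot sleep here */
	spin_unlock(&ei->i_raw_lock);

	if (set_large_file) {
		/* may sleep, so it must run after the unlock */
		err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
		/* ... then set RO_COMPAT_LARGE_FILE and dirty the superblock ... */
	}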