Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7b7462a0e13e11e7131f2b148d1323a3de5c996..8a064734e6eb3ed06461e9954d036da6ff1e8147 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -148,6 +148,9 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
         int ea_blocks = EXT4_I(inode)->i_file_acl ?
                EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
 
+       if (ext4_has_inline_data(inode))
+               return 0;
+
        return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
 }
 
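Annotation: the whole updated predicate, assembled from the hunk above. An inline-data symlink keeps its target in the inode body via the inline-data feature, so it also has i_blocks == 0 and would otherwise be misclassified as a fast symlink whose target is read directly from i_block.

	static int ext4_inode_is_fast_symlink(struct inode *inode)
	{
		int ea_blocks = EXT4_I(inode)->i_file_acl ?
			EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;

		/* inline-data symlinks also have i_blocks == 0, but their
		 * target does not live in i_block, so they must never be
		 * treated as fast symlinks */
		if (ext4_has_inline_data(inode))
			return 0;

		return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
	}
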
@@ -443,7 +446,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
         * could be converted.
         */
        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read((&EXT4_I(inode)->i_data_sem));
+               down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
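Annotation: this hunk, and the matching down_read()/down_write() hunks below, are purely cosmetic; they drop a redundant pair of parentheses around the rwsem argument. The generated code is identical:

	down_read((&EXT4_I(inode)->i_data_sem));	/* before: extra parens */
	down_read(&EXT4_I(inode)->i_data_sem);		/* after: same semantics */
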
@@ -489,8 +492,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
  * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
  * based files
  *
- * On success, it returns the number of blocks being mapped or allocate.
- * if create==0 and the blocks are pre-allocated and uninitialized block,
+ * On success, it returns the number of blocks being mapped or allocated.
+ * if create==0 and the blocks are pre-allocated and unwritten block,
  * the result buffer head is unmapped. If the create ==1, it will make sure
  * the buffer head is mapped.
  *
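Annotation: the corrected comment spells out the create == 0 contract. A minimal lookup-only caller, as a sketch (the struct fields and the EXT4_MAP_MAPPED flag are the real ones used throughout this file; read_block() is a hypothetical consumer, and error handling is elided):

	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = lblk;				/* logical block to look up */
	map.m_len = 1;
	ret = ext4_map_blocks(NULL, inode, &map, 0);	/* create == 0: no allocation */
	if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
		read_block(map.m_pblk);			/* hypothetical consumer */
	/* a hit on a pre-allocated, unwritten extent comes back without
	 * EXT4_MAP_MAPPED set, so stale on-disk contents are never exposed */
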
@@ -555,7 +558,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * file system block.
         */
        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read((&EXT4_I(inode)->i_data_sem));
+               down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -622,12 +625,12 @@ found:
        map->m_flags &= ~EXT4_MAP_FLAGS;
 
        /*
-        * New blocks allocate and/or writing to uninitialized extent
+        * New blocks allocate and/or writing to unwritten extent
         * will possibly result in updating i_data, so we take
         * the write lock of i_data_sem, and call get_blocks()
         * with create == 1 flag.
         */
-       down_write((&EXT4_I(inode)->i_data_sem));
+       down_write(&EXT4_I(inode)->i_data_sem);
 
        /*
         * if the caller is from delayed allocation writeout path
@@ -922,6 +925,7 @@ int do_journal_get_write_access(handle_t *handle,
         */
        if (dirty)
                clear_buffer_dirty(bh);
+       BUFFER_TRACE(bh, "get write access");
        ret = ext4_journal_get_write_access(handle, bh);
        if (!ret && dirty)
                ret = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -1540,7 +1544,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                ext4_es_lru_add(inode);
                if (ext4_es_is_hole(&es)) {
                        retval = 0;
-                       down_read((&EXT4_I(inode)->i_data_sem));
+                       down_read(&EXT4_I(inode)->i_data_sem);
                        goto add_delayed;
                }
 
@@ -1577,7 +1581,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
-       down_read((&EXT4_I(inode)->i_data_sem));
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_has_inline_data(inode)) {
                /*
                 * We will soon create blocks for this page, and let
@@ -1769,6 +1773,7 @@ static int __ext4_journalled_writepage(struct page *page,
        BUG_ON(!ext4_handle_valid(handle));
 
        if (inline_data) {
+               BUFFER_TRACE(inode_bh, "get write access");
                ret = ext4_journal_get_write_access(handle, inode_bh);
 
                err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
@@ -1846,6 +1851,7 @@ static int ext4_writepage(struct page *page,
        struct buffer_head *page_bufs = NULL;
        struct inode *inode = page->mapping->host;
        struct ext4_io_submit io_submit;
+       bool keep_towrite = false;
 
        trace_ext4_writepage(page);
        size = i_size_read(inode);
@@ -1876,6 +1882,7 @@ static int ext4_writepage(struct page *page,
                        unlock_page(page);
                        return 0;
                }
+               keep_towrite = true;
        }
 
        if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1892,7 +1899,7 @@ static int ext4_writepage(struct page *page,
                unlock_page(page);
                return -ENOMEM;
        }
-       ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+       ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
        ext4_io_submit(&io_submit);
        /* Drop io_end reference we got from init */
        ext4_put_io_end_defer(io_submit.io_end);
@@ -1911,7 +1918,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
        else
                len = PAGE_CACHE_SIZE;
        clear_page_dirty_for_io(page);
-       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
        if (!err)
                mpd->wbc->nr_to_write--;
        mpd->first_page++;
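Annotation: keep_towrite is new plumbing into ext4_bio_write_page(). When ext4_writepage() finds buffers it cannot write from this context (the delayed/unwritten case above), the page stays dirty, and the flag presumably tells the bio path to preserve the PAGECACHE_TAG_TOWRITE tag so that a data-integrity writeback (WB_SYNC_ALL) will revisit the page. A sketch of the consuming side, which is not part of this diff:

	/* inside ext4_bio_write_page(..., bool keep_towrite) -- sketch */
	if (keep_towrite)
		set_page_writeback_keepwrite(page);	/* keep the TOWRITE tag */
	else
		set_page_writeback(page);		/* clears the TOWRITE tag */

mpage_submit_page() passes false because it only runs once the extent is fully mapped and every buffer on the page can be written.
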
@@ -2032,7 +2039,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
  * Scan buffers corresponding to changed extent (we expect corresponding pages
  * to be already locked) and update buffer state according to new extent state.
  * We map delalloc buffers to their physical location, clear unwritten bits,
- * and mark buffers as uninit when we perform writes to uninitialized extents
+ * and mark buffers as uninit when we perform writes to unwritten extents
  * and do extent conversion after IO is finished. If the last page is not fully
  * mapped, we update @map to the next extent in the last page that needs
  * mapping. Otherwise we submit the page for IO.
@@ -2126,12 +2133,12 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
        struct inode *inode = mpd->inode;
        struct ext4_map_blocks *map = &mpd->map;
        int get_blocks_flags;
-       int err;
+       int err, dioread_nolock;
 
        trace_ext4_da_write_pages_extent(inode, map);
        /*
         * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
-        * to convert an uninitialized extent to be initialized (in the case
+        * to convert an unwritten extent to be initialized (in the case
         * where we have written into one or more preallocated blocks).  It is
         * possible that we're going to need more metadata blocks than
         * previously reserved. However we must not fail because we're in
@@ -2148,7 +2155,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
         */
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
                           EXT4_GET_BLOCKS_METADATA_NOFAIL;
-       if (ext4_should_dioread_nolock(inode))
+       dioread_nolock = ext4_should_dioread_nolock(inode);
+       if (dioread_nolock)
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (map->m_flags & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
@@ -2156,7 +2164,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
        err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
        if (err < 0)
                return err;
-       if (map->m_flags & EXT4_MAP_UNINIT) {
+       if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) {
                if (!mpd->io_submit.io_end->handle &&
                    ext4_handle_valid(handle)) {
                        mpd->io_submit.io_end->handle = handle->h_rsv_handle;
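Annotation: the old test keyed off EXT4_MAP_UNINIT, a flag this series retires in favour of EXT4_MAP_UNWRITTEN. The reserved handle is only needed when the write went into an unwritten extent that must be converted after IO completes, and that only happens on the dioread_nolock path, hence the two-part condition. The completion side eventually does something like the following sketch (not in this diff; conversion needs a transaction, which is why the handle is stashed in io_end here):

	/* at IO-completion time -- sketch */
	err = ext4_convert_unwritten_extents(handle, inode, offset, size);
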
@@ -3070,9 +3078,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
  * preallocated extents, and those write extend the file, no need to
  * fall back to buffered IO.
  *
- * For holes, we fallocate those blocks, mark them as uninitialized
+ * For holes, we fallocate those blocks, mark them as unwritten
  * If those blocks were preallocated, we mark sure they are split, but
- * still keep the range to write as uninitialized.
+ * still keep the range to write as unwritten.
  *
  * The unwritten extents will be converted to written when DIO is completed.
  * For async direct IO, since the IO may still pending when return, we
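Annotation: a userspace illustration of the scenario this comment describes. Preallocate a range (the extent stays unwritten), then direct-write into part of it; until the extent conversion at IO completion, a concurrent buffered read of the range sees zeroes, never stale disk contents. File name and sizes are illustrative; error handling is elided.

	#define _GNU_SOURCE		/* for O_DIRECT and fallocate() */
	#include <fcntl.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		void *buf;
		int fd = open("scratch", O_RDWR | O_CREAT | O_DIRECT, 0644);

		posix_memalign(&buf, 4096, 4096);	/* O_DIRECT alignment */
		memset(buf, 0xab, 4096);
		fallocate(fd, 0, 0, 1 << 20);		/* preallocated, unwritten extent */
		pwrite(fd, buf, 4096, 0);		/* DIO into the unwritten range */
		close(fd);
		return 0;
	}
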
@@ -3085,13 +3093,12 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
  *
  */
 static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
-                             const struct iovec *iov, loff_t offset,
-                             unsigned long nr_segs)
+                             struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        int overwrite = 0;
        get_block_t *get_block_func = NULL;
        int dio_flags = 0;
@@ -3100,7 +3107,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 
        /* Use the old path for reads and writes beyond i_size. */
        if (rw != WRITE || final_size > inode->i_size)
-               return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+               return ext4_ind_direct_IO(rw, iocb, iter, offset);
 
        BUG_ON(iocb->private == NULL);
 
@@ -3124,12 +3131,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
         * We could direct write to holes and fallocate.
         *
         * Allocated blocks to fill the hole are marked as
-        * uninitialized to prevent parallel buffered read to expose
+        * unwritten to prevent parallel buffered read to expose
         * the stale data before DIO complete the data IO.
         *
         * As to previously fallocated extents, ext4 get_block will
         * just simply mark the buffer mapped but still keep the
-        * extents uninitialized.
+        * extents unwritten.
         *
         * For non AIO case, we will convert those unwritten extents
         * to written after return back from blockdev_direct_IO.
@@ -3167,8 +3174,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                dio_flags = DIO_LOCKING;
        }
        ret = __blockdev_direct_IO(rw, iocb, inode,
-                                  inode->i_sb->s_bdev, iov,
-                                  offset, nr_segs,
+                                  inode->i_sb->s_bdev, iter,
+                                  offset,
                                   get_block_func,
                                   ext4_end_io_dio,
                                   NULL,
@@ -3222,11 +3229,11 @@ retake_lock:
 }
 
 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
-                             const struct iovec *iov, loff_t offset,
-                             unsigned long nr_segs)
+                             struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
        /*
@@ -3239,13 +3246,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        if (ext4_has_inline_data(inode))
                return 0;
 
-       trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+       trace_ext4_direct_IO_enter(inode, offset, count, rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+               ret = ext4_ext_direct_IO(rw, iocb, iter, offset);
        else
-               ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
-       trace_ext4_direct_IO_exit(inode, offset,
-                               iov_length(iov, nr_segs), rw, ret);
+               ret = ext4_ind_direct_IO(rw, iocb, iter, offset);
+       trace_ext4_direct_IO_exit(inode, offset, count, rw, ret);
        return ret;
 }
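Annotation: these hunks are ext4's share of the tree-wide switch of ->direct_IO from an (iov, nr_segs) pair to struct iov_iter. iov_iter_count(iter) is the drop-in replacement for iov_length(iov, nr_segs); for reference, the old helper was just:

	/* what iov_length() computed from the raw iovec array */
	static inline size_t iov_length(const struct iovec *iov,
					unsigned long nr_segs)
	{
		size_t len = 0;
		unsigned long seg;

		for (seg = 0; seg < nr_segs; seg++)
			len += iov[seg].iov_len;
		return len;
	}

The iov_iter core precomputes this total, so callers now read iov_iter_count(iter) instead of walking the segments themselves.
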
 
@@ -3440,7 +3446,7 @@ unlock:
  * This required during truncate. We need to physically zero the tail end
  * of that block so it doesn't yield old data if the file is later grown.
  */
-int ext4_block_truncate_page(handle_t *handle,
+static int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from)
 {
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
@@ -4304,12 +4310,15 @@ static int ext4_do_update_inode(handle_t *handle,
        struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct buffer_head *bh = iloc->bh;
+       struct super_block *sb = inode->i_sb;
        int err = 0, rc, block;
-       int need_datasync = 0;
+       int need_datasync = 0, set_large_file = 0;
        uid_t i_uid;
        gid_t i_gid;
 
-       /* For fields not not tracking in the in-memory inode,
+       spin_lock(&ei->i_raw_lock);
+
+       /* For fields not tracked in the in-memory inode,
         * initialise them to zero for new inodes. */
        if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
                memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@@ -4347,8 +4356,10 @@ static int ext4_do_update_inode(handle_t *handle,
        EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
 
-       if (ext4_inode_blocks_set(handle, raw_inode, ei))
+       if (ext4_inode_blocks_set(handle, raw_inode, ei)) {
+               spin_unlock(&ei->i_raw_lock);
                goto out_brelse;
+       }
        raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
        raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
        if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
@@ -4360,24 +4371,11 @@ static int ext4_do_update_inode(handle_t *handle,
                need_datasync = 1;
        }
        if (ei->i_disksize > 0x7fffffffULL) {
-               struct super_block *sb = inode->i_sb;
                if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
                                EXT4_SB(sb)->s_es->s_rev_level ==
-                               cpu_to_le32(EXT4_GOOD_OLD_REV)) {
-                       /* If this is the first large file
-                        * created, add a flag to the superblock.
-                        */
-                       err = ext4_journal_get_write_access(handle,
-                                       EXT4_SB(sb)->s_sbh);
-                       if (err)
-                               goto out_brelse;
-                       ext4_update_dynamic_rev(sb);
-                       EXT4_SET_RO_COMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
-                       ext4_handle_sync(handle);
-                       err = ext4_handle_dirty_super(handle, sb);
-               }
+                   cpu_to_le32(EXT4_GOOD_OLD_REV))
+                       set_large_file = 1;
        }
        raw_inode->i_generation = cpu_to_le32(inode->i_generation);
        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
@@ -4409,12 +4407,24 @@ static int ext4_do_update_inode(handle_t *handle,
 
        ext4_inode_csum_set(inode, raw_inode, ei);
 
+       spin_unlock(&ei->i_raw_lock);
+
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        rc = ext4_handle_dirty_metadata(handle, NULL, bh);
        if (!err)
                err = rc;
        ext4_clear_inode_state(inode, EXT4_STATE_NEW);
-
+       if (set_large_file) {
+               BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
+               err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+               if (err)
+                       goto out_brelse;
+               ext4_update_dynamic_rev(sb);
+               EXT4_SET_RO_COMPAT_FEATURE(sb,
+                                          EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
+               ext4_handle_sync(handle);
+               err = ext4_handle_dirty_super(handle, sb);
+       }
        ext4_update_inode_fsync_trans(handle, inode, need_datasync);
 out_brelse:
        brelse(bh);
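
Annotation: two things happen in ext4_do_update_inode() here. First, the copy-out into the raw on-disk inode is now serialized by the new ei->i_raw_lock spinlock. Second, the LARGE_FILE superblock update moves out of that critical section: ext4_journal_get_write_access() can block (it may wait on buffer and journal locks), which is illegal under a spinlock, so the locked region only records set_large_file and the sleeping work runs after spin_unlock(). In miniature, assuming those locking rules (variable names match the patched function; the feature-flag details are abbreviated):

	spin_lock(&ei->i_raw_lock);
	/* ... copy in-memory inode fields into the raw on-disk inode ... */
	if (ei->i_disksize > 0x7fffffffULL)
		set_large_file = 1;		/* just remember: cannot sleep here */
	spin_unlock(&ei->i_raw_lock);

	if (set_large_file) {
		/* may sleep, so it must run after the unlock */
		err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
		/* ... then set RO_COMPAT_LARGE_FILE and dirty the superblock ... */
	}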