Merge branch 'for-linus' of git://git.o-hand.com/linux-mfd

[mv-sheeva.git] / fs / ext4 / inode.c
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index a6b800c58474782e9db8d98adae518a215cf2b4c..9843b046c2358974bff4bea0449583c688c27358 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -847,6 +847,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
         struct ext4_inode_info *ei = EXT4_I(inode);
         int count = 0;
         ext4_fsblk_t first_block = 0;
+       loff_t disksize;
  
  
         J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
@@ -922,8 +923,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
          * protect it if you're about to implement concurrent
          * ext4_get_block() -bzzz
         */
-       if (!err && extend_disksize && inode->i_size > ei->i_disksize)
-               ei->i_disksize = inode->i_size;
+       if (!err && extend_disksize) {
+               disksize = ((loff_t) iblock + count) << inode->i_blkbits;
+               if (disksize > i_size_read(inode))
+                       disksize = i_size_read(inode);
+               if (disksize > ei->i_disksize)
+                       ei->i_disksize = disksize;
+       }
         if (err)
                 goto cleanup;
  
@@ -1683,13 +1689,11 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
                         do {
                                 if (cur_logical >= logical + blocks)
                                         break;
-
                                 if (buffer_delay(bh)) {
                                         bh->b_blocknr = pblock;
                                         clear_buffer_delay(bh);
-                               } else if (buffer_mapped(bh)) {
+                               } else if (buffer_mapped(bh))
                                         BUG_ON(bh->b_blocknr != pblock);
-                               }
  
                                 cur_logical++;
                                 pblock++;
@@ -1764,10 +1768,10 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
                 if (buffer_delay(lbh))
                         mpage_put_bnr_to_bhs(mpd, next, &new);
  
-                       /* go for the remaining blocks */
-                       next += new.b_size >> mpd->inode->i_blkbits;
-                       remain -= new.b_size;
-               }
+               /* go for the remaining blocks */
+               next += new.b_size >> mpd->inode->i_blkbits;
+               remain -= new.b_size;
+       }
  }
  
  #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
@@ -1993,21 +1997,21 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
  static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
  {
-       int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+       int ret;
         unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
         loff_t disksize = EXT4_I(inode)->i_disksize;
         handle_t *handle = NULL;
  
-       if (create) {
-               handle = ext4_journal_start(inode, needed_blocks);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out;
-               }
+       handle = ext4_journal_current_handle();
+       if (!handle) {
+               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                  bh_result, 0, 0, 0);
+               BUG_ON(!ret);
+       } else {
+               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                  bh_result, create, 0, EXT4_DELALLOC_RSVED);
         }
  
-       ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-                                  bh_result, create, 0, EXT4_DELALLOC_RSVED);
         if (ret > 0) {
                 bh_result->b_size = (ret << inode->i_blkbits);
  
@@ -2029,65 +2033,222 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
                         up_write(&EXT4_I(inode)->i_data_sem);
  
                         if (EXT4_I(inode)->i_disksize == disksize) {
-                               if (handle == NULL)
-                                       handle = ext4_journal_start(inode, 1);
-                               if (!IS_ERR(handle))
-                                       ext4_mark_inode_dirty(handle, inode);
+                               ret = ext4_mark_inode_dirty(handle, inode);
+                               return ret;
                         }
                 }
-
                 ret = 0;
         }
+       return ret;
+}
  
-out:
-       if (handle && !IS_ERR(handle))
-               ext4_journal_stop(handle);
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+       /*
+        * unmapped buffer is possible for holes.
+        * delay buffer is possible with delayed allocation
+        */
+       return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
+}
+
+static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create)
+{
+       int ret = 0;
+       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
  
+       /*
+        * we don't want to do block allocation in writepage
+        * so call get_block_wrap with create = 0
+        */
+       ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
+                                  bh_result, 0, 0, 0);
+       if (ret > 0) {
+               bh_result->b_size = (ret << inode->i_blkbits);
+               ret = 0;
+       }
         return ret;
  }
-/* FIXME!! only support data=writeback mode */
+
+/*
+ * get called via ext4_da_writepages after taking page lock (have journal handle)
+ * get called via journal_submit_inode_data_buffers (no journal handle)
+ * get called via shrink_page_list via pdflush (no journal handle)
+ * or grab_page_cache when doing write_begin (have journal handle)
+ */
  static int ext4_da_writepage(struct page *page,
                                 struct writeback_control *wbc)
  {
-       struct inode *inode = page->mapping->host;
-       handle_t *handle = NULL;
         int ret = 0;
-       int err;
+       loff_t size;
+       unsigned long len;
+       struct buffer_head *page_bufs;
+       struct inode *inode = page->mapping->host;
  
-       if (ext4_journal_current_handle())
-               goto out_fail;
+       size = i_size_read(inode);
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
  
-       handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
-       if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               goto out_fail;
+       if (page_has_buffers(page)) {
+               page_bufs = page_buffers(page);
+               if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay)) {
+                       /*
+                        * We don't want to do  block allocation
+                        * So redirty the page and return
+                        * We may reach here when we do a journal commit
+                        * via journal_submit_inode_data_buffers.
+                        * If we don't have mapping block we just ignore
+                        * them. We can also reach here via shrink_page_list
+                        */
+                       redirty_page_for_writepage(wbc, page);
+                       unlock_page(page);
+                       return 0;
+               }
+       } else {
+               /*
+                * The test for page_has_buffers() is subtle:
+                * We know the page is dirty but it lost buffers. That means
+                * that at some moment in time after write_begin()/write_end()
+                * has been called all buffers have been clean and thus they
+                * must have been written at least once. So they are all
+                * mapped and we can happily proceed with mapping them
+                * and writing the page.
+                *
+                * Try to initialize the buffer_heads and check whether
+                * all are mapped and non delay. We don't want to
+                * do block allocation here.
+                */
+               ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+                                               ext4_normal_get_block_write);
+               if (!ret) {
+                       page_bufs = page_buffers(page);
+                       /* check whether all are mapped and non delay */
+                       if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                               ext4_bh_unmapped_or_delay)) {
+                               redirty_page_for_writepage(wbc, page);
+                               unlock_page(page);
+                               return 0;
+                       }
+               } else {
+                       /*
+                        * We can't do block allocation here
+                        * so just redirty the page and unlock
+                        * and return
+                        */
+                       redirty_page_for_writepage(wbc, page);
+                       unlock_page(page);
+                       return 0;
+               }
         }
  
         if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_writepage(page, ext4_get_block, wbc);
+               ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
         else
-               ret = block_write_full_page(page, ext4_get_block, wbc);
-
-       if (!ret && inode->i_size > EXT4_I(inode)->i_disksize) {
-               EXT4_I(inode)->i_disksize = inode->i_size;
-               ext4_mark_inode_dirty(handle, inode);
-       }
-
-       err = ext4_journal_stop(handle);
-       if (!ret)
-               ret = err;
-       return ret;
+               ret = block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
  
-out_fail:
-       redirty_page_for_writepage(wbc, page);
-       unlock_page(page);
         return ret;
  }
  
+/*
+ * For now just follow the DIO way to estimate the max credits
+ * needed to write out EXT4_MAX_WRITEBACK_PAGES.
+ * todo: need to calculate the max credits need for
+ * extent based files, currently the DIO credits is based on
+ * indirect-blocks mapping way.
+ *
+ * Probably should have a generic way to calculate credits
+ * for DIO, writepages, and truncate
+ */
+#define EXT4_MAX_WRITEBACK_PAGES      DIO_MAX_BLOCKS
+#define EXT4_MAX_WRITEBACK_CREDITS    DIO_CREDITS
+
  static int ext4_da_writepages(struct address_space *mapping,
                                 struct writeback_control *wbc)
  {
-       return mpage_da_writepages(mapping, wbc, ext4_da_get_block_write);
+       struct inode *inode = mapping->host;
+       handle_t *handle = NULL;
+       int needed_blocks;
+       int ret = 0;
+       long to_write;
+       loff_t range_start = 0;
+
+       /*
+        * No pages to write? This is mainly a kludge to avoid starting
+        * a transaction for special inodes like journal inode on last iput()
+        * because that could violate lock ordering on umount
+        */
+       if (!mapping->nrpages)
+               return 0;
+
+       /*
+        * Estimate the worst case needed credits to write out
+        * EXT4_MAX_WRITEBACK_PAGES pages
+        */
+       needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
+
+       to_write = wbc->nr_to_write;
+       if (!wbc->range_cyclic) {
+               /*
+                * If range_cyclic is not set force range_cont
+                * and save the old writeback_index
+                */
+               wbc->range_cont = 1;
+               range_start =  wbc->range_start;
+       }
+
+       while (!ret && to_write) {
+               /* start a new transaction*/
+               handle = ext4_journal_start(inode, needed_blocks);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       goto out_writepages;
+               }
+               if (ext4_should_order_data(inode)) {
+                       /*
+                        * With ordered mode we need to add
+                        * the inode to the journal handle
+                        * when we do block allocation.
+                        */
+                       ret = ext4_jbd2_file_inode(handle, inode);
+                       if (ret) {
+                               ext4_journal_stop(handle);
+                               goto out_writepages;
+                       }
+
+               }
+               /*
+                * cap the number of dirty pages written at a time
+                * to fit into the reserved transaction credits
+                */
+               if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
+                       wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
+
+               to_write -= wbc->nr_to_write;
+               ret = mpage_da_writepages(mapping, wbc,
+                                               ext4_da_get_block_write);
+               ext4_journal_stop(handle);
+               if (wbc->nr_to_write) {
+                       /*
+                        * There is no more writeout needed
+                        * or we requested a non-blocking writeout
+                        * and we found the device congested
+                        */
+                       to_write += wbc->nr_to_write;
+                       break;
+               }
+               wbc->nr_to_write = to_write;
+       }
+
+out_writepages:
+       wbc->nr_to_write = to_write;
+       if (range_start)
+               wbc->range_start = range_start;
+       return ret;
  }
  
  static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
@@ -2137,9 +2298,27 @@ out:
         return ret;
  }
  
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+/*
+ * Check if we should update i_disksize
+ * when a write to the end of file does not require block allocation
+ */
+static int ext4_da_should_update_i_disksize(struct page *page,
+                                        unsigned long offset)
  {
-       return !buffer_mapped(bh) || buffer_delay(bh);
+       struct buffer_head *bh;
+       struct inode *inode = page->mapping->host;
+       unsigned int idx;
+       int i;
+
+       bh = page_buffers(page);
+       idx = offset >> inode->i_blkbits;
+
+       for (i=0; i < idx; i++)
+               bh = bh->b_this_page;
+
+       if (!buffer_mapped(bh) || (buffer_delay(bh)))
+               return 0;
+       return 1;
  }
  
  static int ext4_da_write_end(struct file *file,
@@ -2151,6 +2330,10 @@ static int ext4_da_write_end(struct file *file,
         int ret = 0, ret2;
         handle_t *handle = ext4_journal_current_handle();
         loff_t new_i_size;
+       unsigned long start, end;
+
+       start = pos & (PAGE_CACHE_SIZE - 1);
+       end = start + copied -1;
  
         /*
          * generic_write_end() will run mark_inode_dirty() if i_size
@@ -2159,18 +2342,23 @@ static int ext4_da_write_end(struct file *file,
          */
  
         new_i_size = pos + copied;
-       if (new_i_size > EXT4_I(inode)->i_disksize)
-               if (!walk_page_buffers(NULL, page_buffers(page),
-                                      0, len, NULL, ext4_bh_unmapped_or_delay)){
-                       /*
-                        * Updating i_disksize when extending file without
-                        * needing block allocation
-                        */
-                       if (ext4_should_order_data(inode))
-                               ret = ext4_jbd2_file_inode(handle, inode);
+       if (new_i_size > EXT4_I(inode)->i_disksize) {
+               if (ext4_da_should_update_i_disksize(page, end)) {
+                       down_write(&EXT4_I(inode)->i_data_sem);
+                       if (new_i_size > EXT4_I(inode)->i_disksize) {
+                               /*
+                                * Updating i_disksize when extending file
+                                * without needing block allocation
+                                */
+                               if (ext4_should_order_data(inode))
+                                       ret = ext4_jbd2_file_inode(handle,
+                                                                  inode);
  
-                       EXT4_I(inode)->i_disksize = new_i_size;
+                               EXT4_I(inode)->i_disksize = new_i_size;
+                       }
+                       up_write(&EXT4_I(inode)->i_data_sem);
                 }
+       }
         ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                         page, fsdata);
         copied = ret2;
@@ -2334,12 +2522,14 @@ static int __ext4_normal_writepage(struct page *page,
         struct inode *inode = page->mapping->host;
  
         if (test_opt(inode->i_sb, NOBH))
-               return nobh_writepage(page, ext4_get_block, wbc);
+               return nobh_writepage(page,
+                                       ext4_normal_get_block_write, wbc);
         else
-               return block_write_full_page(page, ext4_get_block, wbc);
+               return block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
  }
  
-
  static int ext4_normal_writepage(struct page *page,
                                 struct writeback_control *wbc)
  {
@@ -2348,13 +2538,24 @@ static int ext4_normal_writepage(struct page *page,
         loff_t len;
  
         J_ASSERT(PageLocked(page));
-       J_ASSERT(page_has_buffers(page));
         if (page->index == size >> PAGE_CACHE_SHIFT)
                 len = size & ~PAGE_CACHE_MASK;
         else
                 len = PAGE_CACHE_SIZE;
-       BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-                                ext4_bh_unmapped_or_delay));
+
+       if (page_has_buffers(page)) {
+               /* if page has buffers it should all be mapped
+                * and allocated. If there are not buffers attached
+                * to the page we know the page is dirty but it lost
+                * buffers. That means that at some moment in time
+                * after write_begin() / write_end() has been called
+                * all buffers have been clean and thus they must have been
+                * written at least once. So they are all mapped and we can
+                * happily proceed with mapping them and writing the page.
+                */
+               BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay));
+       }
  
         if (!ext4_journal_current_handle())
                 return __ext4_normal_writepage(page, wbc);
@@ -2374,7 +2575,8 @@ static int __ext4_journalled_writepage(struct page *page,
         int ret = 0;
         int err;
  
-       ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, ext4_get_block);
+       ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+                                       ext4_normal_get_block_write);
         if (ret != 0)
                 goto out_unlock;
  
@@ -2421,13 +2623,24 @@ static int ext4_journalled_writepage(struct page *page,
         loff_t len;
  
         J_ASSERT(PageLocked(page));
-       J_ASSERT(page_has_buffers(page));
         if (page->index == size >> PAGE_CACHE_SHIFT)
                 len = size & ~PAGE_CACHE_MASK;
         else
                 len = PAGE_CACHE_SIZE;
-       BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-                                ext4_bh_unmapped_or_delay));
+
+       if (page_has_buffers(page)) {
+               /* if page has buffers it should all be mapped
+                * and allocated. If there are not buffers attached
+                * to the page we know the page is dirty but it lost
+                * buffers. That means that at some moment in time
+                * after write_begin() / write_end() has been called
+                * all buffers have been clean and thus they must have been
+                * written at least once. So they are all mapped and we can
+                * happily proceed with mapping them and writing the page.
+                */
+               BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay));
+       }
  
         if (ext4_journal_current_handle())
                 goto no_write;
@@ -2445,7 +2658,9 @@ static int ext4_journalled_writepage(struct page *page,
                  * really know unless we go poke around in the buffer_heads.
                  * But block_write_full_page will do the right thing.
                  */
-               return block_write_full_page(page, ext4_get_block, wbc);
+               return block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
         }
  no_write:
         redirty_page_for_writepage(wbc, page);
@@ -2591,64 +2806,71 @@ static int ext4_journalled_set_page_dirty(struct page *page)
  }
  
  static const struct address_space_operations ext4_ordered_aops = {
-       .readpage       = ext4_readpage,
-       .readpages      = ext4_readpages,
-       .writepage      = ext4_normal_writepage,
-       .sync_page      = block_sync_page,
-       .write_begin    = ext4_write_begin,
-       .write_end      = ext4_ordered_write_end,
-       .bmap           = ext4_bmap,
-       .invalidatepage = ext4_invalidatepage,
-       .releasepage    = ext4_releasepage,
-       .direct_IO      = ext4_direct_IO,
-       .migratepage    = buffer_migrate_page,
+       .readpage               = ext4_readpage,
+       .readpages              = ext4_readpages,
+       .writepage              = ext4_normal_writepage,
+       .sync_page              = block_sync_page,
+       .write_begin            = ext4_write_begin,
+       .write_end              = ext4_ordered_write_end,
+       .bmap                   = ext4_bmap,
+       .invalidatepage         = ext4_invalidatepage,
+       .releasepage            = ext4_releasepage,
+       .direct_IO              = ext4_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
  };
  
  static const struct address_space_operations ext4_writeback_aops = {
-       .readpage       = ext4_readpage,
-       .readpages      = ext4_readpages,
-       .writepage      = ext4_normal_writepage,
-       .sync_page      = block_sync_page,
-       .write_begin    = ext4_write_begin,
-       .write_end      = ext4_writeback_write_end,
-       .bmap           = ext4_bmap,
-       .invalidatepage = ext4_invalidatepage,
-       .releasepage    = ext4_releasepage,
-       .direct_IO      = ext4_direct_IO,
-       .migratepage    = buffer_migrate_page,
+       .readpage               = ext4_readpage,
+       .readpages              = ext4_readpages,
+       .writepage              = ext4_normal_writepage,
+       .sync_page              = block_sync_page,
+       .write_begin            = ext4_write_begin,
+       .write_end              = ext4_writeback_write_end,
+       .bmap                   = ext4_bmap,
+       .invalidatepage         = ext4_invalidatepage,
+       .releasepage            = ext4_releasepage,
+       .direct_IO              = ext4_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
  };
  
  static const struct address_space_operations ext4_journalled_aops = {
-       .readpage       = ext4_readpage,
-       .readpages      = ext4_readpages,
-       .writepage      = ext4_journalled_writepage,
-       .sync_page      = block_sync_page,
-       .write_begin    = ext4_write_begin,
-       .write_end      = ext4_journalled_write_end,
-       .set_page_dirty = ext4_journalled_set_page_dirty,
-       .bmap           = ext4_bmap,
-       .invalidatepage = ext4_invalidatepage,
-       .releasepage    = ext4_releasepage,
+       .readpage               = ext4_readpage,
+       .readpages              = ext4_readpages,
+       .writepage              = ext4_journalled_writepage,
+       .sync_page              = block_sync_page,
+       .write_begin            = ext4_write_begin,
+       .write_end              = ext4_journalled_write_end,
+       .set_page_dirty         = ext4_journalled_set_page_dirty,
+       .bmap                   = ext4_bmap,
+       .invalidatepage         = ext4_invalidatepage,
+       .releasepage            = ext4_releasepage,
+       .is_partially_uptodate  = block_is_partially_uptodate,
  };
  
  static const struct address_space_operations ext4_da_aops = {
-       .readpage       = ext4_readpage,
-       .readpages      = ext4_readpages,
-       .writepage      = ext4_da_writepage,
-       .writepages     = ext4_da_writepages,
-       .sync_page      = block_sync_page,
-       .write_begin    = ext4_da_write_begin,
-       .write_end      = ext4_da_write_end,
-       .bmap           = ext4_bmap,
-       .invalidatepage = ext4_da_invalidatepage,
-       .releasepage    = ext4_releasepage,
-       .direct_IO      = ext4_direct_IO,
-       .migratepage    = buffer_migrate_page,
+       .readpage               = ext4_readpage,
+       .readpages              = ext4_readpages,
+       .writepage              = ext4_da_writepage,
+       .writepages             = ext4_da_writepages,
+       .sync_page              = block_sync_page,
+       .write_begin            = ext4_da_write_begin,
+       .write_end              = ext4_da_write_end,
+       .bmap                   = ext4_bmap,
+       .invalidatepage         = ext4_da_invalidatepage,
+       .releasepage            = ext4_releasepage,
+       .direct_IO              = ext4_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
  };
  
  void ext4_set_aops(struct inode *inode)
  {
-       if (ext4_should_order_data(inode))
+       if (ext4_should_order_data(inode) &&
+               test_opt(inode->i_sb, DELALLOC))
+               inode->i_mapping->a_ops = &ext4_da_aops;
+       else if (ext4_should_order_data(inode))
                 inode->i_mapping->a_ops = &ext4_ordered_aops;
         else if (ext4_should_writeback_data(inode) &&
                  test_opt(inode->i_sb, DELALLOC))
@@ -3207,6 +3429,11 @@ void ext4_truncate(struct inode *inode)
         if (ext4_orphan_add(handle, inode))
                 goto out_stop;
  
+       /*
+        * From here we block out all ext4_get_block() callers who want to
+        * modify the block allocation tree.
+        */
+       down_write(&ei->i_data_sem);
         /*
          * The orphan list entry will now protect us from any crash which
          * occurs before the truncate completes, so it is now safe to propagate
@@ -3216,12 +3443,6 @@ void ext4_truncate(struct inode *inode)
          */
         ei->i_disksize = inode->i_size;
  
-       /*
-        * From here we block out all ext4_get_block() callers who want to
-        * modify the block allocation tree.
-        */
-       down_write(&ei->i_data_sem);
-
         if (n == 1) {           /* direct blocks */
                 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
                                i_data + EXT4_NDIR_BLOCKS);
@@ -4014,6 +4235,32 @@ err_out:
         return error;
  }
  
+int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                struct kstat *stat)
+{
+       struct inode *inode;
+       unsigned long delalloc_blocks;
+
+       inode = dentry->d_inode;
+       generic_fillattr(inode, stat);
+
+       /*
+        * We can't update i_blocks if the block allocation is delayed
+        * otherwise in the case of system crash before the real block
+        * allocation is done, we will have i_blocks inconsistent with
+        * on-disk file blocks.
+        * We always keep i_blocks updated together with real
+        * allocation. But to avoid confusing the user, stat
+        * will return the blocks that include the delayed allocation
+        * blocks for this file.
+        */
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+       stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
+       return 0;
+}
  
  /*
   * How many blocks doth make a writepage()?