git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'cow_readahead' of git://oss.oracle.com/git/tma/linux-2.6 into merge-2
author Joel Becker <joel.becker@oracle.com>
Fri, 10 Sep 2010 15:41:04 +0000 (08:41 -0700)
committer Joel Becker <joel.becker@oracle.com>
Fri, 10 Sep 2010 15:41:04 +0000 (08:41 -0700)
1  2 
fs/ocfs2/aops.c
fs/ocfs2/file.c
fs/ocfs2/mmap.c
fs/ocfs2/refcounttree.c
fs/ocfs2/refcounttree.h

diff --combined fs/ocfs2/aops.c
index f477f18b35d5be2844fc74b602ffd33bdf6efb97,7155c5a919d722f05faadc117de698d8592fe610..5cfeee11815881b04a3250d42a72838e40289200
@@@ -883,8 -883,8 +883,8 @@@ struct ocfs2_write_ctxt 
         * out in so that future reads from that region will get
         * zero's.
         */
 -      struct page                     *w_pages[OCFS2_MAX_CTXT_PAGES];
        unsigned int                    w_num_pages;
 +      struct page                     *w_pages[OCFS2_MAX_CTXT_PAGES];
        struct page                     *w_target_page;
  
        /*
@@@ -1642,7 -1642,8 +1642,8 @@@ static int ocfs2_zero_tail(struct inod
        return ret;
  }
  
- int ocfs2_write_begin_nolock(struct address_space *mapping,
+ int ocfs2_write_begin_nolock(struct file *filp,
+                            struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned flags,
                             struct page **pagep, void **fsdata,
                             struct buffer_head *di_bh, struct page *mmap_page)
                mlog_errno(ret);
                goto out;
        } else if (ret == 1) {
-               ret = ocfs2_refcount_cow(inode, di_bh,
+               ret = ocfs2_refcount_cow(inode, filp, di_bh,
                                         wc->w_cpos, wc->w_clen, UINT_MAX);
                if (ret) {
                        mlog_errno(ret);
@@@ -1854,7 -1855,7 +1855,7 @@@ static int ocfs2_write_begin(struct fil
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
  
-       ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
+       ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
                                       fsdata, di_bh, NULL);
        if (ret) {
                mlog_errno(ret);
diff --combined fs/ocfs2/file.c
index b03f6601fd71b523b39f333d26b16b99bb7076c9,4331f57e9fde58703f3bd260b8b002094996556f..9a74542e1a057904ef1108a5628f9b808f0847a1
@@@ -36,7 -36,6 +36,7 @@@
  #include <linux/writeback.h>
  #include <linux/falloc.h>
  #include <linux/quotaops.h>
 +#include <linux/blkdev.h>
  
  #define MLOG_MASK_PREFIX ML_INODE
  #include <cluster/masklog.h>
  
  #include "buffer_head_io.h"
  
 -static int ocfs2_sync_inode(struct inode *inode)
 -{
 -      filemap_fdatawrite(inode->i_mapping);
 -      return sync_mapping_buffers(inode->i_mapping);
 -}
 -
  static int ocfs2_init_file_private(struct inode *inode, struct file *file)
  {
        struct ocfs2_file_private *fp;
@@@ -181,16 -186,12 +181,16 @@@ static int ocfs2_sync_file(struct file 
        mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
                   dentry->d_name.len, dentry->d_name.name);
  
 -      err = ocfs2_sync_inode(dentry->d_inode);
 -      if (err)
 -              goto bail;
 -
 -      if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 +      if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
 +              /*
 +               * We still have to flush the drive's caches to get data to the
 +               * platter.
 +               */
 +              if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
 +                      blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
 +                                         NULL, BLKDEV_IFL_WAIT);
                goto bail;
 +      }
  
        journal = osb->journal->j_journal;
        err = jbd2_journal_force_commit(journal);
@@@ -360,7 -361,7 +360,7 @@@ static int ocfs2_cow_file_pos(struct in
        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
                goto out;
  
-       return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
+       return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
  
  out:
        return status;
@@@ -773,7 -774,7 +773,7 @@@ static int ocfs2_write_zero_page(struc
        BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
        BUG_ON(abs_from & (inode->i_blkbits - 1));
  
 -      page = grab_cache_page(mapping, index);
 +      page = find_or_create_page(mapping, index, GFP_NOFS);
        if (!page) {
                ret = -ENOMEM;
                mlog_errno(ret);
@@@ -903,8 -904,8 +903,8 @@@ static int ocfs2_zero_extend_get_range(
                zero_clusters = last_cpos - zero_cpos;
  
        if (needs_cow) {
-               rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
-                                       UINT_MAX);
+               rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
+                                       zero_clusters, UINT_MAX);
                if (rc) {
                        mlog_errno(rc);
                        goto out;
@@@ -2052,6 -2053,7 +2052,7 @@@ out
  }
  
  static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
+                                           struct file *file,
                                            loff_t pos, size_t count,
                                            int *meta_level)
  {
  
        *meta_level = 1;
  
-       ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
+       ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
        if (ret)
                mlog_errno(ret);
  out:
        return ret;
  }
  
- static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
+ static int ocfs2_prepare_inode_for_write(struct file *file,
                                         loff_t *ppos,
                                         size_t count,
                                         int appending,
                                         int *has_refcount)
  {
        int ret = 0, meta_level = 0;
+       struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        loff_t saved_pos, end;
  
                        meta_level = -1;
  
                        ret = ocfs2_prepare_inode_for_refcount(inode,
+                                                              file,
                                                               saved_pos,
                                                               count,
                                                               &meta_level);
@@@ -2254,7 -2258,7 +2257,7 @@@ relock
        }
  
        can_do_direct = direct_io;
-       ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
+       ret = ocfs2_prepare_inode_for_write(file, ppos,
                                            iocb->ki_left, appending,
                                            &can_do_direct, &has_refcount);
        if (ret < 0) {
                written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
                                                    ppos, count, ocount);
                if (written < 0) {
 -                      /*
 -                       * direct write may have instantiated a few
 -                       * blocks outside i_size. Trim these off again.
 -                       * Don't need i_size_read because we hold i_mutex.
 -                       *
 -                       * XXX(truncate): this looks buggy because ocfs2 did not
 -                       * actually implement ->truncate.  Take a look at
 -                       * the new truncate sequence and update this accordingly
 -                       */
 -                      if (*ppos + count > inode->i_size)
 -                              truncate_setsize(inode, inode->i_size);
                        ret = written;
                        goto out_dio;
                }
@@@ -2317,7 -2332,7 +2320,7 @@@ out_dio
        BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
  
        if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
 -          ((file->f_flags & O_DIRECT) && has_refcount)) {
 +          ((file->f_flags & O_DIRECT) && !direct_io)) {
                ret = filemap_fdatawrite_range(file->f_mapping, pos,
                                               pos + count - 1);
                if (ret < 0)
@@@ -2373,7 -2388,7 +2376,7 @@@ static int ocfs2_splice_to_file(struct 
  {
        int ret;
  
-       ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
+       ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
                                            sd->total_len, 0, NULL, NULL);
        if (ret < 0) {
                mlog_errno(ret);
diff --combined fs/ocfs2/mmap.c
index 4c18f4ad93b43cae6e5ddcd5a2781fbc79292484,b04d6961c0d4d29dfd1394496fea37665746dcd4..7e32db9c2c993b38e67f5d1282525f0a089b54b0
@@@ -59,10 -59,11 +59,11 @@@ static int ocfs2_fault(struct vm_area_s
        return ret;
  }
  
- static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
+ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
                                struct page *page)
  {
        int ret;
+       struct inode *inode = file->f_path.dentry->d_inode;
        struct address_space *mapping = inode->i_mapping;
        loff_t pos = page_offset(page);
        unsigned int len = PAGE_CACHE_SIZE;
        /*
         * Another node might have truncated while we were waiting on
         * cluster locks.
 +       * We don't check size == 0 before the shift. This is borrowed
 +       * from do_generic_file_read.
         */
 -      last_index = size >> PAGE_CACHE_SHIFT;
 -      if (page->index > last_index) {
 +      last_index = (size - 1) >> PAGE_CACHE_SHIFT;
 +      if (unlikely(!size || page->index > last_index)) {
                ret = -EINVAL;
                goto out;
        }
         * because the "write" would invalidate their data.
         */
        if (page->index == last_index)
 -              len = size & ~PAGE_CACHE_MASK;
 +              len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
  
-       ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
+       ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
                                       &fsdata, di_bh, page);
        if (ret) {
                if (ret != -ENOSPC)
@@@ -159,7 -158,7 +160,7 @@@ static int ocfs2_page_mkwrite(struct vm
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
  
-       ret = __ocfs2_page_mkwrite(inode, di_bh, page);
+       ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page);
  
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
  
diff --combined fs/ocfs2/refcounttree.c
index 0afeda83120fa0e54bd2c9cc7e1f4f08c6e836bf,47549f64224cee4836db2f4f9be81e13fedee5fa..a120cfcf69bfd842cf4d50f18ecef025389ff3be
@@@ -49,6 -49,7 +49,7 @@@
  
  struct ocfs2_cow_context {
        struct inode *inode;
+       struct file *file;
        u32 cow_start;
        u32 cow_len;
        struct ocfs2_extent_tree data_et;
@@@ -2436,26 -2437,16 +2437,26 @@@ static int ocfs2_calc_refcount_meta_cre
                len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
                          le32_to_cpu(rec.r_clusters)) - cpos;
                /*
 -               * If the refcount rec already exist, cool. We just need
 -               * to check whether there is a split. Otherwise we just need
 -               * to increase the refcount.
 -               * If we will insert one, increases recs_add.
 -               *
                 * We record all the records which will be inserted to the
                 * same refcount block, so that we can tell exactly whether
                 * we need a new refcount block or not.
 +               *
 +               * If we will insert a new one, this is easy and only happens
 +               * when adding the refcounted flag to the extent, so we don't
 +               * have a chance of splitting. We just need one record.
 +               *
 +               * If the refcount rec already exists, it is a little more
 +               * complicated. We may have to:
 +               * 1) split at the beginning if the start pos isn't aligned.
 +               *    We need 1 more record in this case.
 +               * 2) split at the end if the end pos isn't aligned.
 +               *    We need 1 more record in this case.
 +               * 3) split in the middle because of file system fragmentation.
 +               *    We need 2 more records in this case (we can't detect this
 +               *    beforehand, so always assume the worst case).
                 */
                if (rec.r_refcount) {
 +                      recs_add += 2;
                        /* Check whether we need a split at the beginning. */
                        if (cpos == start_cpos &&
                            cpos != le64_to_cpu(rec.r_cpos))
@@@ -2932,13 -2923,16 +2933,16 @@@ static int ocfs2_duplicate_clusters_by_
        u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
        struct page *page;
        pgoff_t page_index;
-       unsigned int from, to;
+       unsigned int from, to, readahead_pages;
        loff_t offset, end, map_end;
        struct address_space *mapping = context->inode->i_mapping;
  
        mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
             new_cluster, new_len, cpos);
  
+       readahead_pages =
+               (ocfs2_cow_contig_clusters(sb) <<
+                OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT;
        offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
        end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
        /*
                if (map_end & (PAGE_CACHE_SIZE - 1))
                        to = map_end & (PAGE_CACHE_SIZE - 1);
  
 -              page = grab_cache_page(mapping, page_index);
 +              page = find_or_create_page(mapping, page_index, GFP_NOFS);
  
                /*
                 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
                if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
                        BUG_ON(PageDirty(page));
  
+               if (PageReadahead(page) && context->file) {
+                       page_cache_async_readahead(mapping,
+                                                  &context->file->f_ra,
+                                                  context->file,
+                                                  page, page_index,
+                                                  readahead_pages);
+               }
                if (!PageUptodate(page)) {
                        ret = block_read_full_page(page, ocfs2_get_block);
                        if (ret) {
@@@ -3179,8 -3181,7 +3191,8 @@@ static int ocfs2_cow_sync_writeback(str
                if (map_end > end)
                        map_end = end;
  
 -              page = grab_cache_page(context->inode->i_mapping, page_index);
 +              page = find_or_create_page(context->inode->i_mapping,
 +                                         page_index, GFP_NOFS);
                BUG_ON(!page);
  
                wait_on_page_writeback(page);
@@@ -3409,12 -3410,35 +3421,35 @@@ static int ocfs2_replace_cow(struct ocf
        return ret;
  }
  
+ static void ocfs2_readahead_for_cow(struct inode *inode,
+                                   struct file *file,
+                                   u32 start, u32 len)
+ {
+       struct address_space *mapping;
+       pgoff_t index;
+       unsigned long num_pages;
+       int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+       if (!file)
+               return;
+       mapping = file->f_mapping;
+       num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
+       if (!num_pages)
+               num_pages = 1;
+       index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
+       page_cache_sync_readahead(mapping, &file->f_ra, file,
+                                 index, num_pages);
+ }
  /*
   * Starting at cpos, try to CoW write_len clusters.  Don't CoW
   * past max_cpos.  This will stop when it runs into a hole or an
   * unrefcounted extent.
   */
  static int ocfs2_refcount_cow_hunk(struct inode *inode,
+                                  struct file *file,
                                   struct buffer_head *di_bh,
                                   u32 cpos, u32 write_len, u32 max_cpos)
  {
  
        BUG_ON(cow_len == 0);
  
+       ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
        context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
        if (!context) {
                ret = -ENOMEM;
        context->ref_root_bh = ref_root_bh;
        context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
        context->get_clusters = ocfs2_di_get_clusters;
+       context->file = file;
  
        ocfs2_init_dinode_extent_tree(&context->data_et,
                                      INODE_CACHE(inode), di_bh);
@@@ -3492,6 -3519,7 +3530,7 @@@ out
   * clusters between cpos and cpos+write_len are safe to modify.
   */
  int ocfs2_refcount_cow(struct inode *inode,
+                      struct file *file,
                       struct buffer_head *di_bh,
                       u32 cpos, u32 write_len, u32 max_cpos)
  {
                        num_clusters = write_len;
  
                if (ext_flags & OCFS2_EXT_REFCOUNTED) {
-                       ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
+                       ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
                                                      num_clusters, max_cpos);
                        if (ret) {
                                mlog_errno(ret);
diff --combined fs/ocfs2/refcounttree.h
index f04892d6175d8a8d8cd702e57526c01bd0896de6,29cba0eaa92743a98da2a095a5ff234edd98e7c6..c8ce46f7d8e30ee842cc8966a8c034aefae3b98b
@@@ -21,14 -21,14 +21,14 @@@ struct ocfs2_refcount_tree 
        struct rb_node rf_node;
        u64 rf_blkno;
        u32 rf_generation;
 +      struct kref rf_getcnt;
        struct rw_semaphore rf_sem;
        struct ocfs2_lock_res rf_lockres;
 -      struct kref rf_getcnt;
        int rf_removed;
  
        /* the following 4 fields are used by caching_info. */
 -      struct ocfs2_caching_info rf_ci;
        spinlock_t rf_lock;
 +      struct ocfs2_caching_info rf_ci;
        struct mutex rf_io_mutex;
        struct super_block *rf_sb;
  };
@@@ -52,7 -52,8 +52,8 @@@ int ocfs2_prepare_refcount_change_for_d
                                          u32 clusters,
                                          int *credits,
                                          int *ref_blocks);
- int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
+ int ocfs2_refcount_cow(struct inode *inode,
+                      struct file *filep, struct buffer_head *di_bh,
                       u32 cpos, u32 write_len, u32 max_cpos);
  
  typedef int (ocfs2_post_refcount_func)(struct inode *inode,