]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
dio: optimize cache misses in the submission path
authorAndi Kleen <ak@linux.intel.com>
Wed, 5 Oct 2011 00:44:07 +0000 (11:44 +1100)
committerStephen Rothwell <sfr@canb.auug.org.au>
Fri, 7 Oct 2011 06:07:15 +0000 (17:07 +1100)
Some investigation of a transaction processing workload showed that a
major consumer of cycles in __blockdev_direct_IO is the cache miss while
accessing the block size.  This is because it has to walk the chain from
block_dev to gendisk to queue.

The block size is needed early on to check alignment and sizes.  It's only
done if the check for the inode block size fails.  But the costly block
device state is unconditionally fetched.

- Reorganize the code to only fetch block dev state when actually
  needed.

Then do a prefetch on the block dev early on in the direct IO path.  This
is worth it, because there is substantial code run before we actually
touch the block dev now.

- I also added some unlikelies to make it clear the compiler that block
  device fetch code is not normally executed.

This gave a small, but measurable improvement on a large database
benchmark (about 0.3%)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
fs/direct-io.c

index af9917c7d08ab7f96653ea4d87ff79fc28320341..f8dcd377ded7d6082d23f7c15ca87776154922c9 100644 (file)
@@ -1080,8 +1080,8 @@ static inline int drop_refcount(struct dio *dio)
  * individual fields and will generate much worse code.
  * This is important for the whole file.
  */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+static inline ssize_t
+do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        struct block_device *bdev, const struct iovec *iov, loff_t offset, 
        unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
        dio_submit_t submit_io, int flags)
@@ -1090,7 +1090,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        size_t size;
        unsigned long addr;
        unsigned blkbits = inode->i_blkbits;
-       unsigned bdev_blkbits = 0;
        unsigned blocksize_mask = (1 << blkbits) - 1;
        ssize_t retval = -EINVAL;
        loff_t end = offset;
@@ -1103,12 +1102,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        if (rw & WRITE)
                rw = WRITE_ODIRECT;
 
-       if (bdev)
-               bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+       /* 
+        * Avoid references to bdev if not absolutely needed to give
+        * the early prefetch in the caller enough time.
+        */
 
        if (offset & blocksize_mask) {
                if (bdev)
-                        blkbits = bdev_blkbits;
+                       blkbits = blksize_bits(bdev_logical_block_size(bdev));
                blocksize_mask = (1 << blkbits) - 1;
                if (offset & blocksize_mask)
                        goto out;
@@ -1119,11 +1120,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
                addr = (unsigned long)iov[seg].iov_base;
                size = iov[seg].iov_len;
                end += size;
-               if ((addr & blocksize_mask) || (size & blocksize_mask))  {
+               if (unlikely((addr & blocksize_mask) || 
+                            (size & blocksize_mask)))  {
                        if (bdev)
-                                blkbits = bdev_blkbits;
+                                blkbits = blksize_bits(
+                                        bdev_logical_block_size(bdev));
                        blocksize_mask = (1 << blkbits) - 1;
-                       if ((addr & blocksize_mask) || (size & blocksize_mask))  
+                       if ((addr & blocksize_mask) || (size & blocksize_mask))
                                goto out;
                }
        }
@@ -1306,6 +1309,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 out:
        return retval;
 }
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+       struct block_device *bdev, const struct iovec *iov, loff_t offset, 
+       unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+       dio_submit_t submit_io, int flags)
+{
+       /* 
+        * The block device state is needed in the end to finally
+        * submit everything.  Since it's likely to be cache cold
+        * prefetch it here as first thing to hide some of the
+        * latency.
+        * 
+        * Attempt to prefetch the pieces we likely need later.
+        */
+       prefetch(&bdev->bd_disk->part_tbl);
+       prefetch(bdev->bd_queue);
+       prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+
+       return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+                                    nr_segs, get_block, end_io,
+                                    submit_io, flags);
+}
+
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
 static __init int dio_init(void)