X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=fs%2Fdirect-io.c;h=a142314710a3a9d020e3d3d7a4cbbf2c168af72a;hb=HEAD;hp=0e04142d5962312fcb055738479247b2364a252e;hpb=c562865f0ae4fe8532b492136d870f2033ad7f38;p=karo-tx-linux.git diff --git a/fs/direct-io.c b/fs/direct-io.c index 0e04142d5962..a142314710a3 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -127,6 +127,7 @@ struct dio { spinlock_t bio_lock; /* protects BIO fields below */ int page_errors; /* errno from get_user_pages() */ int is_async; /* is IO async ? */ + int should_dirty; /* should we mark read pages dirty? */ bool defer_completion; /* defer AIO completion to workqueue? */ int io_error; /* IO error in completion path */ unsigned long refcount; /* direct_io_worker() and bios */ @@ -403,7 +404,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) dio->refcount++; spin_unlock_irqrestore(&dio->bio_lock, flags); - if (dio->is_async && dio->rw == READ) + if (dio->is_async && dio->rw == READ && dio->should_dirty) bio_set_pages_dirty(bio); if (sdio->submit_io) @@ -474,13 +475,14 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) if (!uptodate) dio->io_error = -EIO; - if (dio->is_async && dio->rw == READ) { + if (dio->is_async && dio->rw == READ && dio->should_dirty) { bio_check_pages_dirty(bio); /* transfers ownership */ } else { bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; - if (dio->rw == READ && !PageCompound(page)) + if (dio->rw == READ && !PageCompound(page) && + dio->should_dirty) set_page_dirty_lock(page); page_cache_release(page); } @@ -1081,6 +1083,101 @@ static inline int drop_refcount(struct dio *dio) return ret2; } +static ssize_t direct_IO_iovec(const struct iovec *iov, unsigned long nr_segs, + struct dio *dio, struct dio_submit *sdio, + unsigned blkbits, struct buffer_head *map_bh) +{ + size_t bytes; + ssize_t retval = 0; + int seg; + unsigned long user_addr; + + for (seg = 0; seg < nr_segs; seg++) { + user_addr = (unsigned long)iov[seg].iov_base; + sdio->pages_in_io += + ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) / + PAGE_SIZE - user_addr / PAGE_SIZE); + } + + dio->should_dirty = 1; + + for (seg = 0; seg < nr_segs; seg++) { + user_addr = (unsigned long)iov[seg].iov_base; + sdio->size += bytes = iov[seg].iov_len; + + /* Index into the first page of the first block */ + sdio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; + sdio->final_block_in_request = sdio->block_in_file + + (bytes >> blkbits); + /* Page fetching state */ + sdio->head = 0; + sdio->tail = 0; + sdio->curr_page = 0; + + sdio->total_pages = 0; + if (user_addr & (PAGE_SIZE-1)) { + sdio->total_pages++; + bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); + } + sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; + sdio->curr_user_address = user_addr; + + retval = do_direct_IO(dio, sdio, map_bh); + + dio->result += iov[seg].iov_len - + ((sdio->final_block_in_request - sdio->block_in_file) << + blkbits); + + if (retval) { + dio_cleanup(dio, sdio); + break; + } + } /* end iovec loop */ + + return retval; +} + +static ssize_t direct_IO_bvec(struct bio_vec *bvec, unsigned long nr_segs, + struct dio *dio, struct dio_submit *sdio, + unsigned blkbits, struct buffer_head *map_bh) +{ + ssize_t retval = 0; + int seg; + + sdio->pages_in_io += nr_segs; + + for (seg = 0; seg < nr_segs; seg++) { + sdio->size += bvec[seg].bv_len; + + /* Index into the first page of the first block */ + sdio->first_block_in_page = bvec[seg].bv_offset >> blkbits; + sdio->final_block_in_request = sdio->block_in_file + + (bvec[seg].bv_len >> blkbits); + /* Page fetching state */ + sdio->curr_page = 0; + page_cache_get(bvec[seg].bv_page); + dio->pages[0] = bvec[seg].bv_page; + sdio->head = 0; + sdio->tail = 1; + + sdio->total_pages = 1; + sdio->curr_user_address = 0; + + retval = do_direct_IO(dio, sdio, map_bh); + + dio->result += bvec[seg].bv_len - + ((sdio->final_block_in_request - sdio->block_in_file) << + blkbits); + + if (retval) { + dio_cleanup(dio, sdio); + break; + } + } + + return retval; +} + /* * This is a library function for use by filesystem drivers. * @@ -1108,9 +1205,9 @@ static inline int drop_refcount(struct dio *dio) */ static inline ssize_t do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) + struct block_device *bdev, struct iov_iter *iter, loff_t offset, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, + int flags) { int seg; size_t size; @@ -1122,10 +1219,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, loff_t end = offset; struct dio *dio; struct dio_submit sdio = { 0, }; - unsigned long user_addr; - size_t bytes; struct buffer_head map_bh = { 0, }; struct blk_plug plug; + unsigned long nr_segs = iter->nr_segs; if (rw & WRITE) rw = WRITE_ODIRECT; @@ -1144,20 +1240,49 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } /* Check the memory alignment. Blocks cannot straddle pages */ - for (seg = 0; seg < nr_segs; seg++) { - addr = (unsigned long)iov[seg].iov_base; - size = iov[seg].iov_len; - end += size; - if (unlikely((addr & blocksize_mask) || - (size & blocksize_mask))) { - if (bdev) - blkbits = blksize_bits( - bdev_logical_block_size(bdev)); - blocksize_mask = (1 << blkbits) - 1; - if ((addr & blocksize_mask) || (size & blocksize_mask)) - goto out; + if (iov_iter_has_iovec(iter)) { + const struct iovec *iov = iov_iter_iovec(iter); + + for (seg = 0; seg < nr_segs; seg++) { + addr = (unsigned long)iov[seg].iov_base; + size = iov[seg].iov_len; + end += size; + if (unlikely((addr & blocksize_mask) || + (size & blocksize_mask))) { + if (bdev) + blkbits = blksize_bits( + bdev_logical_block_size(bdev)); + blocksize_mask = (1 << blkbits) - 1; + if ((addr & blocksize_mask) || + (size & blocksize_mask)) + goto out; + } } - } + } else if (iov_iter_has_bvec(iter)) { + /* + * Is this necessary, or can we trust the in-kernel + * caller? Can we replace this with + * end += iov_iter_count(iter); ? + */ + struct bio_vec *bvec = iov_iter_bvec(iter); + + for (seg = 0; seg < nr_segs; seg++) { + addr = bvec[seg].bv_offset; + size = bvec[seg].bv_len; + end += size; + if (unlikely((addr & blocksize_mask) || + (size & blocksize_mask))) { + if (bdev) + blkbits = blksize_bits( + bdev_logical_block_size(bdev)); + blocksize_mask = (1 << blkbits) - 1; + if ((addr & blocksize_mask) || + (size & blocksize_mask)) + goto out; + } + } + } else + BUG(); /* watch out for a 0 len io from a tricksy fs */ if (rw == READ && end == offset) @@ -1251,47 +1376,14 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (unlikely(sdio.blkfactor)) sdio.pages_in_io = 2; - for (seg = 0; seg < nr_segs; seg++) { - user_addr = (unsigned long)iov[seg].iov_base; - sdio.pages_in_io += - ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) / - PAGE_SIZE - user_addr / PAGE_SIZE); - } - blk_start_plug(&plug); - for (seg = 0; seg < nr_segs; seg++) { - user_addr = (unsigned long)iov[seg].iov_base; - sdio.size += bytes = iov[seg].iov_len; - - /* Index into the first page of the first block */ - sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; - sdio.final_block_in_request = sdio.block_in_file + - (bytes >> blkbits); - /* Page fetching state */ - sdio.head = 0; - sdio.tail = 0; - sdio.curr_page = 0; - - sdio.total_pages = 0; - if (user_addr & (PAGE_SIZE-1)) { - sdio.total_pages++; - bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); - } - sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; - sdio.curr_user_address = user_addr; - - retval = do_direct_IO(dio, &sdio, &map_bh); - - dio->result += iov[seg].iov_len - - ((sdio.final_block_in_request - sdio.block_in_file) << - blkbits); - - if (retval) { - dio_cleanup(dio, &sdio); - break; - } - } /* end iovec loop */ + if (iov_iter_has_iovec(iter)) + retval = direct_IO_iovec(iov_iter_iovec(iter), nr_segs, dio, + &sdio, blkbits, &map_bh); + else + retval = direct_IO_bvec(iov_iter_bvec(iter), nr_segs, dio, + &sdio, blkbits, &map_bh); if (retval == -ENOTBLK) { /* @@ -1360,9 +1452,9 @@ out: ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) + struct block_device *bdev, struct iov_iter *iter, loff_t offset, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, + int flags) { /* * The block device state is needed in the end to finally @@ -1376,9 +1468,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, prefetch(bdev->bd_queue); prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); - return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, - submit_io, flags); + return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, + get_block, end_io, submit_io, flags); } EXPORT_SYMBOL(__blockdev_direct_IO);