From: Al Viro Date: Sat, 5 Apr 2014 08:27:08 +0000 (-0400) Subject: ->splice_write() via ->write_iter() X-Git-Tag: next-20140507~74^2~6 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=5899ea5ba8bde7a5961bfe76308a141b80461de4;p=karo-tx-linux.git ->splice_write() via ->write_iter() iter_file_splice_write() - a ->splice_write() instance that gathers the pipe buffers, builds a bio_vec-based iov_iter covering those and feeds it to ->write_iter(). A bunch of simple cases converted to that... Signed-off-by: Al Viro --- diff --git a/fs/block_dev.c b/fs/block_dev.c index 4e36b8ea8aa4..e68e150b1b16 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1583,7 +1583,7 @@ const struct file_operations def_blk_fops = { .compat_ioctl = compat_blkdev_ioctl, #endif .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 5b7f6be5a2d5..71bf8e4fb5d4 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -77,7 +77,7 @@ const struct file_operations exofs_file_operations = { .fsync = exofs_file_fsync, .flush = exofs_flush, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; const struct inode_operations exofs_file_inode_operations = { diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 970c6aca15cc..7c87b22a7228 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = { .release = ext2_release_file, .fsync = ext2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; #ifdef CONFIG_EXT2_FS_XIP diff --git a/fs/ext3/file.c b/fs/ext3/file.c index c833b1226d4d..a062fa1e1b11 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -63,7 +63,7 @@ const struct 
file_operations ext3_file_operations = { .release = ext3_release_file, .fsync = ext3_sync_file, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; const struct inode_operations ext3_file_inode_operations = { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 48383a5f37a1..708aad768199 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -599,7 +599,7 @@ const struct file_operations ext4_file_operations = { .release = ext4_release_file, .fsync = ext4_sync_file, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, }; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 22f4900dd8eb..e4ba4b93f96a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -692,5 +692,5 @@ const struct file_operations f2fs_file_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ca932cd358d3..01b4c5b1bff8 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1068,7 +1068,7 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = gfs2_setlease, .fallocate = gfs2_fallocate, }; @@ -1098,7 +1098,7 @@ const struct file_operations gfs2_file_fops_nolock = { .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/jfs/file.c b/fs/jfs/file.c index cc744ecaf51f..33aa0cc1f8b8 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -157,7 +157,7 @@ const struct file_operations 
jfs_file_operations = { .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, .unlocked_ioctl = jfs_ioctl, diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 6ea0b9718a9d..4f56de822d2f 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -38,7 +38,7 @@ const struct file_operations ramfs_file_operations = { .mmap = generic_file_mmap, .fsync = noop_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, }; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 9ed420f8f3ca..dda012ad4208 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -43,7 +43,7 @@ const struct file_operations ramfs_file_operations = { .write_iter = generic_file_write_iter, .fsync = noop_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, }; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 7c8ecd6468db..f070cc827456 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -248,7 +248,7 @@ const struct file_operations reiserfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, }; diff --git a/fs/splice.c b/fs/splice.c index f99e420744c7..26d988755432 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" /* @@ -1052,6 +1053,144 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, EXPORT_SYMBOL(generic_file_splice_write); +/** + * iter_file_splice_write - 
splice data from a pipe to a file + * @pipe: pipe info + * @out: file to write to + * @ppos: position in @out + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Description: + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. + * This one is ->write_iter-based. + * + */ +ssize_t +iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; + int nbufs = pipe->buffers; + struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), + GFP_KERNEL); + ssize_t ret; + + if (unlikely(!array)) + return -ENOMEM; + + pipe_lock(pipe); + + splice_from_pipe_begin(&sd); + while (sd.total_len) { + struct iov_iter from; + struct kiocb kiocb; + size_t count = 0; + int n, idx; + + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; + + if (unlikely(nbufs < pipe->buffers)) { + kfree(array); + nbufs = pipe->buffers; + array = kcalloc(nbufs, sizeof(struct bio_vec), + GFP_KERNEL); + if (!array) { + ret = -ENOMEM; + break; + } + } + + /* build the vector */ + for (n = 0, idx = pipe->curbuf; n < pipe->nrbufs; n++, idx++) { + struct pipe_buffer *buf = pipe->bufs + idx; + size_t this_len = buf->len; + + if (this_len > sd.total_len) + this_len = sd.total_len; + + if (idx == pipe->buffers - 1) + idx = -1; + + ret = buf->ops->confirm(pipe, buf); + if (unlikely(ret)) { + if (ret == -ENODATA) + ret = 0; + goto done; + } + + array[n].bv_page = buf->page; + array[n].bv_len = this_len; + array[n].bv_offset = buf->offset; + count += this_len; + } + + /* ... iov_iter */ + from.type = ITER_BVEC | WRITE; + from.bvec = array; + from.nr_segs = n; + from.count = count; + from.iov_offset = 0; + + /* ... 
and iocb */ + init_sync_kiocb(&kiocb, out); + kiocb.ki_pos = sd.pos; + kiocb.ki_nbytes = count; + + /* now, send it */ + ret = out->f_op->write_iter(&kiocb, &from); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + + if (ret <= 0) + break; + + sd.num_spliced += ret; + sd.total_len -= ret; + *ppos = sd.pos = kiocb.ki_pos; + + /* dismiss the fully eaten buffers, adjust the partial one */ + while (ret) { + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + if (ret >= buf->len) { + const struct pipe_buf_operations *ops = buf->ops; + ret -= buf->len; + buf->len = 0; + buf->ops = NULL; + ops->release(pipe, buf); + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); + pipe->nrbufs--; + if (pipe->files) + sd.need_wakeup = true; + } else { + buf->offset += ret; + buf->len -= ret; + ret = 0; + } + } + } +done: + kfree(array); + splice_from_pipe_end(pipe, &sd); + + pipe_unlock(pipe); + + if (sd.num_spliced) + ret = sd.num_spliced; + + return ret; +} + +EXPORT_SYMBOL(iter_file_splice_write); + static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 6bc4e8efbccf..0888502a6041 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1585,7 +1585,7 @@ const struct file_operations ubifs_file_operations = { .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 5446e86d3485..b1c489c1fb2e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -342,47 +342,6 @@ xfs_file_splice_read( return ret; } -/* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. 
This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t count, - unsigned int flags) -{ - struct inode *inode = outfilp->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_write_calls); - - if (outfilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_write_bytes, ret); - - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; -} - /* * This routine is called to handle zeroing any space in the last block of the * file that is beyond the EOF. 
We do this since the size is being increased @@ -1442,7 +1401,7 @@ const struct file_operations xfs_file_operations = { .read_iter = xfs_file_read_iter, .write_iter = xfs_file_write_iter, .splice_read = xfs_file_splice_read, - .splice_write = xfs_file_splice_write, + .splice_write = iter_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 65d8c793a25c..53182f97cf01 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1060,7 +1060,6 @@ DEFINE_RW_EVENT(xfs_file_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_splice_read); -DEFINE_RW_EVENT(xfs_file_splice_write); DECLARE_EVENT_CLASS(xfs_page_class, TP_PROTO(struct inode *inode, struct page *page, unsigned long off, diff --git a/include/linux/fs.h b/include/linux/fs.h index a6448849dbce..8bd8ed357c7b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2434,6 +2434,8 @@ extern ssize_t default_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); +extern ssize_t iter_file_splice_write(struct pipe_inode_info *, + struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags);