From: Josef Bacik Date: Sun, 23 May 2010 15:07:21 +0000 (-0400) Subject: Btrfs: do aio_write instead of write X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=11c65dccf70be9ace5dbd3906778e1a099b1fee1;p=linux-beck.git Btrfs: do aio_write instead of write In order for AIO to work, we need to implement aio_write. This patch converts our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and nothing broke, and the AIO stuff magically started working. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1a57c17d4029..a53aca338c7f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2017,6 +2017,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; + struct btrfs_ordered_extent *ordered; int ret; int nr = 0; size_t page_offset = 0; @@ -2028,7 +2029,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, set_page_extent_mapped(page); end = page_end; - lock_extent(tree, start, end, GFP_NOFS); + while (1) { + lock_extent(tree, start, end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, start); + if (!ordered) + break; + unlock_extent(tree, start, end, GFP_NOFS); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } if (page->index == last_byte >> PAGE_CACHE_SHIFT) { char *userpage; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a28810abfb98..233aea2e5ef2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -46,32 +46,42 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, - const char __user *buf) + struct iov_iter *i) { - long page_fault = 0; - int i; + size_t copied; + int pg = 0; int offset = pos & (PAGE_CACHE_SIZE - 1); - for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { + while (write_bytes > 0) { size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); - struct page *page = prepared_pages[i]; - fault_in_pages_readable(buf, count); + struct page *page = prepared_pages[pg]; +again: + if (unlikely(iov_iter_fault_in_readable(i, count))) + return -EFAULT; /* Copy data from userspace to the current page */ - kmap(page); - page_fault = __copy_from_user(page_address(page) + offset, - buf, count); + copied = iov_iter_copy_from_user(page, i, offset, count); + /* Flush processor's dcache for this page */ flush_dcache_page(page); - kunmap(page); - buf += count; - write_bytes -= count; + iov_iter_advance(i, copied); + write_bytes -= copied; - if (page_fault) - break; + if (unlikely(copied == 0)) { + count = min_t(size_t, PAGE_CACHE_SIZE - offset, + iov_iter_single_seg_count(i)); + goto again; + } + + if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { + offset += copied; + } else { + pg++; + offset = 0; + } } - return page_fault ? -EFAULT : 0; + return 0; } /* @@ -822,60 +832,24 @@ again: return 0; } -/* Copied from read-write.c */ -static void wait_on_retry_sync_kiocb(struct kiocb *iocb) -{ - set_current_state(TASK_UNINTERRUPTIBLE); - if (!kiocbIsKicked(iocb)) - schedule(); - else - kiocbClearKicked(iocb); - __set_current_state(TASK_RUNNING); -} - -/* - * Just a copy of what do_sync_write does. - */ -static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf, - size_t count, loff_t pos, loff_t *ppos) +static ssize_t btrfs_file_aio_write(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; - unsigned long nr_segs = 1; - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = pos; - kiocb.ki_left = count; - kiocb.ki_nbytes = count; - - while (1) { - ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos, - ppos, count, count); - if (ret != -EIOCBRETRY) - break; - wait_on_retry_sync_kiocb(&kiocb); - } - - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - *ppos = kiocb.ki_pos; - return ret; -} - -static ssize_t btrfs_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - loff_t pos; + struct file *file = iocb->ki_filp; + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct page *pinned[2]; + struct page **pages = NULL; + struct iov_iter i; + loff_t *ppos = &iocb->ki_pos; loff_t start_pos; ssize_t num_written = 0; ssize_t err = 0; + size_t count; + size_t ocount; int ret = 0; - struct inode *inode = fdentry(file)->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct page **pages = NULL; int nrptrs; - struct page *pinned[2]; unsigned long first_index; unsigned long last_index; int will_write; @@ -887,13 +861,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pinned[0] = NULL; pinned[1] = NULL; - pos = *ppos; start_pos = pos; vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); mutex_lock(&inode->i_mutex); + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); + if (err) + goto out; + count = ocount; + current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) @@ -910,14 +888,48 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, BTRFS_I(inode)->sequence++; if (unlikely(file->f_flags & O_DIRECT)) { - num_written = __btrfs_direct_write(file, buf, count, pos, - ppos); - pos += num_written; - count -= num_written; + ret = btrfs_delalloc_reserve_space(inode, count); + if (ret) + goto out; - /* We've written everything we wanted to, exit */ - if (num_written < 0 || !count) + num_written = generic_file_direct_write(iocb, iov, &nr_segs, + pos, ppos, count, + ocount); + + /* + * the generic O_DIRECT will update in-memory i_size after the + * DIOs are done. But our endio handlers that update the on + * disk i_size never update past the in memory i_size. So we + * need one more update here to catch any additions to the + * file + */ + if (inode->i_size != BTRFS_I(inode)->disk_i_size) { + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); + mark_inode_dirty(inode); + } + + if (num_written < 0) { + if (num_written != -EIOCBQUEUED) { + /* + * aio land will take care of releasing the + * delalloc + */ + btrfs_delalloc_release_space(inode, count); + } + ret = num_written; + num_written = 0; goto out; + } else if (num_written == count) { + /* pick up pos changes done by the generic code */ + pos = *ppos; + goto out; + } + + /* + * the buffered IO will reserve bytes for the rest of the + * range, don't double count them here + */ + btrfs_delalloc_release_space(inode, count - num_written); /* * We are going to do buffered for the rest of the range, so we @@ -925,18 +937,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, * done. */ buffered = 1; - buf += num_written; + pos += num_written; } - nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, - PAGE_CACHE_SIZE / (sizeof(struct page *))); + iov_iter_init(&i, iov, nr_segs, count, num_written); + nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / + PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / + (sizeof(struct page *))); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); /* generic_write_checks can change our pos */ start_pos = pos; first_index = pos >> PAGE_CACHE_SHIFT; - last_index = (pos + count) >> PAGE_CACHE_SHIFT; + last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; /* * there are lots of better ways to do this, but this code @@ -953,7 +967,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, unlock_page(pinned[0]); } } - if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { + if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { ret = btrfs_readpage(NULL, pinned[1]); @@ -964,10 +978,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - while (count > 0) { + while (iov_iter_count(&i) > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, nrptrs * - (size_t)PAGE_CACHE_SIZE - + size_t write_bytes = min(iov_iter_count(&i), + nrptrs * (size_t)PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; @@ -988,7 +1002,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } ret = btrfs_copy_from_user(pos, num_pages, - write_bytes, pages, buf); + write_bytes, pages, &i); if (ret == 0) { dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); @@ -1012,8 +1026,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_throttle(root); } - buf += write_bytes; - count -= write_bytes; pos += write_bytes; num_written += write_bytes; @@ -1206,7 +1218,7 @@ const struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, - .write = btrfs_file_write, + .aio_write = btrfs_file_aio_write, .mmap = btrfs_file_mmap, .open = generic_file_open, .release = btrfs_release_file,