6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7 * modified for async RPC by okir@monad.swb.de
10 #include <linux/time.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/fcntl.h>
14 #include <linux/stat.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/sunrpc/clnt.h>
19 #include <linux/nfs_fs.h>
20 #include <linux/nfs_page.h>
21 #include <linux/module.h>
23 #include <asm/system.h>
/* Tag dprintk() output from this file with the pagecache debug facility. */
31 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
/* Forward declarations for the pageio ops and RPC callback tables defined
 * later in this file. */
33 static const struct nfs_pageio_ops nfs_pageio_read_ops;
34 static const struct rpc_call_ops nfs_read_partial_ops;
35 static const struct rpc_call_ops nfs_read_full_ops;
/* Slab cache and mempool backing struct nfs_read_data allocations. */
37 static struct kmem_cache *nfs_rdata_cachep;
38 static mempool_t *nfs_rdata_mempool;
/* Minimum number of pre-reserved elements in the read mempool. */
40 #define MIN_POOL_READ (32)
/*
 * Allocate and zero a struct nfs_read_data from the read mempool and set
 * up its page vector for @pagecount pages: the embedded page_array is
 * reused when it is large enough, otherwise a separate array is
 * kcalloc()ed; if that allocation fails the struct goes back to the pool.
 * NOTE(review): this excerpt is missing interior lines (the mempool NULL
 * check, return statements, closing brace) — confirm against full source.
 */
42 struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
44 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
47 memset(p, 0, sizeof(*p));
48 INIT_LIST_HEAD(&p->pages);
49 p->npages = pagecount;
/* Small requests avoid a second allocation by using the inline array. */
50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array;
53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
/* kcalloc failed: undo the mempool allocation. */
55 mempool_free(p, nfs_rdata_mempool);
/*
 * Free an nfs_read_data: when pagevec was separately allocated (does not
 * point at the embedded page_array) it must be freed first, then the
 * struct is returned to the mempool.
 * NOTE(review): the kfree(p->pagevec) line is missing from this excerpt.
 */
63 void nfs_readdata_free(struct nfs_read_data *p)
65 if (p && (p->pagevec != &p->page_array[0]))
67 mempool_free(p, nfs_rdata_mempool);
/*
 * Release everything a completed read pinned: the pNFS layout segment,
 * the open context referenced by the RPC args, and finally the read
 * data struct itself.
 */
70 static void nfs_readdata_release(struct nfs_read_data *rdata)
72 put_lseg(rdata->lseg);
73 put_nfs_open_context(rdata->args.context);
74 nfs_readdata_free(rdata);
/*
 * Satisfy a request entirely past the cached EOF without going to the
 * wire: zero-fill the whole page and mark it up to date.
 */
78 int nfs_return_empty_page(struct page *page)
80 zero_user(page, 0, PAGE_CACHE_SIZE);
81 SetPageUptodate(page);
/*
 * After a short read that hit EOF, zero the tail of the request the
 * server never filled (from args.pgbase + res.count through args.count)
 * so stale page contents are not exposed.  Does nothing unless the
 * server reported EOF with bytes still outstanding.
 * NOTE(review): loop structure and some declarations are missing from
 * this excerpt — verify against the complete file.
 */
86 static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
88 unsigned int remainder = data->args.count - data->res.count;
89 unsigned int base = data->args.pgbase + data->res.count;
93 if (data->res.eof == 0 || remainder == 0)
96 * Note: "remainder" can never be negative, since we check for
97 * this in the XDR code.
/* Locate the first partially filled page and the offset within it. */
99 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
100 base &= ~PAGE_CACHE_MASK;
101 pglen = PAGE_CACHE_SIZE - base;
/* Zero the remainder within the first page, then whole pages after it. */
103 if (remainder <= pglen) {
104 zero_user(*pages, base, remainder);
107 zero_user(*pages, base, pglen);
110 pglen = PAGE_CACHE_SIZE;
/*
 * Initialise a pageio descriptor for reads sent directly to the MDS,
 * coalescing up to the server's negotiated rsize.
 */
115 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
118 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
119 NFS_SERVER(inode)->rsize, 0);
121 EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);
/*
 * Initialise a read pageio descriptor: try the pNFS layout-driven path
 * first, falling back to plain MDS reads when no layout applies.
 */
123 static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
126 if (!pnfs_pageio_init_read(pgio, inode))
127 nfs_pageio_init_read_mds(pgio, inode);
/*
 * Issue an asynchronous read for one page.  A zero-length request (page
 * entirely past EOF) is satisfied by zero-filling; otherwise the tail of
 * a partial page is zeroed, a request is created and pushed through a
 * local pageio descriptor.
 * NOTE(review): error-handling lines for nfs_create_request() are
 * missing from this excerpt.
 */
130 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
133 struct nfs_page *new;
135 struct nfs_pageio_descriptor pgio;
137 len = nfs_page_length(page);
139 return nfs_return_empty_page(page);
140 new = nfs_create_request(ctx, inode, page, 0, len);
/* Zero the region beyond the valid length so stale data never leaks. */
145 if (len < PAGE_CACHE_SIZE)
146 zero_user_segment(page, len, PAGE_CACHE_SIZE);
148 nfs_pageio_init_read(&pgio, inode);
149 nfs_pageio_add_request(&pgio, new);
150 nfs_pageio_complete(&pgio);
/*
 * Final release of a read request: store an up-to-date page into
 * fscache, unlock the page, log completion and drop the last request
 * reference.
 */
154 static void nfs_readpage_release(struct nfs_page *req)
156 struct inode *d_inode = req->wb_context->path.dentry->d_inode;
/* Only cache pages whose contents are now valid. */
158 if (PageUptodate(req->wb_page))
159 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
161 unlock_page(req->wb_page);
163 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
164 req->wb_context->path.dentry->d_inode->i_sb->s_id,
165 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
167 (long long)req_offset(req));
168 nfs_release_request(req);
/*
 * Build the RPC message and task setup for a read and hand it to the
 * RPC layer.  The task runs asynchronously on the nfsiod workqueue;
 * swapfile I/O adds the special swap RPC flags.  Returns the PTR_ERR()
 * of a failed rpc_run_task().
 * NOTE(review): several struct-initialiser and dprintk argument lines
 * are missing from this excerpt.
 */
171 int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
172 const struct rpc_call_ops *call_ops)
174 struct inode *inode = data->inode;
175 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
176 struct rpc_task *task;
177 struct rpc_message msg = {
178 .rpc_argp = &data->args,
179 .rpc_resp = &data->res,
180 .rpc_cred = data->cred,
182 struct rpc_task_setup task_setup_data = {
186 .callback_ops = call_ops,
187 .callback_data = data,
188 .workqueue = nfsiod_workqueue,
189 .flags = RPC_TASK_ASYNC | swap_flags,
192 /* Set up the initial task struct. */
193 NFS_PROTO(inode)->read_setup(data, &msg);
195 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
199 (long long)NFS_FILEID(inode),
201 (unsigned long long)data->args.offset);
203 task = rpc_run_task(&task_setup_data);
205 return PTR_ERR(task);
209 EXPORT_SYMBOL_GPL(nfs_initiate_read);
212 * Set up the NFS read request struct
/*
 * Fill in the wire arguments (fh, byte offset, pgbase, count, open and
 * lock contexts) and the reply buffers for a read of @count bytes at
 * @offset within the request's extent.  Takes a reference on the open
 * context via get_nfs_open_context().
 */
214 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
215 unsigned int count, unsigned int offset)
217 struct inode *inode = req->wb_context->path.dentry->d_inode;
221 data->cred = req->wb_context->cred;
223 data->args.fh = NFS_FH(inode);
224 data->args.offset = req_offset(req) + offset;
225 data->args.pgbase = req->wb_pgbase + offset;
226 data->args.pages = data->pagevec;
227 data->args.count = count;
228 data->args.context = get_nfs_open_context(req->wb_context);
229 data->args.lock_context = req->wb_lock_context;
231 data->res.fattr = &data->fattr;
/* Preload res.count; a short reply will overwrite it. */
232 data->res.count = count;
234 nfs_fattr_init(&data->fattr);
/*
 * Dispatch one read: try the pNFS layout path first; if the layout
 * driver did not take the I/O, drop the just-taken segment reference and
 * fall back to a plain MDS read via nfs_initiate_read().
 */
237 static int nfs_do_read(struct nfs_read_data *data,
238 const struct rpc_call_ops *call_ops,
239 struct pnfs_layout_segment *lseg)
241 struct inode *inode = data->args.context->path.dentry->d_inode;
243 data->lseg = get_lseg(lseg);
245 if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
247 put_lseg(data->lseg);
251 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
/*
 * Submit every nfs_read_data queued on @head via nfs_do_read().
 * NOTE(review): the return-type line and the ret/ret2 error-accumulation
 * lines are missing from this excerpt — presumably the first error is
 * remembered and returned; confirm against full source.
 */
255 nfs_do_multiple_reads(struct list_head *head,
256 const struct rpc_call_ops *call_ops,
257 struct pnfs_layout_segment *lseg)
259 struct nfs_read_data *data;
262 while (!list_empty(head)) {
265 data = list_entry(head->next, struct nfs_read_data, list);
266 list_del_init(&data->list);
268 ret2 = nfs_do_read(data, call_ops, lseg);
/*
 * Error path for requests that never made it onto the wire: mark each
 * page in error and release every request on @head.
 */
276 nfs_async_read_error(struct list_head *head)
278 struct nfs_page *req;
280 while (!list_empty(head)) {
281 req = nfs_list_entry(head->next);
282 nfs_list_remove_request(req);
283 SetPageError(req->wb_page);
284 nfs_readpage_release(req);
289 * Generate multiple requests to fill a single page.
291 * We optimize to reduce the number of read operations on the wire. If we
292 * detect that we're reading a page, or an area of a page, that is past the
293 * end of file, we do not generate NFS read operations but just clear the
294 * parts of the page that would have come back zero from the server anyway.
296 * We rely on the cached value of i_size to make this determination; another
297 * client can fill pages on the server past our cached end-of-file, but we
298 * won't see the new data until our attribute cache is updated. This is more
299 * or less conventional NFS client behavior.
/*
 * Split a single-page request into rsize-sized sub-reads, one
 * nfs_read_data each, all queued on @res; wb_complete tracks the number
 * of outstanding sub-requests.  On allocation failure the already-queued
 * entries are freed and the request released.
 * NOTE(review): loop setup, offset bookkeeping, error labels and return
 * statements are missing from this excerpt — verify before relying on
 * the exact control flow.
 */
301 static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
303 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
304 struct page *page = req->wb_page;
305 struct nfs_read_data *data;
306 size_t rsize = desc->pg_bsize, nbytes;
311 nfs_list_remove_request(req);
314 nbytes = desc->pg_count;
/* Each iteration covers at most rsize bytes of the page. */
316 size_t len = min(nbytes,rsize);
318 data = nfs_readdata_alloc(1);
321 data->pagevec[0] = page;
322 nfs_read_rpcsetup(req, data, len, offset);
323 list_add(&data->list, res);
327 } while(nbytes != 0);
328 atomic_set(&req->wb_complete, requests);
329 ClearPageError(page);
330 desc->pg_rpc_callops = &nfs_read_partial_ops;
/* Allocation-failure cleanup: free everything queued so far. */
333 while (!list_empty(res)) {
334 data = list_entry(res->next, struct nfs_read_data, list);
335 list_del(&data->list);
336 nfs_readdata_free(data);
339 nfs_readpage_release(req);
/*
 * Coalesce all requests on the descriptor into a single nfs_read_data
 * covering desc->pg_count bytes: move each request onto data->pages,
 * collect each page into the page vector, set up the RPC args from the
 * first request, and select the full-page callback ops.  On allocation
 * failure all pending requests are errored out.
 */
343 static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
345 struct nfs_page *req;
347 struct nfs_read_data *data;
348 struct list_head *head = &desc->pg_list;
351 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
354 nfs_async_read_error(head);
359 pages = data->pagevec;
360 while (!list_empty(head)) {
361 req = nfs_list_entry(head->next);
362 nfs_list_remove_request(req);
363 nfs_list_add_request(req, &data->pages);
364 ClearPageError(req->wb_page);
365 *pages++ = req->wb_page;
/* Use the first queued request to fill in the RPC arguments. */
367 req = nfs_list_entry(data->pages.next);
369 nfs_read_rpcsetup(req, data, desc->pg_count, 0);
370 list_add(&data->list, res);
371 desc->pg_rpc_callops = &nfs_read_full_ops;
/*
 * Generic .pg_doio entry point: build the list of reads (per-page
 * sub-reads when the I/O size is smaller than a page, otherwise one
 * coalesced read), submit it, and drop the descriptor's layout segment
 * reference.
 */
376 int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
381 if (desc->pg_bsize < PAGE_CACHE_SIZE)
382 ret = nfs_pagein_multi(desc, &head);
384 ret = nfs_pagein_one(desc, &head);
387 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops,
389 put_lseg(desc->pg_lseg);
390 desc->pg_lseg = NULL;
393 EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
/* Generic read pageio operations: coalescing test plus I/O submission. */
396 static const struct nfs_pageio_ops nfs_pageio_read_ops = {
397 .pg_test = nfs_generic_pg_test,
398 .pg_doio = nfs_generic_pg_readpages,
402 * This is the callback from RPC telling us whether a reply was
403 * received or some error occurred (timeout or socket shutdown).
/*
 * Common read-completion handling: run the per-version read_done hook,
 * account received bytes, and on -ESTALE mark the inode stale and force
 * attribute revalidation.
 */
405 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
409 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
412 status = NFS_PROTO(data->inode)->read_done(task, data);
416 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
418 if (task->tk_status == -ESTALE) {
419 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
420 nfs_mark_for_revalidate(data->inode);
/*
 * Handle a short read.  If the server returned fewer bytes than asked
 * (and did not hit EOF) but made some progress, advance the offset and
 * pgbase past the received data, shrink the remaining count, and restart
 * the RPC to fetch the rest.
 */
425 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
427 struct nfs_readargs *argp = &data->args;
428 struct nfs_readres *resp = &data->res;
/* Nothing to retry on EOF or a complete read. */
430 if (resp->eof || resp->count == argp->count)
433 /* This is a short read! */
434 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
435 /* Has the server at least made some progress? */
436 if (resp->count == 0)
439 /* Yes, so retry the read at the end of the data */
440 data->mds_offset += resp->count;
441 argp->offset += resp->count;
442 argp->pgbase += resp->count;
443 argp->count -= resp->count;
444 nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
448 * Handle a read reply that fills part of a page.
/*
 * rpc_call_done for partial-page sub-reads: after common result
 * processing succeeds, zero any unreceived tail and retry a short read.
 */
450 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
452 struct nfs_read_data *data = calldata;
454 if (nfs_readpage_result(task, data) != 0)
456 if (task->tk_status < 0)
459 nfs_readpage_truncate_uninitialised_page(data);
460 nfs_readpage_retry(task, data);
/*
 * rpc_release for partial-page sub-reads: when the last outstanding
 * sub-request completes (wb_complete drops to zero), mark the page up to
 * date unless an error was flagged, release the request, then free the
 * read data.
 */
463 static void nfs_readpage_release_partial(void *calldata)
465 struct nfs_read_data *data = calldata;
466 struct nfs_page *req = data->req;
467 struct page *page = req->wb_page;
468 int status = data->task.tk_status;
473 if (atomic_dec_and_test(&req->wb_complete)) {
474 if (!PageError(page))
475 SetPageUptodate(page);
476 nfs_readpage_release(req);
478 nfs_readdata_release(calldata);
481 #if defined(CONFIG_NFS_V4_1)
/*
 * rpc_call_prepare for NFSv4.1: reserve a session slot via
 * nfs4_setup_sequence before letting the read RPC proceed.
 */
482 void nfs_read_prepare(struct rpc_task *task, void *calldata)
484 struct nfs_read_data *data = calldata;
486 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
487 &data->args.seq_args, &data->res.seq_res,
490 rpc_call_start(task);
492 #endif /* CONFIG_NFS_V4_1 */
/* RPC callbacks for reads that fill part of a page (multi case). */
494 static const struct rpc_call_ops nfs_read_partial_ops = {
495 #if defined(CONFIG_NFS_V4_1)
496 .rpc_call_prepare = nfs_read_prepare,
497 #endif /* CONFIG_NFS_V4_1 */
498 .rpc_call_done = nfs_readpage_result_partial,
499 .rpc_release = nfs_readpage_release_partial,
/*
 * Mark every fully received page up to date, walking whole pages from
 * args.pgbase.  A trailing partial page is only marked when the read was
 * not short (server reported EOF, or the full count arrived).
 */
502 static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
504 unsigned int count = data->res.count;
505 unsigned int base = data->args.pgbase;
509 count = data->args.count;
510 if (unlikely(count == 0))
512 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
513 base &= ~PAGE_CACHE_MASK;
515 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
516 SetPageUptodate(*pages);
519 /* Was this a short read? */
520 if (data->res.eof || data->res.count == data->args.count)
521 SetPageUptodate(*pages);
525 * This is the callback from RPC telling us whether a reply was
526 * received or some error occurred (timeout or socket shutdown).
/*
 * rpc_call_done for coalesced (full) reads: after common result
 * processing, zero the unreceived tail, mark received pages up to date,
 * then retry if the read was short.
 */
528 static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
530 struct nfs_read_data *data = calldata;
532 if (nfs_readpage_result(task, data) != 0)
534 if (task->tk_status < 0)
537 * Note: nfs_readpage_retry may change the values of
538 * data->args. In the multi-page case, we therefore need
539 * to ensure that we call nfs_readpage_set_pages_uptodate()
542 nfs_readpage_truncate_uninitialised_page(data);
543 nfs_readpage_set_pages_uptodate(data);
544 nfs_readpage_retry(task, data);
/*
 * rpc_release for coalesced reads: release every request still queued on
 * data->pages, then free the read data.
 */
547 static void nfs_readpage_release_full(void *calldata)
549 struct nfs_read_data *data = calldata;
551 while (!list_empty(&data->pages)) {
552 struct nfs_page *req = nfs_list_entry(data->pages.next);
554 nfs_list_remove_request(req);
555 nfs_readpage_release(req);
557 nfs_readdata_release(calldata);
/* RPC callbacks for coalesced reads covering whole pages (one case). */
560 static const struct rpc_call_ops nfs_read_full_ops = {
561 #if defined(CONFIG_NFS_V4_1)
562 .rpc_call_prepare = nfs_read_prepare,
563 #endif /* CONFIG_NFS_V4_1 */
564 .rpc_call_done = nfs_readpage_result_full,
565 .rpc_release = nfs_readpage_release_full,
569 * Read a page over NFS.
570 * We read the page synchronously in the following case:
571 * - The error flag is set for this page. This happens only when a
572 * previous async read operation failed.
/*
 * address_space .readpage entry point: flush pending writes, short-cut
 * when the page is already valid or the inode is stale, resolve an open
 * context (from @file or any open context on the inode), consult
 * fscache, and finally fall back to an async wire read.
 * NOTE(review): several error-branch lines and the return paths are
 * missing from this excerpt.
 */
574 int nfs_readpage(struct file *file, struct page *page)
576 struct nfs_open_context *ctx;
577 struct inode *inode = page->mapping->host;
580 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
581 page, PAGE_CACHE_SIZE, page->index);
582 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
583 nfs_add_stats(inode, NFSIOS_READPAGES, 1);
586 * Try to flush any pending writes to the file..
588 * NOTE! Because we own the page lock, there cannot
589 * be any new pending writes generated at this point
590 * for this page (other pages can be written to).
592 error = nfs_wb_page(inode, page);
595 if (PageUptodate(page))
599 if (NFS_STALE(inode))
604 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
608 ctx = get_nfs_open_context(nfs_file_open_context(file));
/* Only consult fscache for non-synchronous inodes. */
610 if (!IS_SYNC(inode)) {
611 error = nfs_readpage_from_fscache(ctx, inode, page);
616 error = nfs_readpage_async(ctx, inode, page);
619 put_nfs_open_context(ctx);
/* Per-call state passed to readpage_async_filler() by nfs_readpages(). */
626 struct nfs_readdesc {
627 struct nfs_pageio_descriptor *pgio;
628 struct nfs_open_context *ctx;
/*
 * read_cache_pages() filler: build an nfs_page for one page and add it
 * to the shared pageio descriptor.  Past-EOF pages are zero-filled; a
 * rejected add surfaces the descriptor's pg_error.
 * NOTE(review): return-type line, error labels and returns are missing
 * from this excerpt.
 */
632 readpage_async_filler(void *data, struct page *page)
634 struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
635 struct inode *inode = page->mapping->host;
636 struct nfs_page *new;
640 len = nfs_page_length(page);
642 return nfs_return_empty_page(page);
644 new = nfs_create_request(desc->ctx, inode, page, 0, len);
/* Zero the tail of a partial final page before queuing. */
648 if (len < PAGE_CACHE_SIZE)
649 zero_user_segment(page, len, PAGE_CACHE_SIZE);
650 if (!nfs_pageio_add_request(desc->pgio, new)) {
651 error = desc->pgio->pg_error;
656 error = PTR_ERR(new);
/*
 * address_space .readpages entry point: resolve an open context, let
 * fscache satisfy what it can, then feed the remaining pages through
 * readpage_async_filler() via read_cache_pages() and account the pages
 * actually written by the pageio descriptor.
 * NOTE(review): initialiser lines for desc, some error branches and the
 * return are missing from this excerpt.
 */
663 int nfs_readpages(struct file *filp, struct address_space *mapping,
664 struct list_head *pages, unsigned nr_pages)
666 struct nfs_pageio_descriptor pgio;
667 struct nfs_readdesc desc = {
670 struct inode *inode = mapping->host;
671 unsigned long npages;
674 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
676 (long long)NFS_FILEID(inode),
678 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
680 if (NFS_STALE(inode))
684 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
685 if (desc.ctx == NULL)
688 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
690 /* attempt to read as many of the pages as possible from the cache
691 * - this returns -ENOBUFS immediately if the cookie is negative
693 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
696 goto read_complete; /* all pages were read */
698 nfs_pageio_init_read(&pgio, inode);
700 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
702 nfs_pageio_complete(&pgio);
/* Round bytes written up to whole pages for the READPAGES statistic. */
703 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
704 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
706 put_nfs_open_context(desc.ctx);
/*
 * Module init: create the nfs_read_data slab cache and the mempool that
 * guarantees MIN_POOL_READ reads can always be allocated.
 * NOTE(review): the -ENOMEM returns and closing brace are missing from
 * this excerpt.
 */
711 int __init nfs_init_readpagecache(void)
713 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
714 sizeof(struct nfs_read_data),
715 0, SLAB_HWCACHE_ALIGN,
717 if (nfs_rdata_cachep == NULL)
720 nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
722 if (nfs_rdata_mempool == NULL)
/*
 * Module teardown: destroy the read mempool and its backing slab cache
 * (reverse order of creation).
 */
728 void nfs_destroy_readpagecache(void)
730 mempool_destroy(nfs_rdata_mempool);
731 kmem_cache_destroy(nfs_rdata_cachep);