git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge tag 'nfs-for-3.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 May 2012 17:43:51 +0000 (10:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 May 2012 17:43:51 +0000 (10:43 -0700)
Pull NFS client updates from Trond Myklebust:
 "New features include:
   - Rewrite the O_DIRECT code so that it can share the same coalescing
     and pNFS functionality as the page cache code.
   - Allow the server to provide hints as to when we should use pNFS,
     and when it is more efficient to read and write through the
     metadata server.
   - NFS cache consistency updates:
     * Use the ctime to emulate a change attribute for NFSv2/v3 so that
       all NFS versions can share the same cache management code.
     * New cache management code will only look at the change attribute
       and size attribute when deciding whether or not our cached data
       is still valid.
     * Don't request NFSv4 post-op attributes on writes in cases such as
       O_DIRECT, where we don't care about data cache consistency, or
       when we have a write delegation, and know that our cache is still
       consistent.
     * Don't request NFSv4 post-op attributes on operations such as
       COMMIT, where there are no expected metadata updates.
     * Don't request NFSv4 directory post-op attributes in cases where
       the operations themselves already return change attribute
       updates: i.e. operations such as OPEN, CREATE, REMOVE, LINK and
       RENAME.
   - Speed up 'ls' and friends by using READDIR rather than READDIRPLUS
     if we detect no attempts to look up filenames.
   - Improve the code sharing between NFSv2/v3 and v4 mounts
   - NFSv4.1 state management efficiency improvements
   - More patches in preparation for NFSv4/v4.1 migration functionality."

Fix trivial conflict in fs/nfs/nfs4proc.c that was due to the dcache
qstr name initialization changes (that made the length/hash a 64-bit
union)

* tag 'nfs-for-3.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (146 commits)
  NFSv4: Add debugging printks to state manager
  NFSv4: Map NFS4ERR_SHARE_DENIED into an EACCES error instead of EIO
  NFSv4: update_changeattr does not need to set NFS_INO_REVAL_PAGECACHE
  NFSv4.1: nfs4_reset_session should use nfs4_handle_reclaim_lease_error
  NFSv4.1: Handle other occurrences of NFS4ERR_CONN_NOT_BOUND_TO_SESSION
  NFSv4.1: Handle NFS4ERR_CONN_NOT_BOUND_TO_SESSION in the state manager
  NFSv4.1: Handle errors in nfs4_bind_conn_to_session
  NFSv4.1: nfs4_bind_conn_to_session should drain the session
  NFSv4.1: Don't clobber the seqid if exchange_id returns a confirmed clientid
  NFSv4.1: Add DESTROY_CLIENTID
  NFSv4.1: Ensure we use the correct credentials for bind_conn_to_session
  NFSv4.1: Ensure we use the correct credentials for session create/destroy
  NFSv4.1: Move NFSPROC4_CLNT_BIND_CONN_TO_SESSION to the end of the operations
  NFSv4.1: Handle NFS4ERR_SEQ_MISORDERED when confirming the lease
  NFSv4: When purging the lease, we must clear NFS4CLNT_LEASE_CONFIRM
  NFSv4: Clean up the error handling for nfs4_reclaim_lease
  NFSv4.1: Exchange ID must use GFP_NOFS allocation mode
  nfs41: Use BIND_CONN_TO_SESSION for CB_PATH_DOWN*
  nfs4.1: add BIND_CONN_TO_SESSION operation
  NFSv4.1 test the mdsthreshold hint parameters
  ...

48 files changed:
fs/nfs/Kconfig
fs/nfs/Makefile
fs/nfs/blocklayout/blocklayout.c
fs/nfs/blocklayout/blocklayoutdev.c
fs/nfs/client.c
fs/nfs/delegation.c
fs/nfs/delegation.h
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/fscache.c
fs/nfs/fscache.h
fs/nfs/getroot.c
fs/nfs/idmap.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/namespace.c
fs/nfs/netns.h
fs/nfs/nfs2xdr.c
fs/nfs/nfs3proc.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4filelayout.h
fs/nfs/nfs4filelayoutdev.c
fs/nfs/nfs4namespace.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4renewd.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/objlayout/objio_osd.c
fs/nfs/objlayout/objlayout.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/proc.c
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/write.c
include/linux/nfs4.h
include/linux/nfs_fs.h
include/linux/nfs_fs_sb.h
include/linux/nfs_page.h
include/linux/nfs_xdr.h
net/sunrpc/clnt.c
net/sunrpc/rpc_pipe.c
net/sunrpc/rpcb_clnt.c
net/sunrpc/xprt.c

index 2a0e6c599147aac9e66a5969c9c00593aa0dd380..f90f4f5cd421dc7be9db2151ead1d8d458acf096 100644 (file)
@@ -29,9 +29,20 @@ config NFS_FS
 
          If unsure, say N.
 
+config NFS_V2
+       bool "NFS client support for NFS version 2"
+       depends on NFS_FS
+       default y
+       help
+         This option enables support for version 2 of the NFS protocol
+         (RFC 1094) in the kernel's NFS client.
+
+         If unsure, say Y.
+
 config NFS_V3
        bool "NFS client support for NFS version 3"
        depends on NFS_FS
+       default y
        help
          This option enables support for version 3 of the NFS protocol
          (RFC 1813) in the kernel's NFS client.
index b58613d0abb3f62eaa3bb29fd4b8141ebdb596d6..7ddd45d9f1707d24a7661a402d95f58aef108c03 100644 (file)
@@ -4,11 +4,12 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
-nfs-y                  := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
-                          direct.o pagelist.o proc.o read.o symlink.o unlink.o \
+nfs-y                  := client.o dir.o file.o getroot.o inode.o super.o \
+                          direct.o pagelist.o read.o symlink.o unlink.o \
                           write.o namespace.o mount_clnt.o \
                           dns_resolve.o cache_lib.o
 nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
+nfs-$(CONFIG_NFS_V2)   += proc.o nfs2xdr.o
 nfs-$(CONFIG_NFS_V3)   += nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)       += nfs3acl.o
 nfs-$(CONFIG_NFS_V4)   += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
index 7f6a23f0244e7340f2861ac6fb0d3a2d20721287..7ae8a608956f60d7343fdfa9e77be7532b2db17d 100644 (file)
@@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
        struct parallel_io *par = bio->bi_private;
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
 
        do {
                struct page *page = bvec->bv_page;
@@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
                        SetPageUptodate(page);
        } while (bvec >= bio->bi_io_vec);
        if (!uptodate) {
-               if (!rdata->pnfs_error)
-                       rdata->pnfs_error = -EIO;
-               pnfs_set_lo_fail(rdata->lseg);
+               struct nfs_read_data *rdata = par->data;
+               struct nfs_pgio_header *header = rdata->header;
+
+               if (!header->pnfs_error)
+                       header->pnfs_error = -EIO;
+               pnfs_set_lo_fail(header->lseg);
        }
        bio_put(bio);
        put_parallel(par);
@@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)
 {
        struct nfs_read_data *rdata = data;
 
-       rdata->task.tk_status = rdata->pnfs_error;
+       rdata->task.tk_status = rdata->header->pnfs_error;
        INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
        schedule_work(&rdata->task.u.tk_work);
 }
@@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)
 static enum pnfs_try_status
 bl_read_pagelist(struct nfs_read_data *rdata)
 {
+       struct nfs_pgio_header *header = rdata->header;
        int i, hole;
        struct bio *bio = NULL;
        struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -239,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
        int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
 
        dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
-              rdata->npages, f_offset, (unsigned int)rdata->args.count);
+              rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
 
        par = alloc_parallel(rdata);
        if (!par)
@@ -249,17 +252,17 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 
        isect = (sector_t) (f_offset >> SECTOR_SHIFT);
        /* Code assumes extents are page-aligned */
-       for (i = pg_index; i < rdata->npages; i++) {
+       for (i = pg_index; i < rdata->pages.npages; i++) {
                if (!extent_length) {
                        /* We've used up the previous extent */
                        bl_put_extent(be);
                        bl_put_extent(cow_read);
                        bio = bl_submit_bio(READ, bio);
                        /* Get the next one */
-                       be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
+                       be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
                                             isect, &cow_read);
                        if (!be) {
-                               rdata->pnfs_error = -EIO;
+                               header->pnfs_error = -EIO;
                                goto out;
                        }
                        extent_length = be->be_length -
@@ -282,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)
                        struct pnfs_block_extent *be_read;
 
                        be_read = (hole && cow_read) ? cow_read : be;
-                       bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
+                       bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+                                                READ,
                                                 isect, pages[i], be_read,
                                                 bl_end_io_read, par);
                        if (IS_ERR(bio)) {
-                               rdata->pnfs_error = PTR_ERR(bio);
+                               header->pnfs_error = PTR_ERR(bio);
                                bio = NULL;
                                goto out;
                        }
@@ -294,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
                isect += PAGE_CACHE_SECTORS;
                extent_length -= PAGE_CACHE_SECTORS;
        }
-       if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
+       if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
                rdata->res.eof = 1;
-               rdata->res.count = rdata->inode->i_size - f_offset;
+               rdata->res.count = header->inode->i_size - f_offset;
        } else {
                rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
        }
@@ -345,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
        struct parallel_io *par = bio->bi_private;
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
 
        do {
                struct page *page = bvec->bv_page;
@@ -358,9 +361,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
        } while (bvec >= bio->bi_io_vec);
 
        if (unlikely(!uptodate)) {
-               if (!wdata->pnfs_error)
-                       wdata->pnfs_error = -EIO;
-               pnfs_set_lo_fail(wdata->lseg);
+               struct nfs_write_data *data = par->data;
+               struct nfs_pgio_header *header = data->header;
+
+               if (!header->pnfs_error)
+                       header->pnfs_error = -EIO;
+               pnfs_set_lo_fail(header->lseg);
        }
        bio_put(bio);
        put_parallel(par);
@@ -370,12 +376,13 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
        struct parallel_io *par = bio->bi_private;
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
+       struct nfs_write_data *data = par->data;
+       struct nfs_pgio_header *header = data->header;
 
        if (!uptodate) {
-               if (!wdata->pnfs_error)
-                       wdata->pnfs_error = -EIO;
-               pnfs_set_lo_fail(wdata->lseg);
+               if (!header->pnfs_error)
+                       header->pnfs_error = -EIO;
+               pnfs_set_lo_fail(header->lseg);
        }
        bio_put(bio);
        put_parallel(par);
@@ -391,9 +398,9 @@ static void bl_write_cleanup(struct work_struct *work)
        dprintk("%s enter\n", __func__);
        task = container_of(work, struct rpc_task, u.tk_work);
        wdata = container_of(task, struct nfs_write_data, task);
-       if (likely(!wdata->pnfs_error)) {
+       if (likely(!wdata->header->pnfs_error)) {
                /* Marks for LAYOUTCOMMIT */
-               mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+               mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
                                     wdata->args.offset, wdata->args.count);
        }
        pnfs_ld_write_done(wdata);
@@ -404,12 +411,12 @@ static void bl_end_par_io_write(void *data, int num_se)
 {
        struct nfs_write_data *wdata = data;
 
-       if (unlikely(wdata->pnfs_error)) {
-               bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+       if (unlikely(wdata->header->pnfs_error)) {
+               bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
                                        num_se);
        }
 
-       wdata->task.tk_status = wdata->pnfs_error;
+       wdata->task.tk_status = wdata->header->pnfs_error;
        wdata->verf.committed = NFS_FILE_SYNC;
        INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
        schedule_work(&wdata->task.u.tk_work);
@@ -540,6 +547,7 @@ check_page:
 static enum pnfs_try_status
 bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 {
+       struct nfs_pgio_header *header = wdata->header;
        int i, ret, npg_zero, pg_index, last = 0;
        struct bio *bio = NULL;
        struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -552,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
        pgoff_t index;
        u64 temp;
        int npg_per_block =
-           NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+           NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
 
        dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
        /* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -566,7 +574,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
        /* At this point, have to be more careful with error handling */
 
        isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
-       be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
+       be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
        if (!be || !is_writable(be, isect)) {
                dprintk("%s no matching extents!\n", __func__);
                goto out_mds;
@@ -597,10 +605,10 @@ fill_invalid_ext:
                        dprintk("%s zero %dth page: index %lu isect %llu\n",
                                __func__, npg_zero, index,
                                (unsigned long long)isect);
-                       page = bl_find_get_zeroing_page(wdata->inode, index,
+                       page = bl_find_get_zeroing_page(header->inode, index,
                                                        cow_read);
                        if (unlikely(IS_ERR(page))) {
-                               wdata->pnfs_error = PTR_ERR(page);
+                               header->pnfs_error = PTR_ERR(page);
                                goto out;
                        } else if (page == NULL)
                                goto next_page;
@@ -612,7 +620,7 @@ fill_invalid_ext:
                                        __func__, ret);
                                end_page_writeback(page);
                                page_cache_release(page);
-                               wdata->pnfs_error = ret;
+                               header->pnfs_error = ret;
                                goto out;
                        }
                        if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -620,11 +628,11 @@ fill_invalid_ext:
                        else {
                                end_page_writeback(page);
                                page_cache_release(page);
-                               wdata->pnfs_error = -ENOMEM;
+                               header->pnfs_error = -ENOMEM;
                                goto out;
                        }
                        /* FIXME: This should be done in bi_end_io */
-                       mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+                       mark_extents_written(BLK_LSEG2EXT(header->lseg),
                                             page->index << PAGE_CACHE_SHIFT,
                                             PAGE_CACHE_SIZE);
 
@@ -632,7 +640,7 @@ fill_invalid_ext:
                                                 isect, page, be,
                                                 bl_end_io_write_zero, par);
                        if (IS_ERR(bio)) {
-                               wdata->pnfs_error = PTR_ERR(bio);
+                               header->pnfs_error = PTR_ERR(bio);
                                bio = NULL;
                                goto out;
                        }
@@ -647,16 +655,16 @@ next_page:
 
        /* Middle pages */
        pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
-       for (i = pg_index; i < wdata->npages; i++) {
+       for (i = pg_index; i < wdata->pages.npages; i++) {
                if (!extent_length) {
                        /* We've used up the previous extent */
                        bl_put_extent(be);
                        bio = bl_submit_bio(WRITE, bio);
                        /* Get the next one */
-                       be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
+                       be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
                                             isect, NULL);
                        if (!be || !is_writable(be, isect)) {
-                               wdata->pnfs_error = -EINVAL;
+                               header->pnfs_error = -EINVAL;
                                goto out;
                        }
                        if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -664,7 +672,7 @@ next_page:
                                                                be->be_inval)))
                                        par->bse_count++;
                                else {
-                                       wdata->pnfs_error = -ENOMEM;
+                                       header->pnfs_error = -ENOMEM;
                                        goto out;
                                }
                        }
@@ -677,15 +685,15 @@ next_page:
                        if (unlikely(ret)) {
                                dprintk("%s bl_mark_sectors_init fail %d\n",
                                        __func__, ret);
-                               wdata->pnfs_error = ret;
+                               header->pnfs_error = ret;
                                goto out;
                        }
                }
-               bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
+               bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
                                         isect, pages[i], be,
                                         bl_end_io_write, par);
                if (IS_ERR(bio)) {
-                       wdata->pnfs_error = PTR_ERR(bio);
+                       header->pnfs_error = PTR_ERR(bio);
                        bio = NULL;
                        goto out;
                }
index a5c88a554d921455256bb4dbeea7eaa498da5499..c96554245ccf7d90703c80d9a3e3f81b48fa65ee 100644 (file)
@@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
        uint8_t *dataptr;
        DECLARE_WAITQUEUE(wq, current);
        int offset, len, i, rc;
-       struct net *net = server->nfs_client->net;
+       struct net *net = server->nfs_client->cl_net;
        struct nfs_net *nn = net_generic(net, nfs_net_id);
        struct bl_dev_msg *reply = &nn->bl_mount_reply;
 
index 60f7e4ec842cf1d4fd48f21862d1d5ece26efc88..7d108753af81e9783ab1465c8bb9986452a6cf00 100644 (file)
@@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
 {
        int ret = 0;
-       struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+       struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 
        if (clp->rpc_ops->version != 4 || minorversion != 0)
                return ret;
@@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true;
  * RPC cruft for NFS
  */
 static const struct rpc_version *nfs_version[5] = {
+#ifdef CONFIG_NFS_V2
        [2]                     = &nfs_version2,
+#endif
 #ifdef CONFIG_NFS_V3
        [3]                     = &nfs_version3,
 #endif
@@ -129,6 +131,7 @@ const struct rpc_program nfsacl_program = {
 #endif  /* CONFIG_NFS_V3_ACL */
 
 struct nfs_client_initdata {
+       unsigned long init_flags;
        const char *hostname;
        const struct sockaddr *addr;
        size_t addrlen;
@@ -172,7 +175,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
        clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
        clp->cl_proto = cl_init->proto;
-       clp->net = get_net(cl_init->net);
+       clp->cl_net = get_net(cl_init->net);
 
 #ifdef CONFIG_NFS_V4
        err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -182,7 +185,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
        spin_lock_init(&clp->cl_lock);
        INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
        rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
-       clp->cl_boot_time = CURRENT_TIME;
        clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
        clp->cl_minorversion = cl_init->minorversion;
        clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
@@ -207,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp)
        if (nfs4_has_session(clp)) {
                nfs4_deviceid_purge_client(clp);
                nfs4_destroy_session(clp->cl_session);
+               nfs4_destroy_clientid(clp);
        }
 
 }
@@ -235,6 +238,9 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
                nfs_idmap_delete(clp);
 
        rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
+       kfree(clp->cl_serverowner);
+       kfree(clp->cl_serverscope);
+       kfree(clp->cl_implid);
 }
 
 /* idr_remove_all is not needed as all id's are removed by nfs_put_client */
@@ -248,7 +254,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net)
 /* nfs_client_lock held */
 static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
 {
-       struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+       struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 
        if (clp->cl_cb_ident)
                idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
@@ -301,10 +307,8 @@ static void nfs_free_client(struct nfs_client *clp)
        if (clp->cl_machine_cred != NULL)
                put_rpccred(clp->cl_machine_cred);
 
-       put_net(clp->net);
+       put_net(clp->cl_net);
        kfree(clp->cl_hostname);
-       kfree(clp->server_scope);
-       kfree(clp->impl_id);
        kfree(clp);
 
        dprintk("<-- nfs_free_client()\n");
@@ -321,7 +325,7 @@ void nfs_put_client(struct nfs_client *clp)
                return;
 
        dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
-       nn = net_generic(clp->net, nfs_net_id);
+       nn = net_generic(clp->cl_net, nfs_net_id);
 
        if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
                list_del(&clp->cl_share_link);
@@ -456,6 +460,8 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr,
            clp->cl_cons_state == NFS_CS_SESSION_INITING))
                return false;
 
+       smp_rmb();
+
        /* Match the version and minorversion */
        if (clp->rpc_ops->version != 4 ||
            clp->cl_minorversion != minorversion)
@@ -504,6 +510,47 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
        return NULL;
 }
 
+static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+{
+       return clp->cl_cons_state != NFS_CS_INITING;
+}
+
+int nfs_wait_client_init_complete(const struct nfs_client *clp)
+{
+       return wait_event_killable(nfs_client_active_wq,
+                       nfs_client_init_is_complete(clp));
+}
+
+/*
+ * Found an existing client.  Make sure it's ready before returning.
+ */
+static struct nfs_client *
+nfs_found_client(const struct nfs_client_initdata *cl_init,
+                struct nfs_client *clp)
+{
+       int error;
+
+       error = nfs_wait_client_init_complete(clp);
+       if (error < 0) {
+               nfs_put_client(clp);
+               return ERR_PTR(-ERESTARTSYS);
+       }
+
+       if (clp->cl_cons_state < NFS_CS_READY) {
+               error = clp->cl_cons_state;
+               nfs_put_client(clp);
+               return ERR_PTR(error);
+       }
+
+       smp_rmb();
+
+       BUG_ON(clp->cl_cons_state != NFS_CS_READY);
+
+       dprintk("<-- %s found nfs_client %p for %s\n",
+               __func__, clp, cl_init->hostname ?: "");
+       return clp;
+}
+
 /*
  * Look up a client by IP address and protocol version
  * - creates a new record if one doesn't yet exist
@@ -512,11 +559,9 @@ static struct nfs_client *
 nfs_get_client(const struct nfs_client_initdata *cl_init,
               const struct rpc_timeout *timeparms,
               const char *ip_addr,
-              rpc_authflavor_t authflavour,
-              int noresvport)
+              rpc_authflavor_t authflavour)
 {
        struct nfs_client *clp, *new = NULL;
-       int error;
        struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
 
        dprintk("--> nfs_get_client(%s,v%u)\n",
@@ -527,60 +572,29 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
                spin_lock(&nn->nfs_client_lock);
 
                clp = nfs_match_client(cl_init);
-               if (clp)
-                       goto found_client;
-               if (new)
-                       goto install_client;
+               if (clp) {
+                       spin_unlock(&nn->nfs_client_lock);
+                       if (new)
+                               nfs_free_client(new);
+                       return nfs_found_client(cl_init, clp);
+               }
+               if (new) {
+                       list_add(&new->cl_share_link, &nn->nfs_client_list);
+                       spin_unlock(&nn->nfs_client_lock);
+                       new->cl_flags = cl_init->init_flags;
+                       return cl_init->rpc_ops->init_client(new,
+                                               timeparms, ip_addr,
+                                               authflavour);
+               }
 
                spin_unlock(&nn->nfs_client_lock);
 
                new = nfs_alloc_client(cl_init);
        } while (!IS_ERR(new));
 
-       dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new));
+       dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
+               cl_init->hostname ?: "", PTR_ERR(new));
        return new;
-
-       /* install a new client and return with it unready */
-install_client:
-       clp = new;
-       list_add(&clp->cl_share_link, &nn->nfs_client_list);
-       spin_unlock(&nn->nfs_client_lock);
-
-       error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
-                                             authflavour, noresvport);
-       if (error < 0) {
-               nfs_put_client(clp);
-               return ERR_PTR(error);
-       }
-       dprintk("--> nfs_get_client() = %p [new]\n", clp);
-       return clp;
-
-       /* found an existing client
-        * - make sure it's ready before returning
-        */
-found_client:
-       spin_unlock(&nn->nfs_client_lock);
-
-       if (new)
-               nfs_free_client(new);
-
-       error = wait_event_killable(nfs_client_active_wq,
-                               clp->cl_cons_state < NFS_CS_INITING);
-       if (error < 0) {
-               nfs_put_client(clp);
-               return ERR_PTR(-ERESTARTSYS);
-       }
-
-       if (clp->cl_cons_state < NFS_CS_READY) {
-               error = clp->cl_cons_state;
-               nfs_put_client(clp);
-               return ERR_PTR(error);
-       }
-
-       BUG_ON(clp->cl_cons_state != NFS_CS_READY);
-
-       dprintk("--> nfs_get_client() = %p [share]\n", clp);
-       return clp;
 }
 
 /*
@@ -588,26 +602,11 @@ found_client:
  */
 void nfs_mark_client_ready(struct nfs_client *clp, int state)
 {
+       smp_wmb();
        clp->cl_cons_state = state;
        wake_up_all(&nfs_client_active_wq);
 }
 
-/*
- * With sessions, the client is not marked ready until after a
- * successful EXCHANGE_ID and CREATE_SESSION.
- *
- * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
- * other versions of NFS can be tried.
- */
-int nfs4_check_client_ready(struct nfs_client *clp)
-{
-       if (!nfs4_has_session(clp))
-               return 0;
-       if (clp->cl_cons_state < NFS_CS_READY)
-               return -EPROTONOSUPPORT;
-       return 0;
-}
-
 /*
  * Initialise the timeout values for a connection
  */
@@ -654,12 +653,11 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
  */
 static int nfs_create_rpc_client(struct nfs_client *clp,
                                 const struct rpc_timeout *timeparms,
-                                rpc_authflavor_t flavor,
-                                int discrtry, int noresvport)
+                                rpc_authflavor_t flavor)
 {
        struct rpc_clnt         *clnt = NULL;
        struct rpc_create_args args = {
-               .net            = clp->net,
+               .net            = clp->cl_net,
                .protocol       = clp->cl_proto,
                .address        = (struct sockaddr *)&clp->cl_addr,
                .addrsize       = clp->cl_addrlen,
@@ -670,9 +668,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
                .authflavor     = flavor,
        };
 
-       if (discrtry)
+       if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
                args.flags |= RPC_CLNT_CREATE_DISCRTRY;
-       if (noresvport)
+       if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
                args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
        if (!IS_ERR(clp->cl_rpcclient))
@@ -713,7 +711,7 @@ static int nfs_start_lockd(struct nfs_server *server)
                .nfs_version    = clp->rpc_ops->version,
                .noresvport     = server->flags & NFS_MOUNT_NORESVPORT ?
                                        1 : 0,
-               .net            = clp->net,
+               .net            = clp->cl_net,
        };
 
        if (nlm_init.nfs_version > 3)
@@ -805,36 +803,43 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
        return 0;
 }
 
-/*
- * Initialise an NFS2 or NFS3 client
+/**
+ * nfs_init_client - Initialise an NFS2 or NFS3 client
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: IP presentation address (not used)
+ * @authflavour: authentication flavor for underlying RPC transport
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
  */
-int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
-                   const char *ip_addr, rpc_authflavor_t authflavour,
-                   int noresvport)
+struct nfs_client *nfs_init_client(struct nfs_client *clp,
+                   const struct rpc_timeout *timeparms,
+                   const char *ip_addr, rpc_authflavor_t authflavour)
 {
        int error;
 
        if (clp->cl_cons_state == NFS_CS_READY) {
                /* the client is already initialised */
                dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
-               return 0;
+               return clp;
        }
 
        /*
         * Create a client RPC handle for doing FSSTAT with UNIX auth only
         * - RFC 2623, sec 2.3.2
         */
-       error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
-                                     0, noresvport);
+       error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
        if (error < 0)
                goto error;
        nfs_mark_client_ready(clp, NFS_CS_READY);
-       return 0;
+       return clp;
 
 error:
        nfs_mark_client_ready(clp, error);
+       nfs_put_client(clp);
        dprintk("<-- nfs_init_client() = xerror %d\n", error);
-       return error;
+       return ERR_PTR(error);
 }
 
 /*
@@ -847,7 +852,7 @@ static int nfs_init_server(struct nfs_server *server,
                .hostname = data->nfs_server.hostname,
                .addr = (const struct sockaddr *)&data->nfs_server.address,
                .addrlen = data->nfs_server.addrlen,
-               .rpc_ops = &nfs_v2_clientops,
+               .rpc_ops = NULL,
                .proto = data->nfs_server.protocol,
                .net = data->net,
        };
@@ -857,17 +862,28 @@ static int nfs_init_server(struct nfs_server *server,
 
        dprintk("--> nfs_init_server()\n");
 
+       switch (data->version) {
+#ifdef CONFIG_NFS_V2
+       case 2:
+               cl_init.rpc_ops = &nfs_v2_clientops;
+               break;
+#endif
 #ifdef CONFIG_NFS_V3
-       if (data->version == 3)
+       case 3:
                cl_init.rpc_ops = &nfs_v3_clientops;
+               break;
 #endif
+       default:
+               return -EPROTONOSUPPORT;
+       }
 
        nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
                        data->timeo, data->retrans);
+       if (data->flags & NFS_MOUNT_NORESVPORT)
+               set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 
        /* Allocate or find a client reference we can use */
-       clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
-                            data->flags & NFS_MOUNT_NORESVPORT);
+       clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
        if (IS_ERR(clp)) {
                dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
                return PTR_ERR(clp);
@@ -880,7 +896,7 @@ static int nfs_init_server(struct nfs_server *server,
        server->options = data->options;
        server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
                NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
-               NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+               NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
 
        if (data->rsize)
                server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1048,7 +1064,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
 static void nfs_server_insert_lists(struct nfs_server *server)
 {
        struct nfs_client *clp = server->nfs_client;
-       struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+       struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 
        spin_lock(&nn->nfs_client_lock);
        list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
@@ -1065,7 +1081,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
 
        if (clp == NULL)
                return;
-       nn = net_generic(clp->net, nfs_net_id);
+       nn = net_generic(clp->cl_net, nfs_net_id);
        spin_lock(&nn->nfs_client_lock);
        list_del_rcu(&server->client_link);
        if (list_empty(&clp->cl_superblocks))
@@ -1333,21 +1349,27 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
                 * so that the client back channel can find the
                 * nfs_client struct
                 */
-               clp->cl_cons_state = NFS_CS_SESSION_INITING;
+               nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
        }
 #endif /* CONFIG_NFS_V4_1 */
 
        return nfs4_init_callback(clp);
 }
 
-/*
- * Initialise an NFS4 client record
+/**
+ * nfs4_init_client - Initialise an NFS4 client record
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: callback IP address in presentation format
+ * @authflavour: authentication flavor for underlying RPC transport
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
  */
-int nfs4_init_client(struct nfs_client *clp,
-                    const struct rpc_timeout *timeparms,
-                    const char *ip_addr,
-                    rpc_authflavor_t authflavour,
-                    int noresvport)
+struct nfs_client *nfs4_init_client(struct nfs_client *clp,
+                                   const struct rpc_timeout *timeparms,
+                                   const char *ip_addr,
+                                   rpc_authflavor_t authflavour)
 {
        char buf[INET6_ADDRSTRLEN + 1];
        int error;
@@ -1355,14 +1377,14 @@ int nfs4_init_client(struct nfs_client *clp,
        if (clp->cl_cons_state == NFS_CS_READY) {
                /* the client is initialised already */
                dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
-               return 0;
+               return clp;
        }
 
        /* Check NFS protocol revision and initialize RPC op vector */
        clp->rpc_ops = &nfs_v4_clientops;
 
-       error = nfs_create_rpc_client(clp, timeparms, authflavour,
-                                     1, noresvport);
+       __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+       error = nfs_create_rpc_client(clp, timeparms, authflavour);
        if (error < 0)
                goto error;
 
@@ -1395,12 +1417,13 @@ int nfs4_init_client(struct nfs_client *clp,
 
        if (!nfs4_has_session(clp))
                nfs_mark_client_ready(clp, NFS_CS_READY);
-       return 0;
+       return clp;
 
 error:
        nfs_mark_client_ready(clp, error);
+       nfs_put_client(clp);
        dprintk("<-- nfs4_init_client() = xerror %d\n", error);
-       return error;
+       return ERR_PTR(error);
 }
 
 /*
@@ -1429,9 +1452,11 @@ static int nfs4_set_client(struct nfs_server *server,
 
        dprintk("--> nfs4_set_client()\n");
 
+       if (server->flags & NFS_MOUNT_NORESVPORT)
+               set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
        /* Allocate or find a client reference we can use */
-       clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
-                            server->flags & NFS_MOUNT_NORESVPORT);
+       clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
        if (IS_ERR(clp)) {
                error = PTR_ERR(clp);
                goto error;
@@ -1465,8 +1490,8 @@ error:
  * the MDS.
  */
 struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
-               const struct sockaddr *ds_addr,
-               int ds_addrlen, int ds_proto)
+               const struct sockaddr *ds_addr, int ds_addrlen,
+               int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
 {
        struct nfs_client_initdata cl_init = {
                .addr = ds_addr,
@@ -1474,14 +1499,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
                .rpc_ops = &nfs_v4_clientops,
                .proto = ds_proto,
                .minorversion = mds_clp->cl_minorversion,
-               .net = mds_clp->net,
-       };
-       struct rpc_timeout ds_timeout = {
-               .to_initval = 15 * HZ,
-               .to_maxval = 15 * HZ,
-               .to_retries = 1,
-               .to_exponential = 1,
+               .net = mds_clp->cl_net,
        };
+       struct rpc_timeout ds_timeout;
        struct nfs_client *clp;
 
        /*
@@ -1489,8 +1509,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
         * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
         * (section 13.1 RFC 5661).
         */
+       nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
        clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
-                            mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
+                            mds_clp->cl_rpcclient->cl_auth->au_flavor);
 
        dprintk("<-- %s %p\n", __func__, clp);
        return clp;
@@ -1701,7 +1722,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
                                rpc_protocol(parent_server->client),
                                parent_server->client->cl_timeout,
                                parent_client->cl_mvops->minor_version,
-                               parent_client->net);
+                               parent_client->cl_net);
        if (error < 0)
                goto error;
 
@@ -1805,6 +1826,7 @@ void nfs_clients_init(struct net *net)
        idr_init(&nn->cb_ident_idr);
 #endif
        spin_lock_init(&nn->nfs_client_lock);
+       nn->boot_time = CURRENT_TIME;
 }
 
 #ifdef CONFIG_PROC_FS
index 89af1d269274f3a91401f704226dec43f9c02528..bd3a9601d32d9915a70e1888ab5a710671e00aac 100644 (file)
@@ -316,6 +316,10 @@ out:
  * nfs_client_return_marked_delegations - return previously marked delegations
  * @clp: nfs_client to process
  *
+ * Note that this function is designed to be called by the state
+ * manager thread. For this reason, it cannot flush the dirty data,
+ * since that could deadlock in case of a state recovery error.
+ *
  * Returns zero on success, or a negative errno value.
  */
 int nfs_client_return_marked_delegations(struct nfs_client *clp)
@@ -340,11 +344,9 @@ restart:
                                                                server);
                        rcu_read_unlock();
 
-                       if (delegation != NULL) {
-                               filemap_flush(inode->i_mapping);
+                       if (delegation != NULL)
                                err = __nfs_inode_return_delegation(inode,
                                                                delegation, 0);
-                       }
                        iput(inode);
                        if (!err)
                                goto restart;
@@ -380,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
  * nfs_inode_return_delegation - synchronously return a delegation
  * @inode: inode to process
  *
+ * This routine will always flush any dirty data to disk on the
+ * assumption that if we need to return the delegation, then
+ * we should stop caching.
+ *
  * Returns zero on success, or a negative errno value.
  */
 int nfs_inode_return_delegation(struct inode *inode)
@@ -389,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)
        struct nfs_delegation *delegation;
        int err = 0;
 
+       nfs_wb_all(inode);
        if (rcu_access_pointer(nfsi->delegation) != NULL) {
                delegation = nfs_detach_delegation(nfsi, server);
                if (delegation != NULL) {
-                       nfs_wb_all(inode);
                        err = __nfs_inode_return_delegation(inode, delegation, 1);
                }
        }
@@ -538,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode,
        struct nfs_client *clp = server->nfs_client;
        struct nfs_delegation *delegation;
 
+       filemap_flush(inode->i_mapping);
+
        rcu_read_lock();
        delegation = rcu_dereference(NFS_I(inode)->delegation);
 
index cd6a7a8dadae9054e5bd5557c0226bfd8accc116..72709c4193fa28ac3afeba63f65e2f46e954eb23 100644 (file)
@@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
 
 static inline int nfs_inode_return_delegation(struct inode *inode)
 {
+       nfs_wb_all(inode);
        return 0;
 }
 #endif
index eedd24d0ad2efc6a02e7ff7b552e2e707adadfbd..0989a2099688a377279d76f4f8c56dbc91070027 100644 (file)
@@ -474,6 +474,29 @@ different:
        return 0;
 }
 
+static
+bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
+{
+       if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
+               return false;
+       if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
+               return true;
+       if (filp->f_pos == 0)
+               return true;
+       return false;
+}
+
+/*
+ * This function is called by the lookup code to request the use of
+ * readdirplus to accelerate any future lookups in the same
+ * directory.
+ */
+static
+void nfs_advise_use_readdirplus(struct inode *dir)
+{
+       set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
+}
+
 static
 void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 {
@@ -871,7 +894,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        desc->file = filp;
        desc->dir_cookie = &dir_ctx->dir_cookie;
        desc->decode = NFS_PROTO(inode)->decode_dirent;
-       desc->plus = NFS_USE_READDIRPLUS(inode);
+       desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
 
        nfs_block_sillyrename(dentry);
        res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1111,7 +1134,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
        if (!inode) {
                if (nfs_neg_need_reval(dir, dentry, nd))
                        goto out_bad;
-               goto out_valid;
+               goto out_valid_noent;
        }
 
        if (is_bad_inode(inode)) {
@@ -1140,7 +1163,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
        if (fhandle == NULL || fattr == NULL)
                goto out_error;
 
-       error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+       error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
        if (error)
                goto out_bad;
        if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1153,6 +1176,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 out_set_verifier:
        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
  out_valid:
+       /* Success: notify readdir to use READDIRPLUS */
+       nfs_advise_use_readdirplus(dir);
+ out_valid_noent:
        dput(parent);
        dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
                        __func__, dentry->d_parent->d_name.name,
@@ -1296,7 +1322,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
        parent = dentry->d_parent;
        /* Protect against concurrent sillydeletes */
        nfs_block_sillyrename(parent);
-       error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+       error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
        if (error == -ENOENT)
                goto no_entry;
        if (error < 0) {
@@ -1308,6 +1334,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
        if (IS_ERR(res))
                goto out_unblock_sillyrename;
 
+       /* Success: notify readdir to use READDIRPLUS */
+       nfs_advise_use_readdirplus(dir);
+
 no_entry:
        res = d_materialise_unique(dentry, inode);
        if (res != NULL) {
@@ -1643,7 +1672,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
        if (dentry->d_inode)
                goto out;
        if (fhandle->size == 0) {
-               error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+               error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
                if (error)
                        goto out_error;
        }
index 481be7f7bdd3b953179a2ab354c9f05fc4773925..23d170bc44f4b88bea2016983bb7fa7daa2a0ae5 100644 (file)
@@ -56,6 +56,7 @@
 
 #include "internal.h"
 #include "iostat.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY                NFSDBG_VFS
 
@@ -81,16 +82,19 @@ struct nfs_direct_req {
        struct completion       completion;     /* wait for i/o completion */
 
        /* commit state */
-       struct list_head        rewrite_list;   /* saved nfs_write_data structs */
-       struct nfs_write_data * commit_data;    /* special write_data for commits */
+       struct nfs_mds_commit_info mds_cinfo;   /* Storage for cinfo */
+       struct pnfs_ds_commit_info ds_cinfo;    /* Storage for cinfo */
+       struct work_struct      work;
        int                     flags;
 #define NFS_ODIRECT_DO_COMMIT          (1)     /* an unstable reply was received */
 #define NFS_ODIRECT_RESCHED_WRITES     (2)     /* write verification failed */
        struct nfs_writeverf    verf;           /* unstable write verifier */
 };
 
+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
-static const struct rpc_call_ops nfs_write_direct_ops;
+static void nfs_direct_write_schedule_work(struct work_struct *work);
 
 static inline void get_dreq(struct nfs_direct_req *dreq)
 {
@@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
        return -EINVAL;
 }
 
-static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
-{
-       unsigned int npages;
-       unsigned int i;
-
-       if (count == 0)
-               return;
-       pages += (pgbase >> PAGE_SHIFT);
-       npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       for (i = 0; i < npages; i++) {
-               struct page *page = pages[i];
-               if (!PageCompound(page))
-                       set_page_dirty(page);
-       }
-}
-
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 {
        unsigned int i;
@@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
                page_cache_release(pages[i]);
 }
 
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+                             struct nfs_direct_req *dreq)
+{
+       cinfo->lock = &dreq->lock;
+       cinfo->mds = &dreq->mds_cinfo;
+       cinfo->ds = &dreq->ds_cinfo;
+       cinfo->dreq = dreq;
+       cinfo->completion_ops = &nfs_direct_commit_completion_ops;
+}
+
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
        struct nfs_direct_req *dreq;
 
-       dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL);
+       dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
        if (!dreq)
                return NULL;
 
        kref_init(&dreq->kref);
        kref_get(&dreq->kref);
        init_completion(&dreq->completion);
-       INIT_LIST_HEAD(&dreq->rewrite_list);
-       dreq->iocb = NULL;
-       dreq->ctx = NULL;
-       dreq->l_ctx = NULL;
+       INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+       INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
        spin_lock_init(&dreq->lock);
-       atomic_set(&dreq->io_count, 0);
-       dreq->count = 0;
-       dreq->error = 0;
-       dreq->flags = 0;
 
        return dreq;
 }
@@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
        nfs_direct_req_release(dreq);
 }
 
-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
+static void nfs_direct_readpage_release(struct nfs_page *req)
 {
-       struct nfs_read_data *data = calldata;
-
-       nfs_readpage_result(task, data);
+       dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+               req->wb_context->dentry->d_inode->i_sb->s_id,
+               (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+               req->wb_bytes,
+               (long long)req_offset(req));
+       nfs_release_request(req);
 }
 
-static void nfs_direct_read_release(void *calldata)
+static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 {
+       unsigned long bytes = 0;
+       struct nfs_direct_req *dreq = hdr->dreq;
 
-       struct nfs_read_data *data = calldata;
-       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
-       int status = data->task.tk_status;
+       if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+               goto out_put;
 
        spin_lock(&dreq->lock);
-       if (unlikely(status < 0)) {
-               dreq->error = status;
-               spin_unlock(&dreq->lock);
-       } else {
-               dreq->count += data->res.count;
-               spin_unlock(&dreq->lock);
-               nfs_direct_dirty_pages(data->pagevec,
-                               data->args.pgbase,
-                               data->res.count);
-       }
-       nfs_direct_release_pages(data->pagevec, data->npages);
+       if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
+               dreq->error = hdr->error;
+       else
+               dreq->count += hdr->good_bytes;
+       spin_unlock(&dreq->lock);
 
+       while (!list_empty(&hdr->pages)) {
+               struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+               struct page *page = req->wb_page;
+
+               if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+                       if (bytes > hdr->good_bytes)
+                               zero_user(page, 0, PAGE_SIZE);
+                       else if (hdr->good_bytes - bytes < PAGE_SIZE)
+                               zero_user_segment(page,
+                                       hdr->good_bytes & ~PAGE_MASK,
+                                       PAGE_SIZE);
+               }
+               if (!PageCompound(page)) {
+                       if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+                               if (bytes < hdr->good_bytes)
+                                       set_page_dirty(page);
+                       } else
+                               set_page_dirty(page);
+               }
+               bytes += req->wb_bytes;
+               nfs_list_remove_request(req);
+               nfs_direct_readpage_release(req);
+       }
+out_put:
        if (put_dreq(dreq))
                nfs_direct_complete(dreq);
-       nfs_readdata_free(data);
+       hdr->release(hdr);
+}
+
+static void nfs_read_sync_pgio_error(struct list_head *head)
+{
+       struct nfs_page *req;
+
+       while (!list_empty(head)) {
+               req = nfs_list_entry(head->next);
+               nfs_list_remove_request(req);
+               nfs_release_request(req);
+       }
 }
 
-static const struct rpc_call_ops nfs_read_direct_ops = {
-       .rpc_call_prepare = nfs_read_prepare,
-       .rpc_call_done = nfs_direct_read_result,
-       .rpc_release = nfs_direct_read_release,
+static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
+{
+       get_dreq(hdr->dreq);
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
+       .error_cleanup = nfs_read_sync_pgio_error,
+       .init_hdr = nfs_direct_pgio_init,
+       .completion = nfs_direct_read_completion,
 };
 
 /*
@@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
                                                const struct iovec *iov,
                                                loff_t pos)
 {
+       struct nfs_direct_req *dreq = desc->pg_dreq;
        struct nfs_open_context *ctx = dreq->ctx;
        struct inode *inode = ctx->dentry->d_inode;
        unsigned long user_addr = (unsigned long)iov->iov_base;
        size_t count = iov->iov_len;
        size_t rsize = NFS_SERVER(inode)->rsize;
-       struct rpc_task *task;
-       struct rpc_message msg = {
-               .rpc_cred = ctx->cred,
-       };
-       struct rpc_task_setup task_setup_data = {
-               .rpc_client = NFS_CLIENT(inode),
-               .rpc_message = &msg,
-               .callback_ops = &nfs_read_direct_ops,
-               .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
-       };
        unsigned int pgbase;
        int result;
        ssize_t started = 0;
+       struct page **pagevec = NULL;
+       unsigned int npages;
 
        do {
-               struct nfs_read_data *data;
                size_t bytes;
+               int i;
 
                pgbase = user_addr & ~PAGE_MASK;
-               bytes = min(rsize,count);
+               bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
 
                result = -ENOMEM;
-               data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
-               if (unlikely(!data))
+               npages = nfs_page_array_len(pgbase, bytes);
+               if (!pagevec)
+                       pagevec = kmalloc(npages * sizeof(struct page *),
+                                         GFP_KERNEL);
+               if (!pagevec)
                        break;
-
                down_read(&current->mm->mmap_sem);
                result = get_user_pages(current, current->mm, user_addr,
-                                       data->npages, 1, 0, data->pagevec, NULL);
+                                       npages, 1, 0, pagevec, NULL);
                up_read(&current->mm->mmap_sem);
-               if (result < 0) {
-                       nfs_readdata_free(data);
+               if (result < 0)
                        break;
-               }
-               if ((unsigned)result < data->npages) {
+               if ((unsigned)result < npages) {
                        bytes = result * PAGE_SIZE;
                        if (bytes <= pgbase) {
-                               nfs_direct_release_pages(data->pagevec, result);
-                               nfs_readdata_free(data);
+                               nfs_direct_release_pages(pagevec, result);
                                break;
                        }
                        bytes -= pgbase;
-                       data->npages = result;
+                       npages = result;
                }
 
-               get_dreq(dreq);
-
-               data->req = (struct nfs_page *) dreq;
-               data->inode = inode;
-               data->cred = msg.rpc_cred;
-               data->args.fh = NFS_FH(inode);
-               data->args.context = ctx;
-               data->args.lock_context = dreq->l_ctx;
-               data->args.offset = pos;
-               data->args.pgbase = pgbase;
-               data->args.pages = data->pagevec;
-               data->args.count = bytes;
-               data->res.fattr = &data->fattr;
-               data->res.eof = 0;
-               data->res.count = bytes;
-               nfs_fattr_init(&data->fattr);
-               msg.rpc_argp = &data->args;
-               msg.rpc_resp = &data->res;
-
-               task_setup_data.task = &data->task;
-               task_setup_data.callback_data = data;
-               NFS_PROTO(inode)->read_setup(data, &msg);
-
-               task = rpc_run_task(&task_setup_data);
-               if (IS_ERR(task))
-                       break;
-               rpc_put_task(task);
-
-               dprintk("NFS: %5u initiated direct read call "
-                       "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-                               data->task.tk_pid,
-                               inode->i_sb->s_id,
-                               (long long)NFS_FILEID(inode),
-                               bytes,
-                               (unsigned long long)data->args.offset);
-
-               started += bytes;
-               user_addr += bytes;
-               pos += bytes;
-               /* FIXME: Remove this unnecessary math from final patch */
-               pgbase += bytes;
-               pgbase &= ~PAGE_MASK;
-               BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
-               count -= bytes;
-       } while (count != 0);
+               for (i = 0; i < npages; i++) {
+                       struct nfs_page *req;
+                       unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+                       /* XXX do we need to do the eof zeroing found in async_filler? */
+                       req = nfs_create_request(dreq->ctx, dreq->inode,
+                                                pagevec[i],
+                                                pgbase, req_len);
+                       if (IS_ERR(req)) {
+                               result = PTR_ERR(req);
+                               break;
+                       }
+                       req->wb_index = pos >> PAGE_SHIFT;
+                       req->wb_offset = pos & ~PAGE_MASK;
+                       if (!nfs_pageio_add_request(desc, req)) {
+                               result = desc->pg_error;
+                               nfs_release_request(req);
+                               break;
+                       }
+                       pgbase = 0;
+                       bytes -= req_len;
+                       started += req_len;
+                       user_addr += req_len;
+                       pos += req_len;
+                       count -= req_len;
+               }
+               /* The nfs_page structs now hold references to these pages */
+               nfs_direct_release_pages(pagevec, npages);
+       } while (count != 0 && result >= 0);
+
+       kfree(pagevec);
 
        if (started)
                return started;
@@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
                                              unsigned long nr_segs,
                                              loff_t pos)
 {
+       struct nfs_pageio_descriptor desc;
        ssize_t result = -EINVAL;
        size_t requested_bytes = 0;
        unsigned long seg;
 
+       nfs_pageio_init_read(&desc, dreq->inode,
+                            &nfs_direct_read_completion_ops);
        get_dreq(dreq);
+       desc.pg_dreq = dreq;
 
        for (seg = 0; seg < nr_segs; seg++) {
                const struct iovec *vec = &iov[seg];
-               result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+               result = nfs_direct_read_schedule_segment(&desc, vec, pos);
                if (result < 0)
                        break;
                requested_bytes += result;
@@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
                pos += vec->iov_len;
        }
 
+       nfs_pageio_complete(&desc);
+
        /*
         * If no bytes were started, return the error, and let the
         * generic layer handle the completion.
@@ -441,104 +447,64 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
        result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
        if (!result)
                result = nfs_direct_wait(dreq);
+       NFS_I(inode)->read_io += result;
 out_release:
        nfs_direct_req_release(dreq);
 out:
        return result;
 }
 
-static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
-{
-       while (!list_empty(&dreq->rewrite_list)) {
-               struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
-               list_del(&data->pages);
-               nfs_direct_release_pages(data->pagevec, data->npages);
-               nfs_writedata_free(data);
-       }
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
-       struct inode *inode = dreq->inode;
-       struct list_head *p;
-       struct nfs_write_data *data;
-       struct rpc_task *task;
-       struct rpc_message msg = {
-               .rpc_cred = dreq->ctx->cred,
-       };
-       struct rpc_task_setup task_setup_data = {
-               .rpc_client = NFS_CLIENT(inode),
-               .rpc_message = &msg,
-               .callback_ops = &nfs_write_direct_ops,
-               .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
-       };
+       struct nfs_pageio_descriptor desc;
+       struct nfs_page *req, *tmp;
+       LIST_HEAD(reqs);
+       struct nfs_commit_info cinfo;
+       LIST_HEAD(failed);
+
+       nfs_init_cinfo_from_dreq(&cinfo, dreq);
+       pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
+       spin_lock(cinfo.lock);
+       nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
+       spin_unlock(cinfo.lock);
 
        dreq->count = 0;
        get_dreq(dreq);
 
-       list_for_each(p, &dreq->rewrite_list) {
-               data = list_entry(p, struct nfs_write_data, pages);
-
-               get_dreq(dreq);
-
-               /* Use stable writes */
-               data->args.stable = NFS_FILE_SYNC;
-
-               /*
-                * Reset data->res.
-                */
-               nfs_fattr_init(&data->fattr);
-               data->res.count = data->args.count;
-               memset(&data->verf, 0, sizeof(data->verf));
-
-               /*
-                * Reuse data->task; data->args should not have changed
-                * since the original request was sent.
-                */
-               task_setup_data.task = &data->task;
-               task_setup_data.callback_data = data;
-               msg.rpc_argp = &data->args;
-               msg.rpc_resp = &data->res;
-               NFS_PROTO(inode)->write_setup(data, &msg);
-
-               /*
-                * We're called via an RPC callback, so BKL is already held.
-                */
-               task = rpc_run_task(&task_setup_data);
-               if (!IS_ERR(task))
-                       rpc_put_task(task);
-
-               dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
-                               data->task.tk_pid,
-                               inode->i_sb->s_id,
-                               (long long)NFS_FILEID(inode),
-                               data->args.count,
-                               (unsigned long long)data->args.offset);
+       nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
+                             &nfs_direct_write_completion_ops);
+       desc.pg_dreq = dreq;
+
+       list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+               if (!nfs_pageio_add_request(&desc, req)) {
+                       nfs_list_add_request(req, &failed);
+                       spin_lock(cinfo.lock);
+                       dreq->flags = 0;
+                       dreq->error = -EIO;
+                       spin_unlock(cinfo.lock);
+               }
        }
+       nfs_pageio_complete(&desc);
 
-       if (put_dreq(dreq))
-               nfs_direct_write_complete(dreq, inode);
-}
-
-static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
-{
-       struct nfs_write_data *data = calldata;
+       while (!list_empty(&failed))
+               nfs_unlock_and_release_request(req);
 
-       /* Call the NFS version-specific code */
-       NFS_PROTO(data->inode)->commit_done(task, data);
+       if (put_dreq(dreq))
+               nfs_direct_write_complete(dreq, dreq->inode);
 }
 
-static void nfs_direct_commit_release(void *calldata)
+static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 {
-       struct nfs_write_data *data = calldata;
-       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+       struct nfs_direct_req *dreq = data->dreq;
+       struct nfs_commit_info cinfo;
+       struct nfs_page *req;
        int status = data->task.tk_status;
 
+       nfs_init_cinfo_from_dreq(&cinfo, dreq);
        if (status < 0) {
                dprintk("NFS: %5u commit failed with error %d.\n",
-                               data->task.tk_pid, status);
+                       data->task.tk_pid, status);
                dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
        } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
                dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -546,62 +512,47 @@ static void nfs_direct_commit_release(void *calldata)
        }
 
        dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
-       nfs_direct_write_complete(dreq, data->inode);
-       nfs_commit_free(data);
+       while (!list_empty(&data->pages)) {
+               req = nfs_list_entry(data->pages.next);
+               nfs_list_remove_request(req);
+               if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+                       /* Note: the rewrite will go through the MDS */
+                       kref_get(&req->wb_kref);
+                       nfs_mark_request_commit(req, NULL, &cinfo);
+               }
+               nfs_unlock_and_release_request(req);
+       }
+
+       if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+               nfs_direct_write_complete(dreq, data->inode);
+}
+
+static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+{
+       /* There is no lock to clear */
 }
 
-static const struct rpc_call_ops nfs_commit_direct_ops = {
-       .rpc_call_prepare = nfs_write_prepare,
-       .rpc_call_done = nfs_direct_commit_result,
-       .rpc_release = nfs_direct_commit_release,
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
+       .completion = nfs_direct_commit_complete,
+       .error_cleanup = nfs_direct_error_cleanup,
 };
 
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
-       struct nfs_write_data *data = dreq->commit_data;
-       struct rpc_task *task;
-       struct rpc_message msg = {
-               .rpc_argp = &data->args,
-               .rpc_resp = &data->res,
-               .rpc_cred = dreq->ctx->cred,
-       };
-       struct rpc_task_setup task_setup_data = {
-               .task = &data->task,
-               .rpc_client = NFS_CLIENT(dreq->inode),
-               .rpc_message = &msg,
-               .callback_ops = &nfs_commit_direct_ops,
-               .callback_data = data,
-               .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
-       };
-
-       data->inode = dreq->inode;
-       data->cred = msg.rpc_cred;
-
-       data->args.fh = NFS_FH(data->inode);
-       data->args.offset = 0;
-       data->args.count = 0;
-       data->args.context = dreq->ctx;
-       data->args.lock_context = dreq->l_ctx;
-       data->res.count = 0;
-       data->res.fattr = &data->fattr;
-       data->res.verf = &data->verf;
-       nfs_fattr_init(&data->fattr);
-
-       NFS_PROTO(data->inode)->commit_setup(data, &msg);
-
-       /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
-       dreq->commit_data = NULL;
-
-       dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
-
-       task = rpc_run_task(&task_setup_data);
-       if (!IS_ERR(task))
-               rpc_put_task(task);
+       int res;
+       struct nfs_commit_info cinfo;
+       LIST_HEAD(mds_list);
+
+       nfs_init_cinfo_from_dreq(&cinfo, dreq);
+       nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
+       res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
+       if (res < 0) /* res == -ENOMEM */
+               nfs_direct_write_reschedule(dreq);
 }
 
-static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
 {
+       struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
        int flags = dreq->flags;
 
        dreq->flags = 0;
@@ -613,89 +564,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
                        nfs_direct_write_reschedule(dreq);
                        break;
                default:
-                       if (dreq->commit_data != NULL)
-                               nfs_commit_free(dreq->commit_data);
-                       nfs_direct_free_writedata(dreq);
-                       nfs_zap_mapping(inode, inode->i_mapping);
+                       nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
                        nfs_direct_complete(dreq);
        }
 }
 
-static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
 {
-       dreq->commit_data = nfs_commitdata_alloc();
-       if (dreq->commit_data != NULL)
-               dreq->commit_data->req = (struct nfs_page *) dreq;
+       schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
 }
+
 #else
-static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
 {
-       dreq->commit_data = NULL;
 }
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
 {
-       nfs_direct_free_writedata(dreq);
        nfs_zap_mapping(inode, inode->i_mapping);
        nfs_direct_complete(dreq);
 }
 #endif
 
-static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
-{
-       struct nfs_write_data *data = calldata;
-
-       nfs_writeback_done(task, data);
-}
-
 /*
  * NB: Return the value of the first error return code.  Subsequent
  *     errors after the first one are ignored.
  */
-static void nfs_direct_write_release(void *calldata)
-{
-       struct nfs_write_data *data = calldata;
-       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
-       int status = data->task.tk_status;
-
-       spin_lock(&dreq->lock);
-
-       if (unlikely(status < 0)) {
-               /* An error has occurred, so we should not commit */
-               dreq->flags = 0;
-               dreq->error = status;
-       }
-       if (unlikely(dreq->error != 0))
-               goto out_unlock;
-
-       dreq->count += data->res.count;
-
-       if (data->res.verf->committed != NFS_FILE_SYNC) {
-               switch (dreq->flags) {
-                       case 0:
-                               memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
-                               dreq->flags = NFS_ODIRECT_DO_COMMIT;
-                               break;
-                       case NFS_ODIRECT_DO_COMMIT:
-                               if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
-                                       dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
-                                       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-                               }
-               }
-       }
-out_unlock:
-       spin_unlock(&dreq->lock);
-
-       if (put_dreq(dreq))
-               nfs_direct_write_complete(dreq, data->inode);
-}
-
-static const struct rpc_call_ops nfs_write_direct_ops = {
-       .rpc_call_prepare = nfs_write_prepare,
-       .rpc_call_done = nfs_direct_write_result,
-       .rpc_release = nfs_direct_write_release,
-};
-
 /*
  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
@@ -703,132 +597,187 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
  * handled automatically by nfs_direct_write_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
                                                 const struct iovec *iov,
-                                                loff_t pos, int sync)
+                                                loff_t pos)
 {
+       struct nfs_direct_req *dreq = desc->pg_dreq;
        struct nfs_open_context *ctx = dreq->ctx;
        struct inode *inode = ctx->dentry->d_inode;
        unsigned long user_addr = (unsigned long)iov->iov_base;
        size_t count = iov->iov_len;
-       struct rpc_task *task;
-       struct rpc_message msg = {
-               .rpc_cred = ctx->cred,
-       };
-       struct rpc_task_setup task_setup_data = {
-               .rpc_client = NFS_CLIENT(inode),
-               .rpc_message = &msg,
-               .callback_ops = &nfs_write_direct_ops,
-               .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
-       };
        size_t wsize = NFS_SERVER(inode)->wsize;
        unsigned int pgbase;
        int result;
        ssize_t started = 0;
+       struct page **pagevec = NULL;
+       unsigned int npages;
 
        do {
-               struct nfs_write_data *data;
                size_t bytes;
+               int i;
 
                pgbase = user_addr & ~PAGE_MASK;
-               bytes = min(wsize,count);
+               bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
 
                result = -ENOMEM;
-               data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
-               if (unlikely(!data))
+               npages = nfs_page_array_len(pgbase, bytes);
+               if (!pagevec)
+                       pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
+               if (!pagevec)
                        break;
 
                down_read(&current->mm->mmap_sem);
                result = get_user_pages(current, current->mm, user_addr,
-                                       data->npages, 0, 0, data->pagevec, NULL);
+                                       npages, 0, 0, pagevec, NULL);
                up_read(&current->mm->mmap_sem);
-               if (result < 0) {
-                       nfs_writedata_free(data);
+               if (result < 0)
                        break;
-               }
-               if ((unsigned)result < data->npages) {
+
+               if ((unsigned)result < npages) {
                        bytes = result * PAGE_SIZE;
                        if (bytes <= pgbase) {
-                               nfs_direct_release_pages(data->pagevec, result);
-                               nfs_writedata_free(data);
+                               nfs_direct_release_pages(pagevec, result);
                                break;
                        }
                        bytes -= pgbase;
-                       data->npages = result;
+                       npages = result;
                }
 
-               get_dreq(dreq);
-
-               list_move_tail(&data->pages, &dreq->rewrite_list);
-
-               data->req = (struct nfs_page *) dreq;
-               data->inode = inode;
-               data->cred = msg.rpc_cred;
-               data->args.fh = NFS_FH(inode);
-               data->args.context = ctx;
-               data->args.lock_context = dreq->l_ctx;
-               data->args.offset = pos;
-               data->args.pgbase = pgbase;
-               data->args.pages = data->pagevec;
-               data->args.count = bytes;
-               data->args.stable = sync;
-               data->res.fattr = &data->fattr;
-               data->res.count = bytes;
-               data->res.verf = &data->verf;
-               nfs_fattr_init(&data->fattr);
-
-               task_setup_data.task = &data->task;
-               task_setup_data.callback_data = data;
-               msg.rpc_argp = &data->args;
-               msg.rpc_resp = &data->res;
-               NFS_PROTO(inode)->write_setup(data, &msg);
-
-               task = rpc_run_task(&task_setup_data);
-               if (IS_ERR(task))
-                       break;
-               rpc_put_task(task);
-
-               dprintk("NFS: %5u initiated direct write call "
-                       "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-                               data->task.tk_pid,
-                               inode->i_sb->s_id,
-                               (long long)NFS_FILEID(inode),
-                               bytes,
-                               (unsigned long long)data->args.offset);
+               for (i = 0; i < npages; i++) {
+                       struct nfs_page *req;
+                       unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-               started += bytes;
-               user_addr += bytes;
-               pos += bytes;
-
-               /* FIXME: Remove this useless math from the final patch */
-               pgbase += bytes;
-               pgbase &= ~PAGE_MASK;
-               BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
+                       req = nfs_create_request(dreq->ctx, dreq->inode,
+                                                pagevec[i],
+                                                pgbase, req_len);
+                       if (IS_ERR(req)) {
+                               result = PTR_ERR(req);
+                               break;
+                       }
+                       nfs_lock_request(req);
+                       req->wb_index = pos >> PAGE_SHIFT;
+                       req->wb_offset = pos & ~PAGE_MASK;
+                       if (!nfs_pageio_add_request(desc, req)) {
+                               result = desc->pg_error;
+                               nfs_unlock_and_release_request(req);
+                               break;
+                       }
+                       pgbase = 0;
+                       bytes -= req_len;
+                       started += req_len;
+                       user_addr += req_len;
+                       pos += req_len;
+                       count -= req_len;
+               }
+               /* The nfs_page requests now hold references to these pages */
+               nfs_direct_release_pages(pagevec, npages);
+       } while (count != 0 && result >= 0);
 
-               count -= bytes;
-       } while (count != 0);
+       kfree(pagevec);
 
        if (started)
                return started;
        return result < 0 ? (ssize_t) result : -EFAULT;
 }
 
+static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+{
+       struct nfs_direct_req *dreq = hdr->dreq;
+       struct nfs_commit_info cinfo;
+       int bit = -1;
+       struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+       if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+               goto out_put;
+
+       nfs_init_cinfo_from_dreq(&cinfo, dreq);
+
+       spin_lock(&dreq->lock);
+
+       if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+               dreq->flags = 0;
+               dreq->error = hdr->error;
+       }
+       if (dreq->error != 0)
+               bit = NFS_IOHDR_ERROR;
+       else {
+               dreq->count += hdr->good_bytes;
+               if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+                       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+                       bit = NFS_IOHDR_NEED_RESCHED;
+               } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+                       if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
+                               bit = NFS_IOHDR_NEED_RESCHED;
+                       else if (dreq->flags == 0) {
+                               memcpy(&dreq->verf, &req->wb_verf,
+                                      sizeof(dreq->verf));
+                               bit = NFS_IOHDR_NEED_COMMIT;
+                               dreq->flags = NFS_ODIRECT_DO_COMMIT;
+                       } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
+                               if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
+                                       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+                                       bit = NFS_IOHDR_NEED_RESCHED;
+                               } else
+                                       bit = NFS_IOHDR_NEED_COMMIT;
+                       }
+               }
+       }
+       spin_unlock(&dreq->lock);
+
+       while (!list_empty(&hdr->pages)) {
+               req = nfs_list_entry(hdr->pages.next);
+               nfs_list_remove_request(req);
+               switch (bit) {
+               case NFS_IOHDR_NEED_RESCHED:
+               case NFS_IOHDR_NEED_COMMIT:
+                       kref_get(&req->wb_kref);
+                       nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+               }
+               nfs_unlock_and_release_request(req);
+       }
+
+out_put:
+       if (put_dreq(dreq))
+               nfs_direct_write_complete(dreq, hdr->inode);
+       hdr->release(hdr);
+}
+
+static void nfs_write_sync_pgio_error(struct list_head *head)
+{
+       struct nfs_page *req;
+
+       while (!list_empty(head)) {
+               req = nfs_list_entry(head->next);
+               nfs_list_remove_request(req);
+               nfs_unlock_and_release_request(req);
+       }
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
+       .error_cleanup = nfs_write_sync_pgio_error,
+       .init_hdr = nfs_direct_pgio_init,
+       .completion = nfs_direct_write_completion,
+};
+
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                                               const struct iovec *iov,
                                               unsigned long nr_segs,
-                                              loff_t pos, int sync)
+                                              loff_t pos)
 {
+       struct nfs_pageio_descriptor desc;
        ssize_t result = 0;
        size_t requested_bytes = 0;
        unsigned long seg;
 
+       nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
+                             &nfs_direct_write_completion_ops);
+       desc.pg_dreq = dreq;
        get_dreq(dreq);
 
        for (seg = 0; seg < nr_segs; seg++) {
                const struct iovec *vec = &iov[seg];
-               result = nfs_direct_write_schedule_segment(dreq, vec,
-                                                          pos, sync);
+               result = nfs_direct_write_schedule_segment(&desc, vec, pos);
                if (result < 0)
                        break;
                requested_bytes += result;
@@ -836,6 +785,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                        break;
                pos += vec->iov_len;
        }
+       nfs_pageio_complete(&desc);
+       NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
 
        /*
         * If no bytes were started, return the error, and let the
@@ -858,16 +809,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
        ssize_t result = -ENOMEM;
        struct inode *inode = iocb->ki_filp->f_mapping->host;
        struct nfs_direct_req *dreq;
-       size_t wsize = NFS_SERVER(inode)->wsize;
-       int sync = NFS_UNSTABLE;
 
        dreq = nfs_direct_req_alloc();
        if (!dreq)
                goto out;
-       nfs_alloc_commit_data(dreq);
-
-       if (dreq->commit_data == NULL || count <= wsize)
-               sync = NFS_FILE_SYNC;
 
        dreq->inode = inode;
        dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -877,7 +822,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;
 
-       result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
+       result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
        if (!result)
                result = nfs_direct_wait(dreq);
 out_release:
@@ -997,10 +942,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
        task_io_account_write(count);
 
        retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
+       if (retval > 0) {
+               struct inode *inode = mapping->host;
 
-       if (retval > 0)
                iocb->ki_pos = pos + retval;
-
+               spin_lock(&inode->i_lock);
+               if (i_size_read(inode) < iocb->ki_pos)
+                       i_size_write(inode, iocb->ki_pos);
+               spin_unlock(&inode->i_lock);
+       }
 out:
        return retval;
 }
index aa9b709fd328d7178cd1c4bdb417fb6f454472af..56311ca5f9f8183d3aa8c3aa8c9922db05ee32a3 100644 (file)
@@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)
        if ((file->f_mode & FMODE_WRITE) == 0)
                return 0;
 
+       /*
+        * If we're holding a write delegation, then just start the i/o
+        * but don't wait for completion (or send a commit).
+        */
+       if (nfs_have_delegation(inode, FMODE_WRITE))
+               return filemap_fdatawrite(file->f_mapping);
+
        /* Flush writes to the server and return any errors */
        return vfs_fsync(file, 0);
 }
@@ -417,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 
        if (status < 0)
                return status;
+       NFS_I(mapping->host)->write_io += copied;
        return copied;
 }
 
index ae65c16b3670ebb5ed6da1d214f2cde16fd7f73e..c817787fbdb4024738a84d804a8800cd7b6dbedf 100644 (file)
@@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
  * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
  * superblock across an automount point of some nature.
  */
-void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq,
-                                 struct nfs_clone_mount *mntdata)
+void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
 {
        struct nfs_fscache_key *key, *xkey;
        struct nfs_server *nfss = NFS_SB(sb);
        struct rb_node **p, *parent;
-       int diff, ulen;
-
-       if (uniq) {
-               ulen = strlen(uniq);
-       } else if (mntdata) {
-               struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
-               if (mnt_s->fscache_key) {
-                       uniq = mnt_s->fscache_key->key.uniquifier;
-                       ulen = mnt_s->fscache_key->key.uniq_len;
-               }
-       }
+       int diff;
 
        if (!uniq) {
                uniq = "";
index b9c572d0679f8ced0aead467778d56229de9a03b..c5b11b53ff33b33d4249a88443b102ae33f00df5 100644 (file)
@@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);
 extern void nfs_fscache_get_client_cookie(struct nfs_client *);
 extern void nfs_fscache_release_client_cookie(struct nfs_client *);
 
-extern void nfs_fscache_get_super_cookie(struct super_block *,
-                                        const char *,
-                                        struct nfs_clone_mount *);
+extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
 extern void nfs_fscache_release_super_cookie(struct super_block *);
 
 extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}
 static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
 static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
 
-static inline void nfs_fscache_get_super_cookie(
-       struct super_block *sb,
-       const char *uniq,
-       struct nfs_clone_mount *mntdata)
-{
-}
 static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
 
 static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
index 4ca6f5c8038e02dfddb3a8a33dec71a8cb20295f..8abfb19bd3aa3b739611ce1f97025d643539b09c 100644 (file)
@@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
                goto out;
 
        /* Start by getting the root filehandle from the server */
-       ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+       ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
        if (ret < 0) {
                dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
                goto out;
@@ -178,87 +178,4 @@ out:
        return ret;
 }
 
-/*
- * get an NFS4 root dentry from the root filehandle
- */
-struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
-                            const char *devname)
-{
-       struct nfs_server *server = NFS_SB(sb);
-       struct nfs_fattr *fattr = NULL;
-       struct dentry *ret;
-       struct inode *inode;
-       void *name = kstrdup(devname, GFP_KERNEL);
-       int error;
-
-       dprintk("--> nfs4_get_root()\n");
-
-       if (!name)
-               return ERR_PTR(-ENOMEM);
-
-       /* get the info about the server and filesystem */
-       error = nfs4_server_capabilities(server, mntfh);
-       if (error < 0) {
-               dprintk("nfs_get_root: getcaps error = %d\n",
-                       -error);
-               kfree(name);
-               return ERR_PTR(error);
-       }
-
-       fattr = nfs_alloc_fattr();
-       if (fattr == NULL) {
-               kfree(name);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       /* get the actual root for this mount */
-       error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
-       if (error < 0) {
-               dprintk("nfs_get_root: getattr error = %d\n", -error);
-               ret = ERR_PTR(error);
-               goto out;
-       }
-
-       if (fattr->valid & NFS_ATTR_FATTR_FSID &&
-           !nfs_fsid_equal(&server->fsid, &fattr->fsid))
-               memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
-
-       inode = nfs_fhget(sb, mntfh, fattr);
-       if (IS_ERR(inode)) {
-               dprintk("nfs_get_root: get root inode failed\n");
-               ret = ERR_CAST(inode);
-               goto out;
-       }
-
-       error = nfs_superblock_set_dummy_root(sb, inode);
-       if (error != 0) {
-               ret = ERR_PTR(error);
-               goto out;
-       }
-
-       /* root dentries normally start off anonymous and get spliced in later
-        * if the dentry tree reaches them; however if the dentry already
-        * exists, we'll pick it up at this point and use it as the root
-        */
-       ret = d_obtain_alias(inode);
-       if (IS_ERR(ret)) {
-               dprintk("nfs_get_root: get root dentry failed\n");
-               goto out;
-       }
-
-       security_d_instantiate(ret, inode);
-       spin_lock(&ret->d_lock);
-       if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
-               ret->d_fsdata = name;
-               name = NULL;
-       }
-       spin_unlock(&ret->d_lock);
-out:
-       if (name)
-               kfree(name);
-       nfs_free_fattr(fattr);
-       dprintk("<-- nfs4_get_root()\n");
-       return ret;
-}
-
 #endif /* CONFIG_NFS_V4 */
index ba3019f5934c21a610a96569b1d239b90eca0459..b5b86a05059c8c0cf157495878bad3621a25a8dc 100644 (file)
@@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir,
 static void nfs_idmap_unregister(struct nfs_client *clp,
                                      struct rpc_pipe *pipe)
 {
-       struct net *net = clp->net;
+       struct net *net = clp->cl_net;
        struct super_block *pipefs_sb;
 
        pipefs_sb = rpc_get_sb_net(net);
@@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp,
                                   struct idmap *idmap,
                                   struct rpc_pipe *pipe)
 {
-       struct net *net = clp->net;
+       struct net *net = clp->cl_net;
        struct super_block *pipefs_sb;
        int err = 0;
 
@@ -530,9 +530,25 @@ static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
        struct nfs_net *nn = net_generic(net, nfs_net_id);
        struct dentry *cl_dentry;
        struct nfs_client *clp;
+       int err;
 
+restart:
        spin_lock(&nn->nfs_client_lock);
        list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
+               /* Wait for initialisation to finish */
+               if (clp->cl_cons_state == NFS_CS_INITING) {
+                       atomic_inc(&clp->cl_count);
+                       spin_unlock(&nn->nfs_client_lock);
+                       err = nfs_wait_client_init_complete(clp);
+                       nfs_put_client(clp);
+                       if (err)
+                               return NULL;
+                       goto restart;
+               }
+               /* Skip nfs_clients that failed to initialise */
+               if (clp->cl_cons_state < 0)
+                       continue;
+               smp_rmb();
                if (clp->rpc_ops != &nfs_v4_clientops)
                        continue;
                cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
@@ -640,20 +656,16 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
        struct idmap_msg *im;
        struct idmap *idmap = (struct idmap *)aux;
        struct key *key = cons->key;
-       int ret;
+       int ret = -ENOMEM;
 
        /* msg and im are freed in idmap_pipe_destroy_msg */
        msg = kmalloc(sizeof(*msg), GFP_KERNEL);
-       if (IS_ERR(msg)) {
-               ret = PTR_ERR(msg);
+       if (!msg)
                goto out0;
-       }
 
        im = kmalloc(sizeof(*im), GFP_KERNEL);
-       if (IS_ERR(im)) {
-               ret = PTR_ERR(im);
+       if (!im)
                goto out1;
-       }
 
        ret = nfs_idmap_prepare_message(key->description, im, msg);
        if (ret < 0)
index c6073139b402f1250634dfa892ca6ae4a4885cc3..2f6f78c4b42d7f263419f7cf75153d5766f06c71 100644 (file)
@@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                inode->i_mode = fattr->mode;
                if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
                                && nfs_server_capable(inode, NFS_CAP_MODE))
-                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_ACCESS
-                               | NFS_INO_INVALID_ACL;
+                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                /* Why so? Because we want revalidate for devices/FIFOs, and
                 * that's precisely what we have in nfs_file_inode_operations.
                 */
@@ -300,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                        inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
                        inode->i_fop = &nfs_dir_operations;
                        inode->i_data.a_ops = &nfs_dir_aops;
-                       if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
-                               set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
                        /* Deal with crossing mountpoints */
                        if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
                                        fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -327,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                inode->i_gid = -2;
                inode->i_blocks = 0;
                memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+               nfsi->write_io = 0;
+               nfsi->read_io = 0;
 
                nfsi->read_cache_jiffies = fattr->time_start;
                nfsi->attr_gencount = fattr->gencount;
@@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                if (fattr->valid & NFS_ATTR_FATTR_MTIME)
                        inode->i_mtime = fattr->mtime;
                else if (nfs_server_capable(inode, NFS_CAP_MTIME))
-                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_DATA;
+                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (fattr->valid & NFS_ATTR_FATTR_CTIME)
                        inode->i_ctime = fattr->ctime;
                else if (nfs_server_capable(inode, NFS_CAP_CTIME))
-                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_ACCESS
-                               | NFS_INO_INVALID_ACL;
+                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
                        inode->i_version = fattr->change_attr;
                else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
-                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_DATA;
+                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (fattr->valid & NFS_ATTR_FATTR_SIZE)
                        inode->i_size = nfs_size_to_loff_t(fattr->size);
                else
                        nfsi->cache_validity |= NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_DATA
                                | NFS_INO_REVAL_PAGECACHE;
                if (fattr->valid & NFS_ATTR_FATTR_NLINK)
                        set_nlink(inode, fattr->nlink);
@@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                if (fattr->valid & NFS_ATTR_FATTR_OWNER)
                        inode->i_uid = fattr->uid;
                else if (nfs_server_capable(inode, NFS_CAP_OWNER))
-                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_ACCESS
-                               | NFS_INO_INVALID_ACL;
+                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (fattr->valid & NFS_ATTR_FATTR_GROUP)
                        inode->i_gid = fattr->gid;
                else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
-                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_ACCESS
-                               | NFS_INO_INVALID_ACL;
+                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
                        inode->i_blocks = fattr->du.nfs2.blocks;
                if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -654,6 +643,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
        nfs_init_lock_context(&ctx->lock_context);
        ctx->lock_context.open_context = ctx;
        INIT_LIST_HEAD(&ctx->list);
+       ctx->mdsthreshold = NULL;
        return ctx;
 }
 
@@ -682,6 +672,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
                put_rpccred(ctx->cred);
        dput(ctx->dentry);
        nfs_sb_deactive(sb);
+       kfree(ctx->mdsthreshold);
        kfree(ctx);
 }
 
@@ -870,6 +861,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
        return 0;
 }
 
+static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
+{
+       if (nfs_have_delegated_attributes(inode))
+               return false;
+       return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
+               || nfs_attribute_timeout(inode)
+               || NFS_STALE(inode);
+}
+
 /**
  * nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
@@ -880,9 +880,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
        struct nfs_inode *nfsi = NFS_I(inode);
        int ret = 0;
 
-       if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
-                       || nfs_attribute_cache_expired(inode)
-                       || NFS_STALE(inode)) {
+       if (nfs_mapping_need_revalidate_inode(inode)) {
                ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
                if (ret < 0)
                        goto out;
@@ -948,6 +946,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
        unsigned long invalid = 0;
 
 
+       if (nfs_have_delegated_attributes(inode))
+               return 0;
        /* Has the inode gone and changed behind our back? */
        if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
                return -EIO;
@@ -960,7 +960,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 
        /* Verify a few of the more important attributes */
        if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
-               invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+               invalid |= NFS_INO_INVALID_ATTR;
 
        if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
                cur_size = i_size_read(inode);
@@ -1279,14 +1279,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        nfs_display_fhandle_hash(NFS_FH(inode)),
                        atomic_read(&inode->i_count), fattr->valid);
 
-       if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
-               goto out_fileid;
+       if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
+               printk(KERN_ERR "NFS: server %s error: fileid changed\n"
+                       "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
+                       NFS_SERVER(inode)->nfs_client->cl_hostname,
+                       inode->i_sb->s_id, (long long)nfsi->fileid,
+                       (long long)fattr->fileid);
+               goto out_err;
+       }
 
        /*
         * Make sure the inode's type hasn't changed.
         */
-       if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
-               goto out_changed;
+       if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+               /*
+                * Big trouble! The inode has become a different object.
+                */
+               printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
+                               __func__, inode->i_ino, inode->i_mode, fattr->mode);
+               goto out_err;
+       }
 
        server = NFS_SERVER(inode);
        /* Update the fsid? */
@@ -1314,7 +1326,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                if (inode->i_version != fattr->change_attr) {
                        dprintk("NFS: change_attr change on server for file %s/%ld\n",
                                        inode->i_sb->s_id, inode->i_ino);
-                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+                       invalid |= NFS_INO_INVALID_ATTR
+                               | NFS_INO_INVALID_DATA
+                               | NFS_INO_INVALID_ACCESS
+                               | NFS_INO_INVALID_ACL
+                               | NFS_INO_REVAL_PAGECACHE;
                        if (S_ISDIR(inode->i_mode))
                                nfs_force_lookup_revalidate(inode);
                        inode->i_version = fattr->change_attr;
@@ -1323,38 +1339,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                invalid |= save_cache_validity;
 
        if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
-               /* NFSv2/v3: Check if the mtime agrees */
-               if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
-                       dprintk("NFS: mtime change on server for file %s/%ld\n",
-                                       inode->i_sb->s_id, inode->i_ino);
-                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
-                       if (S_ISDIR(inode->i_mode))
-                               nfs_force_lookup_revalidate(inode);
-                       memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-               }
+               memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
        } else if (server->caps & NFS_CAP_MTIME)
                invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_DATA
-                               | NFS_INO_REVAL_PAGECACHE
                                | NFS_INO_REVAL_FORCED);
 
        if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
-               /* If ctime has changed we should definitely clear access+acl caches */
-               if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
-                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-                       /* and probably clear data for a directory too as utimes can cause
-                        * havoc with our cache.
-                        */
-                       if (S_ISDIR(inode->i_mode)) {
-                               invalid |= NFS_INO_INVALID_DATA;
-                               nfs_force_lookup_revalidate(inode);
-                       }
-                       memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-               }
+               memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
        } else if (server->caps & NFS_CAP_CTIME)
                invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
-                               | NFS_INO_INVALID_ACCESS
-                               | NFS_INO_INVALID_ACL
                                | NFS_INO_REVAL_FORCED);
 
        /* Check if our cached file size is stale */
@@ -1466,12 +1459,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                nfsi->cache_validity |= invalid;
 
        return 0;
- out_changed:
-       /*
-        * Big trouble! The inode has become a different object.
-        */
-       printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
-                       __func__, inode->i_ino, inode->i_mode, fattr->mode);
  out_err:
        /*
         * No need to worry about unhashing the dentry, as the
@@ -1480,13 +1467,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
         */
        nfs_invalidate_inode(inode);
        return -ESTALE;
-
- out_fileid:
-       printk(KERN_ERR "NFS: server %s error: fileid changed\n"
-               "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
-               NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
-               (long long)nfsi->fileid, (long long)fattr->fileid);
-       goto out_err;
 }
 
 
@@ -1547,7 +1527,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
        nfsi->delegation_state = 0;
        init_rwsem(&nfsi->rwsem);
        nfsi->layout = NULL;
-       atomic_set(&nfsi->commits_outstanding, 0);
+       atomic_set(&nfsi->commit_info.rpcs_out, 0);
 #endif
 }
 
@@ -1559,9 +1539,9 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&nfsi->open_files);
        INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
        INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
-       INIT_LIST_HEAD(&nfsi->commit_list);
+       INIT_LIST_HEAD(&nfsi->commit_info.list);
        nfsi->npages = 0;
-       nfsi->ncommit = 0;
+       nfsi->commit_info.ncommit = 0;
        atomic_set(&nfsi->silly_count, 1);
        INIT_HLIST_HEAD(&nfsi->silly_list);
        init_waitqueue_head(&nfsi->waitqueue);
index b777bdaba4c52e72ee86a1d6c1e67ec381a37788..1848a72755928f226807e5971355331368c3ad1d 100644 (file)
@@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {
        unsigned int            version;
        unsigned int            minorversion;
        char                    *fscache_uniq;
+       bool                    need_mount;
 
        struct {
                struct sockaddr_storage address;
@@ -167,11 +168,13 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
                                           struct nfs_fh *,
                                           struct nfs_fattr *,
                                           rpc_authflavor_t);
+extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
 extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
-extern int nfs4_check_client_ready(struct nfs_client *clp);
 extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
                                             const struct sockaddr *ds_addr,
-                                            int ds_addrlen, int ds_proto);
+                                            int ds_addrlen, int ds_proto,
+                                            unsigned int ds_timeo,
+                                            unsigned int ds_retrans);
 #ifdef CONFIG_PROC_FS
 extern int __init nfs_fs_proc_init(void);
 extern void nfs_fs_proc_exit(void);
@@ -185,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)
 }
 #endif
 
-/* nfs4namespace.c */
-#ifdef CONFIG_NFS_V4
-extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
-#else
-static inline
-struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
-{
-       return ERR_PTR(-ENOENT);
-}
-#endif
-
 /* callback_xdr.c */
 extern struct svc_version nfs4_callback_version1;
 extern struct svc_version nfs4_callback_version4;
 
+struct nfs_pageio_descriptor;
 /* pagelist.c */
 extern int __init nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
@@ -210,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);
 
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
+extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
+extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+                             struct nfs_pgio_header *hdr,
+                             void (*release)(struct nfs_pgio_header *hdr));
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 
 /* nfs2xdr.c */
-extern int nfs_stat_to_errno(enum nfs_stat);
 extern struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
                                struct nfs_entry *, int);
@@ -237,14 +234,13 @@ extern const u32 nfs41_maxwrite_overhead;
 extern struct rpc_procinfo nfs4_procedures[];
 #endif
 
-extern int nfs4_init_ds_session(struct nfs_client *clp);
+extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
 
 /* proc.c */
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
-extern int nfs_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
                           const struct rpc_timeout *timeparms,
-                          const char *ip_addr, rpc_authflavor_t authflavour,
-                          int noresvport);
+                          const char *ip_addr, rpc_authflavor_t authflavour);
 
 /* dir.c */
 extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -280,9 +276,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
 extern char *nfs_path(char **p, struct dentry *dentry,
                      char *buffer, ssize_t buflen);
 extern struct vfsmount *nfs_d_automount(struct path *path);
-#ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
-#endif
+struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
+                             struct nfs_fh *, struct nfs_fattr *);
+struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
+                                struct nfs_fattr *, rpc_authflavor_t);
 
 /* getroot.c */
 extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -294,46 +291,73 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
-struct nfs_pageio_descriptor;
+struct nfs_pgio_completion_ops;
 /* read.c */
-extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
-                            const struct rpc_call_ops *call_ops);
+extern struct nfs_read_header *nfs_readhdr_alloc(void);
+extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+                       struct inode *inode,
+                       const struct nfs_pgio_completion_ops *compl_ops);
+extern int nfs_initiate_read(struct rpc_clnt *clnt,
+                            struct nfs_read_data *data,
+                            const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
-               struct list_head *head);
-
+                             struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
-               struct inode *inode);
+                       struct inode *inode,
+                       const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
+extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+                       struct inode *inode, int ioflags,
+                       const struct nfs_pgio_completion_ops *compl_ops);
+extern struct nfs_write_header *nfs_writehdr_alloc(void);
+extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
-               struct list_head *head);
+                            struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
-                                 struct inode *inode, int ioflags);
+                       struct inode *inode, int ioflags,
+                       const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
-extern void nfs_commit_free(struct nfs_write_data *p);
-extern int nfs_initiate_write(struct nfs_write_data *data,
-                             struct rpc_clnt *clnt,
+extern void nfs_commit_free(struct nfs_commit_data *p);
+extern int nfs_initiate_write(struct rpc_clnt *clnt,
+                             struct nfs_write_data *data,
                              const struct rpc_call_ops *call_ops,
-                             int how);
+                             int how, int flags);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_initiate_commit(struct nfs_write_data *data,
-                              struct rpc_clnt *clnt,
+extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_initiate_commit(struct rpc_clnt *clnt,
+                              struct nfs_commit_data *data,
                               const struct rpc_call_ops *call_ops,
-                              int how);
-extern void nfs_init_commit(struct nfs_write_data *data,
+                              int how, int flags);
+extern void nfs_init_commit(struct nfs_commit_data *data,
                            struct list_head *head,
-                           struct pnfs_layout_segment *lseg);
+                           struct pnfs_layout_segment *lseg,
+                           struct nfs_commit_info *cinfo);
+int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+                        struct nfs_commit_info *cinfo, int max);
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+                   struct nfs_commit_info *cinfo);
+void nfs_mark_request_commit(struct nfs_page *req,
+                            struct pnfs_layout_segment *lseg,
+                            struct nfs_commit_info *cinfo);
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+                           int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
-                     struct pnfs_layout_segment *lseg);
-void nfs_commit_clear_lock(struct nfs_inode *nfsi);
-void nfs_commitdata_release(void *data);
-void nfs_commit_release_pages(struct nfs_write_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
-void nfs_request_remove_commit_list(struct nfs_page *req);
+                     struct pnfs_layout_segment *lseg,
+                     struct nfs_commit_info *cinfo);
+void nfs_commitdata_release(struct nfs_commit_data *data);
+void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+                                struct nfs_commit_info *cinfo);
+void nfs_request_remove_commit_list(struct nfs_page *req,
+                                   struct nfs_commit_info *cinfo);
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+                   struct inode *inode,
+                   struct nfs_direct_req *dreq);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
@@ -342,15 +366,16 @@ extern int nfs_migrate_page(struct address_space *,
 #define nfs_migrate_page NULL
 #endif
 
+/* direct.c */
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+                             struct nfs_direct_req *dreq);
+
 /* nfs4proc.c */
 extern void __nfs4_read_done_cb(struct nfs_read_data *);
-extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
-extern int nfs4_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
                            const struct rpc_timeout *timeparms,
                            const char *ip_addr,
-                           rpc_authflavor_t authflavour,
-                           int noresvport);
-extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
+                           rpc_authflavor_t authflavour);
 extern int _nfs4_call_sync(struct rpc_clnt *clnt,
                           struct nfs_server *server,
                           struct rpc_message *msg,
@@ -466,3 +491,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
                PAGE_SIZE - 1) >> PAGE_SHIFT;
 }
 
+/*
+ * Convert a struct timespec into a 64-bit change attribute
+ *
+ * This does approximately the same thing as timespec_to_ns(),
+ * but for calculation efficiency, we multiply the seconds by
+ * 1024*1024*1024 (a 30-bit shift) rather than by 1000000000.
+ */
+static inline
+u64 nfs_timespec_to_change_attr(const struct timespec *ts)
+{
+       return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
+}
index d51868e5683c0b34530c9db38a0cd4c26277c0b8..08b9c93675da512e0ef8174a25e1e048f282b527 100644 (file)
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
 static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
-                                       struct nfs_fh *fh,
-                                       struct nfs_fattr *fattr,
-                                       rpc_authflavor_t authflavor);
-
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
  * @base - used to return pointer to the end of devname part of path
@@ -118,64 +113,6 @@ Elong:
        return ERR_PTR(-ENAMETOOLONG);
 }
 
-#ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
-{
-       struct gss_api_mech *mech;
-       struct xdr_netobj oid;
-       int i;
-       rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
-
-       for (i = 0; i < flavors->num_flavors; i++) {
-               struct nfs4_secinfo_flavor *flavor;
-               flavor = &flavors->flavors[i];
-
-               if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
-                       pseudoflavor = flavor->flavor;
-                       break;
-               } else if (flavor->flavor == RPC_AUTH_GSS) {
-                       oid.len  = flavor->gss.sec_oid4.len;
-                       oid.data = flavor->gss.sec_oid4.data;
-                       mech = gss_mech_get_by_OID(&oid);
-                       if (!mech)
-                               continue;
-                       pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
-                       gss_mech_put(mech);
-                       break;
-               }
-       }
-
-       return pseudoflavor;
-}
-
-static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
-                                             struct qstr *name,
-                                             struct nfs_fh *fh,
-                                             struct nfs_fattr *fattr)
-{
-       int err;
-
-       if (NFS_PROTO(dir)->version == 4)
-               return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
-
-       err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
-       if (err)
-               return ERR_PTR(err);
-       return rpc_clone_client(NFS_SERVER(dir)->client);
-}
-#else /* CONFIG_NFS_V4 */
-static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
-                                                    struct qstr *name,
-                                                    struct nfs_fh *fh,
-                                                    struct nfs_fattr *fattr)
-{
-       int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
-       if (err)
-               return ERR_PTR(err);
-       return rpc_clone_client(NFS_SERVER(dir)->client);
-}
-#endif /* CONFIG_NFS_V4 */
-
 /*
  * nfs_d_automount - Handle crossing a mountpoint on the server
  * @path - The mountpoint
@@ -191,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
 struct vfsmount *nfs_d_automount(struct path *path)
 {
        struct vfsmount *mnt;
-       struct dentry *parent;
+       struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
        struct nfs_fh *fh = NULL;
        struct nfs_fattr *fattr = NULL;
-       struct rpc_clnt *client;
 
        dprintk("--> nfs_d_automount()\n");
 
@@ -210,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
 
        dprintk("%s: enter\n", __func__);
 
-       /* Look it up again to get its attributes */
-       parent = dget_parent(path->dentry);
-       client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
-       dput(parent);
-       if (IS_ERR(client)) {
-               mnt = ERR_CAST(client);
-               goto out;
-       }
-
-       if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
-               mnt = nfs_do_refmount(client, path->dentry);
-       else
-               mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
-       rpc_shutdown_client(client);
-
+       mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
        if (IS_ERR(mnt))
                goto out;
 
@@ -297,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
  * @authflavor - security flavor to use when performing the mount
  *
  */
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
-                                       struct nfs_fh *fh,
-                                       struct nfs_fattr *fattr,
-                                       rpc_authflavor_t authflavor)
+struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
+                                struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
 {
        struct nfs_clone_mount mountdata = {
                .sb = dentry->d_sb,
@@ -333,3 +253,18 @@ out:
        dprintk("<-- nfs_do_submount() = %p\n", mnt);
        return mnt;
 }
+
+struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
+                             struct nfs_fh *fh, struct nfs_fattr *fattr)
+{      /* .submount hook: redo the lookup, then call nfs_do_submount() */
+       int err;
+       struct dentry *parent = dget_parent(dentry);    /* put again below */
+
+       /* Look the name up again in its parent to fill in fh and fattr */
+       err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
+       dput(parent);
+       if (err != 0)
+               return ERR_PTR(err);    /* lookup failure is returned as-is */
+
+       return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
+}
index aa14ec303e9408a7111bf2cf5ceffeda7703d17e..8a6394edb8b015375eb26016594a09e7b275261f 100644 (file)
@@ -1,3 +1,7 @@
+/*
+ * NFS-private data for each "struct net".  Accessed with net_generic().
+ */
+
 #ifndef __NFS_NETNS_H__
 #define __NFS_NETNS_H__
 
@@ -20,6 +24,7 @@ struct nfs_net {
        struct idr cb_ident_idr; /* Protected by nfs_client_lock */
 #endif
        spinlock_t nfs_client_lock;
+       struct timespec boot_time;
 };
 
 extern int nfs_net_id;
index 1f56000fabbdc1b1283961e340661e20e986e3bf..baf759bccd054d562d24d9adf43af18915da8267 100644 (file)
@@ -61,6 +61,7 @@
 #define NFS_readdirres_sz      (1)
 #define NFS_statfsres_sz       (1+NFS_info_sz)
 
+static int nfs_stat_to_errno(enum nfs_stat);
 
 /*
  * While encoding arguments, set up the reply buffer in advance to
@@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        p = xdr_decode_time(p, &fattr->atime);
        p = xdr_decode_time(p, &fattr->mtime);
        xdr_decode_time(p, &fattr->ctime);
+       fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
+
        return 0;
 out_overflow:
        print_overflow_msg(__func__, xdr);
@@ -1109,7 +1112,7 @@ static const struct {
  * Returns a local errno value, or -EIO if the NFS status code is
  * not recognized.  This function is used jointly by NFSv2 and NFSv3.
  */
-int nfs_stat_to_errno(enum nfs_stat status)
+static int nfs_stat_to_errno(enum nfs_stat status)
 {
        int i;
 
index 75c68299358e226e805e861e4ac8ffc81d96ab4f..2292a0fd2bffd3b042b43e9dc4b607acbd0eb6ad 100644 (file)
@@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 }
 
 static int
-nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+nfs3_proc_lookup(struct inode *dir, struct qstr *name,
                 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
        struct nfs3_diropargs   arg = {
@@ -810,11 +810,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-       if (nfs3_async_handle_jukebox(task, data->inode))
+       struct inode *inode = data->header->inode;
+
+       if (nfs3_async_handle_jukebox(task, inode))
                return -EAGAIN;
 
-       nfs_invalidate_atime(data->inode);
-       nfs_refresh_inode(data->inode, &data->fattr);
+       nfs_invalidate_atime(inode);
+       nfs_refresh_inode(inode, &data->fattr);
        return 0;
 }
 
@@ -830,10 +832,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
 
 static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-       if (nfs3_async_handle_jukebox(task, data->inode))
+       struct inode *inode = data->header->inode;
+
+       if (nfs3_async_handle_jukebox(task, inode))
                return -EAGAIN;
        if (task->tk_status >= 0)
-               nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+               nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
        return 0;
 }
 
@@ -847,7 +851,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
        rpc_call_start(task);
 }
 
-static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{      /* .commit_rpc_prepare hook in nfs_v3_clientops; @data is unused */
+       rpc_call_start(task);   /* no other preparation is done here */
+}
+
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
 {
        if (nfs3_async_handle_jukebox(task, data->inode))
                return -EAGAIN;
@@ -855,7 +864,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
        return 0;
 }
 
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
        msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
 }
@@ -875,6 +884,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
        .file_inode_ops = &nfs3_file_inode_operations,
        .file_ops       = &nfs_file_operations,
        .getroot        = nfs3_proc_get_root,
+       .submount       = nfs_submount,
        .getattr        = nfs3_proc_getattr,
        .setattr        = nfs3_proc_setattr,
        .lookup         = nfs3_proc_lookup,
@@ -906,6 +916,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
        .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
        .write_done     = nfs3_write_done,
        .commit_setup   = nfs3_proc_commit_setup,
+       .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
        .commit_done    = nfs3_commit_done,
        .lock           = nfs3_proc_lock,
        .clear_acl_cache = nfs3_forget_cached_acls,
index a77cc9a3ce5561f1d8b23e78bb16ac49fcbf14b4..902de489ec9bac793dd2e3fa65b663262879b271 100644 (file)
@@ -86,6 +86,8 @@
                                XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
 #define ACL3_setaclres_sz      (1+NFS3_post_op_attr_sz)
 
+static int nfs3_stat_to_errno(enum nfs_stat);
+
 /*
  * Map file type to S_IFMT bits
  */
@@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        p = xdr_decode_nfstime3(p, &fattr->atime);
        p = xdr_decode_nfstime3(p, &fattr->mtime);
        xdr_decode_nfstime3(p, &fattr->ctime);
+       fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
 
        fattr->valid |= NFS_ATTR_FATTR_V3;
        return 0;
@@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
                goto out_overflow;
 
        fattr->valid |= NFS_ATTR_FATTR_PRESIZE
+               | NFS_ATTR_FATTR_PRECHANGE
                | NFS_ATTR_FATTR_PREMTIME
                | NFS_ATTR_FATTR_PRECTIME;
 
        p = xdr_decode_size3(p, &fattr->pre_size);
        p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
        xdr_decode_nfstime3(p, &fattr->pre_ctime);
+       fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
 
        return 0;
 out_overflow:
@@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
  *     };
  */
 static void encode_commit3args(struct xdr_stream *xdr,
-                              const struct nfs_writeargs *args)
+                              const struct nfs_commitargs *args)
 {
        __be32 *p;
 
@@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
                                     struct xdr_stream *xdr,
-                                    const struct nfs_writeargs *args)
+                                    const struct nfs_commitargs *args)
 {
        encode_commit3args(xdr, args);
 }
@@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
 out:
        return error;
 out_default:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1472,7 +1477,7 @@ out_default:
        error = decode_post_op_attr(xdr, result->dir_attr);
        if (unlikely(error))
                goto out;
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
 out:
        return error;
 out_default:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
 out:
        return error;
 out_default:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1636,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1770,7 +1775,7 @@ out_default:
        error = decode_wcc_data(xdr, result->dir_attr);
        if (unlikely(error))
                goto out;
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /**
@@ -2088,7 +2093,7 @@ out_default:
        error = decode_post_op_attr(xdr, result->dir_attr);
        if (unlikely(error))
                goto out;
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 /*
@@ -2319,7 +2324,7 @@ out_status:
  */
 static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
                                   struct xdr_stream *xdr,
-                                  struct nfs_writeres *result)
+                                  struct nfs_commitres *result)
 {
        enum nfs_stat status;
        int error;
@@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 out:
        return error;
 out_status:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 #ifdef CONFIG_NFS_V3_ACL
@@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 out:
        return error;
 out_default:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
 out:
        return error;
 out_default:
-       return nfs_stat_to_errno(status);
+       return nfs3_stat_to_errno(status);
 }
 
 #endif  /* CONFIG_NFS_V3_ACL */
 
+
+/*
+ * Translation table from NFS status values to local errno values,
+ * which may not be the same.  The { -1, -EIO } entry must stay last.
+ */
+static const struct {
+       int stat;
+       int errno;
+} nfs_errtbl[] = {
+       { NFS_OK,               0               },
+       { NFSERR_PERM,          -EPERM          },
+       { NFSERR_NOENT,         -ENOENT         },
+       { NFSERR_IO,            -errno_NFSERR_IO},
+       { NFSERR_NXIO,          -ENXIO          },
+/*     { NFSERR_EAGAIN,        -EAGAIN         }, */
+       { NFSERR_ACCES,         -EACCES         },
+       { NFSERR_EXIST,         -EEXIST         },
+       { NFSERR_XDEV,          -EXDEV          },
+       { NFSERR_NODEV,         -ENODEV         },
+       { NFSERR_NOTDIR,        -ENOTDIR        },
+       { NFSERR_ISDIR,         -EISDIR         },
+       { NFSERR_INVAL,         -EINVAL         },
+       { NFSERR_FBIG,          -EFBIG          },
+       { NFSERR_NOSPC,         -ENOSPC         },
+       { NFSERR_ROFS,          -EROFS          },
+       { NFSERR_MLINK,         -EMLINK         },
+       { NFSERR_NAMETOOLONG,   -ENAMETOOLONG   },
+       { NFSERR_NOTEMPTY,      -ENOTEMPTY      },
+       { NFSERR_DQUOT,         -EDQUOT         },
+       { NFSERR_STALE,         -ESTALE         },
+       { NFSERR_REMOTE,        -EREMOTE        },
+#ifdef EWFLUSH
+       { NFSERR_WFLUSH,        -EWFLUSH        },
+#endif
+       { NFSERR_BADHANDLE,     -EBADHANDLE     },
+       { NFSERR_NOT_SYNC,      -ENOTSYNC       },
+       { NFSERR_BAD_COOKIE,    -EBADCOOKIE     },
+       { NFSERR_NOTSUPP,       -ENOTSUPP       },
+       { NFSERR_TOOSMALL,      -ETOOSMALL      },
+       { NFSERR_SERVERFAULT,   -EREMOTEIO      },
+       { NFSERR_BADTYPE,       -EBADTYPE       },
+       { NFSERR_JUKEBOX,       -EJUKEBOX       },
+       { -1,                   -EIO            }
+};
+
+/**
+ * nfs3_stat_to_errno - convert an NFS status code to a local errno
+ * @status: NFS status code to convert
+ *
+ * Returns a local errno value, or -EIO if the NFS status code is
+ * not recognized.  File-local; the NFSv2 code keeps its own static copy.
+ */
+static int nfs3_stat_to_errno(enum nfs_stat status)
+{
+       int i;
+
+       for (i = 0; nfs_errtbl[i].stat != -1; i++) {    /* stop at end marker */
+               if (nfs_errtbl[i].stat == (int)status)
+                       return nfs_errtbl[i].errno;
+       }
+       dprintk("NFS: Unrecognized nfs status value: %u\n", status);
+       return nfs_errtbl[i].errno;     /* i is at the end marker: -EIO */
+}
+
+
 #define PROC(proc, argtype, restype, timer)                            \
 [NFS3PROC_##proc] = {                                                  \
        .p_proc      = NFS3PROC_##proc,                                 \
index 8d75021020b31f44f0fcb9ec6f1ff05a8b39b313..c6827f93ab57caeab4e613e97919c04a51aa5d64 100644 (file)
@@ -24,6 +24,8 @@ enum nfs4_client_state {
        NFS4CLNT_RECALL_SLOT,
        NFS4CLNT_LEASE_CONFIRM,
        NFS4CLNT_SERVER_SCOPE_MISMATCH,
+       NFS4CLNT_PURGE_STATE,
+       NFS4CLNT_BIND_CONN_TO_SESSION,
 };
 
 enum nfs4_session_state {
@@ -52,11 +54,6 @@ struct nfs4_minor_version_ops {
        const struct nfs4_state_maintenance_ops *state_renewal_ops;
 };
 
-struct nfs_unique_id {
-       struct rb_node rb_node;
-       __u64 id;
-};
-
 #define NFS_SEQID_CONFIRMED 1
 struct nfs_seqid_counter {
        ktime_t create_time;
@@ -206,12 +203,18 @@ extern const struct dentry_operations nfs4_dentry_operations;
 extern const struct inode_operations nfs4_dir_inode_operations;
 
 /* nfs4namespace.c */
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
 struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
+                              struct nfs_fh *, struct nfs_fattr *);
 
 /* nfs4proc.c */
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
+extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
 extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+extern int nfs4_destroy_clientid(struct nfs_client *clp);
 extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -239,8 +242,8 @@ extern int nfs41_setup_sequence(struct nfs4_session *session,
                struct rpc_task *task);
 extern void nfs4_destroy_session(struct nfs4_session *session);
 extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
-extern int nfs4_proc_create_session(struct nfs_client *);
-extern int nfs4_proc_destroy_session(struct nfs4_session *);
+extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
 extern int nfs4_init_session(struct nfs_server *server);
 extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
                struct nfs_fsinfo *fsinfo);
@@ -310,9 +313,9 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
 #if defined(CONFIG_NFS_V4_1)
 struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
 struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
-extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
 #else
-static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
 {
 }
 #endif /* CONFIG_NFS_V4_1 */
@@ -334,7 +337,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs41_handle_recall_slot(struct nfs_client *clp);
 extern void nfs41_handle_server_scope(struct nfs_client *,
-                                     struct server_scope **);
+                                     struct nfs41_server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
index 5acfd9ea8a31390eb0f6b3ab6872f8efc334bc2b..e1340293872c7a70e747d051888e5ab603db905e 100644 (file)
@@ -82,29 +82,76 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
        BUG();
 }
 
+static void filelayout_reset_write(struct nfs_write_data *data)
+{      /* Reset this write for i/o through the MDS, once per pgio header. */
+       struct nfs_pgio_header *hdr = data->header;
+       struct rpc_task *task = &data->task;
+
+       if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { /* once only */
+               dprintk("%s Reset task %5u for i/o through MDS "
+                       "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+                       data->task.tk_pid,
+                       hdr->inode->i_sb->s_id,
+                       (long long)NFS_FILEID(hdr->inode),
+                       data->args.count,
+                       (unsigned long long)data->args.offset);
+
+               task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+                                                       &hdr->pages,
+                                                       hdr->completion_ops); /* result becomes task status */
+       }
+}
+
+static void filelayout_reset_read(struct nfs_read_data *data)
+{      /* Reset this read for i/o through the MDS, once per pgio header. */
+       struct nfs_pgio_header *hdr = data->header;
+       struct rpc_task *task = &data->task;
+
+       if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { /* once only */
+               dprintk("%s Reset task %5u for i/o through MDS "
+                       "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+                       data->task.tk_pid,
+                       hdr->inode->i_sb->s_id,
+                       (long long)NFS_FILEID(hdr->inode),
+                       data->args.count,
+                       (unsigned long long)data->args.offset);
+
+               task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+                                                       &hdr->pages,
+                                                       hdr->completion_ops); /* result becomes task status */
+       }
+}
+
 static int filelayout_async_handle_error(struct rpc_task *task,
                                         struct nfs4_state *state,
                                         struct nfs_client *clp,
-                                        int *reset)
+                                        struct pnfs_layout_segment *lseg)
 {
-       struct nfs_server *mds_server = NFS_SERVER(state->inode);
+       struct inode *inode = lseg->pls_layout->plh_inode;
+       struct nfs_server *mds_server = NFS_SERVER(inode);
+       struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
        struct nfs_client *mds_client = mds_server->nfs_client;
+       struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
 
        if (task->tk_status >= 0)
                return 0;
-       *reset = 0;
 
        switch (task->tk_status) {
        /* MDS state errors */
        case -NFS4ERR_DELEG_REVOKED:
        case -NFS4ERR_ADMIN_REVOKED:
        case -NFS4ERR_BAD_STATEID:
+               if (state == NULL)
+                       break;
                nfs_remove_bad_delegation(state->inode);
        case -NFS4ERR_OPENMODE:
+               if (state == NULL)
+                       break;
                nfs4_schedule_stateid_recovery(mds_server, state);
                goto wait_on_recovery;
        case -NFS4ERR_EXPIRED:
-               nfs4_schedule_stateid_recovery(mds_server, state);
+               if (state != NULL)
+                       nfs4_schedule_stateid_recovery(mds_server, state);
                nfs4_schedule_lease_recovery(mds_client);
                goto wait_on_recovery;
        /* DS session errors */
@@ -118,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
                dprintk("%s ERROR %d, Reset session. Exchangeid "
                        "flags 0x%x\n", __func__, task->tk_status,
                        clp->cl_exchange_flags);
-               nfs4_schedule_session_recovery(clp->cl_session);
+               nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
                break;
        case -NFS4ERR_DELAY:
        case -NFS4ERR_GRACE:
@@ -127,11 +174,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,
                break;
        case -NFS4ERR_RETRY_UNCACHED_REP:
                break;
+       /* Invalidate Layout errors */
+       case -NFS4ERR_PNFS_NO_LAYOUT:
+       case -ESTALE:           /* mapped NFS4ERR_STALE */
+       case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
+       case -EISDIR:           /* mapped NFS4ERR_ISDIR */
+       case -NFS4ERR_FHEXPIRED:
+       case -NFS4ERR_WRONG_TYPE:
+               dprintk("%s Invalid layout error %d\n", __func__,
+                       task->tk_status);
+               /*
+                * Destroy layout so new i/o will get a new layout.
+                * Layout will not be destroyed until all current lseg
+                * references are put. Mark layout as invalid to resend failed
+                * i/o and all i/o waiting on the slot table to the MDS until
+                * layout is destroyed and a new valid layout is obtained.
+                */
+               set_bit(NFS_LAYOUT_INVALID,
+                               &NFS_I(inode)->layout->plh_flags);
+               pnfs_destroy_layout(NFS_I(inode));
+               rpc_wake_up(&tbl->slot_tbl_waitq);
+               goto reset;
+       /* RPC connection errors */
+       case -ECONNREFUSED:
+       case -EHOSTDOWN:
+       case -EHOSTUNREACH:
+       case -ENETUNREACH:
+       case -EIO:
+       case -ETIMEDOUT:
+       case -EPIPE:
+               dprintk("%s DS connection error %d\n", __func__,
+                       task->tk_status);
+               if (!filelayout_test_devid_invalid(devid))
+                       _pnfs_return_layout(inode);
+               filelayout_mark_devid_invalid(devid);
+               rpc_wake_up(&tbl->slot_tbl_waitq);
+               nfs4_ds_disconnect(clp);
+               /* fall through */
        default:
-               dprintk("%s DS error. Retry through MDS %d\n", __func__,
+reset:
+               dprintk("%s Retry through MDS. Error %d\n", __func__,
                        task->tk_status);
-               *reset = 1;
-               break;
+               return -NFS4ERR_RESET_TO_MDS;
        }
 out:
        task->tk_status = 0;
@@ -148,18 +232,17 @@ wait_on_recovery:
 static int filelayout_read_done_cb(struct rpc_task *task,
                                struct nfs_read_data *data)
 {
-       int reset = 0;
+       struct nfs_pgio_header *hdr = data->header;
+       int err;
 
-       dprintk("%s DS read\n", __func__);
+       err = filelayout_async_handle_error(task, data->args.context->state,
+                                           data->ds_clp, hdr->lseg);
 
-       if (filelayout_async_handle_error(task, data->args.context->state,
-                                         data->ds_clp, &reset) == -EAGAIN) {
-               dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
-                       __func__, data->ds_clp, data->ds_clp->cl_session);
-               if (reset) {
-                       pnfs_set_lo_fail(data->lseg);
-                       nfs4_reset_read(task, data);
-               }
+       switch (err) {
+       case -NFS4ERR_RESET_TO_MDS:
+               filelayout_reset_read(data);
+               return task->tk_status;
+       case -EAGAIN:
                rpc_restart_call_prepare(task);
                return -EAGAIN;
        }
@@ -175,13 +258,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 static void
 filelayout_set_layoutcommit(struct nfs_write_data *wdata)
 {
-       if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
+       struct nfs_pgio_header *hdr = wdata->header;
+
+       if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
            wdata->res.verf->committed == NFS_FILE_SYNC)
                return;
 
        pnfs_set_layoutcommit(wdata);
-       dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
-               (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb);
+       dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+               (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
 /*
@@ -191,8 +276,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
  */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-       struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+       struct nfs_read_data *rdata = data;
 
+       if (filelayout_reset_to_mds(rdata->header->lseg)) {
+               dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
+               filelayout_reset_read(rdata);
+               rpc_exit(task, 0);
+               return;
+       }
        rdata->read_done_cb = filelayout_read_done_cb;
 
        if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
@@ -205,42 +296,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-       struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+       struct nfs_read_data *rdata = data;
 
        dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
+       if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
+           task->tk_status == 0)
+               return;
+
        /* Note this may cause RPC to be resent */
-       rdata->mds_ops->rpc_call_done(task, data);
+       rdata->header->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-       struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+       struct nfs_read_data *rdata = data;
 
-       rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+       rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-       struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+       struct nfs_read_data *rdata = data;
 
-       put_lseg(rdata->lseg);
-       rdata->mds_ops->rpc_release(data);
+       nfs_put_client(rdata->ds_clp);
+       rdata->header->mds_ops->rpc_release(data);
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
                                struct nfs_write_data *data)
 {
-       int reset = 0;
-
-       if (filelayout_async_handle_error(task, data->args.context->state,
-                                         data->ds_clp, &reset) == -EAGAIN) {
-               dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
-                       __func__, data->ds_clp, data->ds_clp->cl_session);
-               if (reset) {
-                       pnfs_set_lo_fail(data->lseg);
-                       nfs4_reset_write(task, data);
-               }
+       struct nfs_pgio_header *hdr = data->header;
+       int err;
+
+       err = filelayout_async_handle_error(task, data->args.context->state,
+                                           data->ds_clp, hdr->lseg);
+
+       switch (err) {
+       case -NFS4ERR_RESET_TO_MDS:
+               filelayout_reset_write(data);
+               return task->tk_status;
+       case -EAGAIN:
                rpc_restart_call_prepare(task);
                return -EAGAIN;
        }
@@ -250,7 +346,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
 }
 
 /* Fake up some data that will cause nfs_commit_release to retry the writes. */
-static void prepare_to_resend_writes(struct nfs_write_data *data)
+static void prepare_to_resend_writes(struct nfs_commit_data *data)
 {
        struct nfs_page *first = nfs_list_entry(data->pages.next);
 
@@ -261,19 +357,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
 }
 
 static int filelayout_commit_done_cb(struct rpc_task *task,
-                                    struct nfs_write_data *data)
+                                    struct nfs_commit_data *data)
 {
-       int reset = 0;
-
-       if (filelayout_async_handle_error(task, data->args.context->state,
-                                         data->ds_clp, &reset) == -EAGAIN) {
-               dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
-                       __func__, data->ds_clp, data->ds_clp->cl_session);
-               if (reset) {
-                       prepare_to_resend_writes(data);
-                       pnfs_set_lo_fail(data->lseg);
-               } else
-                       rpc_restart_call_prepare(task);
+       int err;
+
+       err = filelayout_async_handle_error(task, NULL, data->ds_clp,
+                                           data->lseg);
+
+       switch (err) {
+       case -NFS4ERR_RESET_TO_MDS:
+               prepare_to_resend_writes(data);
+               return -EAGAIN;
+       case -EAGAIN:
+               rpc_restart_call_prepare(task);
                return -EAGAIN;
        }
 
@@ -282,8 +378,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+       struct nfs_write_data *wdata = data;
 
+       if (filelayout_reset_to_mds(wdata->header->lseg)) {
+               dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
+               filelayout_reset_write(wdata);
+               rpc_exit(task, 0);
+               return;
+       }
        if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
                                &wdata->args.seq_args, &wdata->res.seq_res,
                                task))
@@ -294,36 +396,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+       struct nfs_write_data *wdata = data;
+
+       if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
+           task->tk_status == 0)
+               return;
 
        /* Note this may cause RPC to be resent */
-       wdata->mds_ops->rpc_call_done(task, data);
+       wdata->header->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+       struct nfs_write_data *wdata = data;
 
-       rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+       rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+       struct nfs_write_data *wdata = data;
+
+       nfs_put_client(wdata->ds_clp);
+       wdata->header->mds_ops->rpc_release(data);
+}
+
+static void filelayout_commit_prepare(struct rpc_task *task, void *data)
+{
+       struct nfs_commit_data *wdata = data;
 
-       put_lseg(wdata->lseg);
-       wdata->mds_ops->rpc_release(data);
+       if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+                               &wdata->args.seq_args, &wdata->res.seq_res,
+                               task))
+               return;
+
+       rpc_call_start(task);
+}
+
+static void filelayout_write_commit_done(struct rpc_task *task, void *data)
+{
+       struct nfs_commit_data *cdata = data;
+
+       /* Forward to the MDS rpc_call_done hook; the RPC may get resent. */
+       cdata->mds_ops->rpc_call_done(task, cdata);
+}
+
+static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
+{
+       struct nfs_commit_data *cdata = data;
+
+       rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
 }
 
-static void filelayout_commit_release(void *data)
+static void filelayout_commit_release(void *calldata)
 {
-       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+       struct nfs_commit_data *data = calldata;
 
-       nfs_commit_release_pages(wdata);
-       if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
-               nfs_commit_clear_lock(NFS_I(wdata->inode));
-       put_lseg(wdata->lseg);
-       nfs_commitdata_release(wdata);
+       data->completion_ops->completion(data);
+       put_lseg(data->lseg);
+       nfs_put_client(data->ds_clp);
+       nfs_commitdata_release(data);
 }
 
 static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,16 +473,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
 };
 
 static const struct rpc_call_ops filelayout_commit_call_ops = {
-       .rpc_call_prepare = filelayout_write_prepare,
-       .rpc_call_done = filelayout_write_call_done,
-       .rpc_count_stats = filelayout_write_count_stats,
+       .rpc_call_prepare = filelayout_commit_prepare,
+       .rpc_call_done = filelayout_write_commit_done,
+       .rpc_count_stats = filelayout_commit_count_stats,
        .rpc_release = filelayout_commit_release,
 };
 
 static enum pnfs_try_status
 filelayout_read_pagelist(struct nfs_read_data *data)
 {
-       struct pnfs_layout_segment *lseg = data->lseg;
+       struct nfs_pgio_header *hdr = data->header;
+       struct pnfs_layout_segment *lseg = hdr->lseg;
        struct nfs4_pnfs_ds *ds;
        loff_t offset = data->args.offset;
        u32 j, idx;
@@ -358,25 +491,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
        int status;
 
        dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
-               __func__, data->inode->i_ino,
+               __func__, hdr->inode->i_ino,
                data->args.pgbase, (size_t)data->args.count, offset);
 
-       if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
-               return PNFS_NOT_ATTEMPTED;
-
        /* Retrieve the correct rpc_client for the byte range */
        j = nfs4_fl_calc_j_index(lseg, offset);
        idx = nfs4_fl_calc_ds_index(lseg, j);
        ds = nfs4_fl_prepare_ds(lseg, idx);
-       if (!ds) {
-               /* Either layout fh index faulty, or ds connect failed */
-               set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
-               set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+       if (!ds)
                return PNFS_NOT_ATTEMPTED;
-       }
-       dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
+       dprintk("%s USE DS: %s cl_count %d\n", __func__,
+               ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
        /* No multipath support. Use first DS */
+       atomic_inc(&ds->ds_clp->cl_count);
        data->ds_clp = ds->ds_clp;
        fh = nfs4_fl_select_ds_fh(lseg, j);
        if (fh)
@@ -386,8 +514,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
        data->mds_offset = offset;
 
        /* Perform an asynchronous read to ds */
-       status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
-                                  &filelayout_read_call_ops);
+       status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
+                                 &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
        BUG_ON(status != 0);
        return PNFS_ATTEMPTED;
 }
@@ -396,32 +524,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 static enum pnfs_try_status
 filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 {
-       struct pnfs_layout_segment *lseg = data->lseg;
+       struct nfs_pgio_header *hdr = data->header;
+       struct pnfs_layout_segment *lseg = hdr->lseg;
        struct nfs4_pnfs_ds *ds;
        loff_t offset = data->args.offset;
        u32 j, idx;
        struct nfs_fh *fh;
        int status;
 
-       if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
-               return PNFS_NOT_ATTEMPTED;
-
        /* Retrieve the correct rpc_client for the byte range */
        j = nfs4_fl_calc_j_index(lseg, offset);
        idx = nfs4_fl_calc_ds_index(lseg, j);
        ds = nfs4_fl_prepare_ds(lseg, idx);
-       if (!ds) {
-               printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
-                       __func__);
-               set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
-               set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+       if (!ds)
                return PNFS_NOT_ATTEMPTED;
-       }
-       dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
-               data->inode->i_ino, sync, (size_t) data->args.count, offset,
-               ds->ds_remotestr);
+       dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
+               __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
+               offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
        data->write_done_cb = filelayout_write_done_cb;
+       atomic_inc(&ds->ds_clp->cl_count);
        data->ds_clp = ds->ds_clp;
        fh = nfs4_fl_select_ds_fh(lseg, j);
        if (fh)
@@ -433,8 +555,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
        data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
        /* Perform an asynchronous write */
-       status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
-                                   &filelayout_write_call_ops, sync);
+       status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
+                                   &filelayout_write_call_ops, sync,
+                                   RPC_TASK_SOFTCONN);
        BUG_ON(status != 0);
        return PNFS_ATTEMPTED;
 }
@@ -650,10 +773,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 
        dprintk("--> %s\n", __func__);
        nfs4_fl_put_deviceid(fl->dsaddr);
-       kfree(fl->commit_buckets);
+       /* This assumes a single RW lseg */
+       if (lseg->pls_range.iomode == IOMODE_RW) {
+               struct nfs4_filelayout *flo;
+
+               flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
+               flo->commit_info.nbuckets = 0;
+               kfree(flo->commit_info.buckets);
+               flo->commit_info.buckets = NULL;
+       }
        _filelayout_free_lseg(fl);
 }
 
+/* Allocate the per-DS commit buckets on first use, unless commits go
+ * through the MDS.  Returns 0 on success or no-op, -ENOMEM on failure.
+ */
+static int
+filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
+                            struct nfs_commit_info *cinfo,
+                            gfp_t gfp_flags)
+{
+       struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+       struct pnfs_commit_bucket *buckets;
+       int size, i;
+
+       if (fl->commit_through_mds)
+               return 0;
+       if (cinfo->ds->nbuckets != 0) {
+               /* This assumes there is only one IOMODE_RW lseg.  What
+                * we really want to do is have a layout_hdr level
+                * dictionary of <multipath_list4, fh> keys, each
+                * associated with a struct list_head, populated by calls
+                * to filelayout_write_pagelist().
+                * */
+               return 0;
+       }
+
+       size = (fl->stripe_type == STRIPE_SPARSE) ?
+               fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+       buckets = kcalloc(size, sizeof(*buckets), gfp_flags);
+       if (!buckets)
+               return -ENOMEM;
+       /* Set up the list heads before the array can become visible. */
+       for (i = 0; i < size; i++) {
+               INIT_LIST_HEAD(&buckets[i].written);
+               INIT_LIST_HEAD(&buckets[i].committing);
+       }
+       spin_lock(cinfo->lock);
+       if (cinfo->ds->nbuckets == 0) {
+               cinfo->ds->buckets = buckets;
+               cinfo->ds->nbuckets = size;
+               buckets = NULL;
+       }
+       spin_unlock(cinfo->lock);
+       /* Lost the race to another allocator: drop our unused copy. */
+       kfree(buckets);
+       return 0;
+}
+
 static struct pnfs_layout_segment *
 filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
                      struct nfs4_layoutget_res *lgr,
@@ -673,29 +851,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
                _filelayout_free_lseg(fl);
                return NULL;
        }
-
-       /* This assumes there is only one IOMODE_RW lseg.  What
-        * we really want to do is have a layout_hdr level
-        * dictionary of <multipath_list4, fh> keys, each
-        * associated with a struct list_head, populated by calls
-        * to filelayout_write_pagelist().
-        * */
-       if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
-               int i;
-               int size = (fl->stripe_type == STRIPE_SPARSE) ?
-                       fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
-               fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
-               if (!fl->commit_buckets) {
-                       filelayout_free_lseg(&fl->generic_hdr);
-                       return NULL;
-               }
-               fl->number_of_buckets = size;
-               for (i = 0; i < size; i++) {
-                       INIT_LIST_HEAD(&fl->commit_buckets[i].written);
-                       INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
-               }
-       }
        return &fl->generic_hdr;
 }
 
@@ -716,8 +871,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
            !nfs_generic_pg_test(pgio, prev, req))
                return false;
 
-       p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
-       r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
+       p_stripe = (u64)req_offset(prev);
+       r_stripe = (u64)req_offset(req);
        stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
 
        do_div(p_stripe, stripe_unit);
@@ -732,6 +887,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 {
        BUG_ON(pgio->pg_lseg != NULL);
 
+       if (req->wb_offset != req->wb_pgbase) {
+               /*
+                * Handling unaligned pages is difficult, because we would
+                * have to somehow split a req in two in certain cases in
+                * the pg.test code.  Avoid this by just not using pnfs
+                * in this case.
+                */
+               nfs_pageio_reset_read_mds(pgio);
+               return;
+       }
        pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                           req->wb_context,
                                           0,
@@ -747,8 +912,13 @@ static void
 filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
                         struct nfs_page *req)
 {
+       struct nfs_commit_info cinfo;
+       int status;
+
        BUG_ON(pgio->pg_lseg != NULL);
 
+       if (req->wb_offset != req->wb_pgbase)
+               goto out_mds;
        pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                           req->wb_context,
                                           0,
@@ -757,7 +927,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
                                           GFP_NOFS);
        /* If no lseg, fall back to write through mds */
        if (pgio->pg_lseg == NULL)
-               nfs_pageio_reset_write_mds(pgio);
+               goto out_mds;
+       nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
+       status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
+       if (status < 0) {
+               put_lseg(pgio->pg_lseg);
+               pgio->pg_lseg = NULL;
+               goto out_mds;
+       }
+       return;
+out_mds:
+       nfs_pageio_reset_write_mds(pgio);
 }
 
 static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -784,43 +964,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
  * If this will make the bucket empty, it will need to put the lseg reference.
  */
 static void
-filelayout_clear_request_commit(struct nfs_page *req)
+filelayout_clear_request_commit(struct nfs_page *req,
+                               struct nfs_commit_info *cinfo)
 {
        struct pnfs_layout_segment *freeme = NULL;
-       struct inode *inode = req->wb_context->dentry->d_inode;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(cinfo->lock);
        if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
                goto out;
+       cinfo->ds->nwritten--;
        if (list_is_singular(&req->wb_list)) {
-               struct pnfs_layout_segment *lseg;
+               struct pnfs_commit_bucket *bucket;
 
-               /* From here we can find the bucket, but for the moment,
-                * since there is only one relevant lseg...
-                */
-               list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
-                       if (lseg->pls_range.iomode == IOMODE_RW) {
-                               freeme = lseg;
-                               break;
-                       }
-               }
+               bucket = list_first_entry(&req->wb_list,
+                                         struct pnfs_commit_bucket,
+                                         written);
+               freeme = bucket->wlseg;
+               bucket->wlseg = NULL;
        }
 out:
-       nfs_request_remove_commit_list(req);
-       spin_unlock(&inode->i_lock);
+       nfs_request_remove_commit_list(req, cinfo);
+       spin_unlock(cinfo->lock);
        put_lseg(freeme);
 }
 
 static struct list_head *
 filelayout_choose_commit_list(struct nfs_page *req,
-                             struct pnfs_layout_segment *lseg)
+                             struct pnfs_layout_segment *lseg,
+                             struct nfs_commit_info *cinfo)
 {
        struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
        u32 i, j;
        struct list_head *list;
+       struct pnfs_commit_bucket *buckets;
 
        if (fl->commit_through_mds)
-               return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+               return &cinfo->mds->list;
 
        /* Note that we are calling nfs4_fl_calc_j_index on each page
         * that ends up being committed to a data server.  An attractive
@@ -828,31 +1007,33 @@ filelayout_choose_commit_list(struct nfs_page *req,
         * to store the value calculated in filelayout_write_pagelist
         * and just use that here.
         */
-       j = nfs4_fl_calc_j_index(lseg,
-                                (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
+       j = nfs4_fl_calc_j_index(lseg, req_offset(req));
        i = select_bucket_index(fl, j);
-       list = &fl->commit_buckets[i].written;
+       buckets = cinfo->ds->buckets;
+       list = &buckets[i].written;
        if (list_empty(list)) {
                /* Non-empty buckets hold a reference on the lseg.  That ref
                 * is normally transferred to the COMMIT call and released
                 * there.  It could also be released if the last req is pulled
                 * off due to a rewrite, in which case it will be done in
-                * filelayout_remove_commit_req
+                * filelayout_clear_request_commit
                 */
-               get_lseg(lseg);
+               buckets[i].wlseg = get_lseg(lseg);
        }
        set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+       cinfo->ds->nwritten++;
        return list;
 }
 
 static void
 filelayout_mark_request_commit(struct nfs_page *req,
-               struct pnfs_layout_segment *lseg)
+                              struct pnfs_layout_segment *lseg,
+                              struct nfs_commit_info *cinfo)
 {
        struct list_head *list;
 
-       list = filelayout_choose_commit_list(req, lseg);
-       nfs_request_add_commit_list(req, list);
+       list = filelayout_choose_commit_list(req, lseg, cinfo);
+       nfs_request_add_commit_list(req, list, cinfo);
 }
 
 static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -880,7 +1061,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
        return flseg->fh_array[i];
 }
 
-static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
+static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 {
        struct pnfs_layout_segment *lseg = data->lseg;
        struct nfs4_pnfs_ds *ds;
@@ -890,135 +1071,138 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
        idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
        ds = nfs4_fl_prepare_ds(lseg, idx);
        if (!ds) {
-               printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
-                       __func__);
-               set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
-               set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                prepare_to_resend_writes(data);
                filelayout_commit_release(data);
                return -EAGAIN;
        }
-       dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
-       data->write_done_cb = filelayout_commit_done_cb;
+       dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
+               data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
+       data->commit_done_cb = filelayout_commit_done_cb;
+       atomic_inc(&ds->ds_clp->cl_count);
        data->ds_clp = ds->ds_clp;
        fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
        if (fh)
                data->args.fh = fh;
-       return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
-                                  &filelayout_commit_call_ops, how);
-}
-
-/*
- * This is only useful while we are using whole file layouts.
- */
-static struct pnfs_layout_segment *
-find_only_write_lseg_locked(struct inode *inode)
-{
-       struct pnfs_layout_segment *lseg;
-
-       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
-               if (lseg->pls_range.iomode == IOMODE_RW)
-                       return lseg;
-       return NULL;
-}
-
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
-{
-       struct pnfs_layout_segment *rv;
-
-       spin_lock(&inode->i_lock);
-       rv = find_only_write_lseg_locked(inode);
-       if (rv)
-               get_lseg(rv);
-       spin_unlock(&inode->i_lock);
-       return rv;
+       return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
+                                  &filelayout_commit_call_ops, how,
+                                  RPC_TASK_SOFTCONN);
 }
 
 static int
-filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
-               spinlock_t *lock)
+transfer_commit_list(struct list_head *src, struct list_head *dst,
+                    struct nfs_commit_info *cinfo, int max)
 {
-       struct list_head *src = &bucket->written;
-       struct list_head *dst = &bucket->committing;
        struct nfs_page *req, *tmp;
        int ret = 0;
 
        list_for_each_entry_safe(req, tmp, src, wb_list) {
                if (!nfs_lock_request(req))
                        continue;
-               if (cond_resched_lock(lock))
+               kref_get(&req->wb_kref);
+               if (cond_resched_lock(cinfo->lock))
                        list_safe_reset_next(req, tmp, wb_list);
-               nfs_request_remove_commit_list(req);
+               nfs_request_remove_commit_list(req, cinfo);
                clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
                nfs_list_add_request(req, dst);
                ret++;
-               if (ret == max)
+               if ((ret == max) && !cinfo->dreq)
                        break;
        }
        return ret;
 }
 
+static int
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+                              struct nfs_commit_info *cinfo,
+                              int max)
+{
+       int moved;
+
+       moved = transfer_commit_list(&bucket->written, &bucket->committing,
+                                    cinfo, max);
+       if (moved == 0)
+               return 0;
+       cinfo->ds->nwritten -= moved;
+       cinfo->ds->ncommitting += moved;
+       /* The committing side takes over, or shares, the written-list lseg. */
+       bucket->clseg = bucket->wlseg;
+       if (list_empty(&bucket->written))
+               bucket->wlseg = NULL;
+       else
+               get_lseg(bucket->clseg);
+       return moved;
+}
+
 /* Move reqs from written to committing lists, returning count of number moved.
- * Note called with i_lock held.
+ * Note called with cinfo->lock held.
  */
-static int filelayout_scan_commit_lists(struct inode *inode, int max,
-               spinlock_t *lock)
+static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
+                                       int max)
 {
-       struct pnfs_layout_segment *lseg;
-       struct nfs4_filelayout_segment *fl;
        int i, rv = 0, cnt;
 
-       lseg = find_only_write_lseg_locked(inode);
-       if (!lseg)
-               goto out_done;
-       fl = FILELAYOUT_LSEG(lseg);
-       if (fl->commit_through_mds)
-               goto out_done;
-       for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
-               cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
-                               max, lock);
+       for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
+               cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
+                                                    cinfo, max);
                max -= cnt;
                rv += cnt;
        }
-out_done:
        return rv;
 }
 
+/* Pull everything off the written lists and dump into @dst */
+static void filelayout_recover_commit_reqs(struct list_head *dst,
+                                          struct nfs_commit_info *cinfo)
+{
+       struct pnfs_commit_bucket *b;
+       int i;
+
+       /* NOTE: cinfo->lock is NOT held; this relies on the fact that
+        * this is only called from a single thread per dreq.
+        * We can't take the lock because we need to do put_lseg.
+        */
+       for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
+               if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
+                       BUG_ON(!list_empty(&b->written));
+                       put_lseg(b->wlseg);
+                       b->wlseg = NULL;
+               }
+       }
+       cinfo->ds->nwritten = 0;
+}
+
 static unsigned int
-alloc_ds_commits(struct inode *inode, struct list_head *list)
+alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 {
-       struct pnfs_layout_segment *lseg;
-       struct nfs4_filelayout_segment *fl;
-       struct nfs_write_data *data;
+       struct pnfs_ds_commit_info *fl_cinfo;
+       struct pnfs_commit_bucket *bucket;
+       struct nfs_commit_data *data;
        int i, j;
        unsigned int nreq = 0;
 
-       /* Won't need this when non-whole file layout segments are supported
-        * instead we will use a pnfs_layout_hdr structure */
-       lseg = find_only_write_lseg(inode);
-       if (!lseg)
-               return 0;
-       fl = FILELAYOUT_LSEG(lseg);
-       for (i = 0; i < fl->number_of_buckets; i++) {
-               if (list_empty(&fl->commit_buckets[i].committing))
+       fl_cinfo = cinfo->ds;
+       bucket = fl_cinfo->buckets;
+       for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
+               if (list_empty(&bucket->committing))
                        continue;
                data = nfs_commitdata_alloc();
                if (!data)
                        break;
                data->ds_commit_index = i;
-               data->lseg = lseg;
+               data->lseg = bucket->clseg;
+               bucket->clseg = NULL;
                list_add(&data->pages, list);
                nreq++;
        }
 
        /* Clean up on error */
-       for (j = i; j < fl->number_of_buckets; j++) {
-               if (list_empty(&fl->commit_buckets[i].committing))
+       for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
+               if (list_empty(&bucket->committing))
                        continue;
-               nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
-               put_lseg(lseg);  /* associated with emptying bucket */
+               nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
+               put_lseg(bucket->clseg);
+               bucket->clseg = NULL;
        }
-       put_lseg(lseg);
        /* Caller will clean up entries put on list */
        return nreq;
 }
@@ -1026,9 +1210,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
 /* This follows nfs_commit_list pretty closely */
 static int
 filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
-                          int how)
+                          int how, struct nfs_commit_info *cinfo)
 {
-       struct nfs_write_data   *data, *tmp;
+       struct nfs_commit_data *data, *tmp;
        LIST_HEAD(list);
        unsigned int nreq = 0;
 
@@ -1039,30 +1223,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
                        list_add(&data->pages, &list);
                        nreq++;
                } else
-                       nfs_retry_commit(mds_pages, NULL);
+                       nfs_retry_commit(mds_pages, NULL, cinfo);
        }
 
-       nreq += alloc_ds_commits(inode, &list);
+       nreq += alloc_ds_commits(cinfo, &list);
 
        if (nreq == 0) {
-               nfs_commit_clear_lock(NFS_I(inode));
+               cinfo->completion_ops->error_cleanup(NFS_I(inode));
                goto out;
        }
 
-       atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
+       atomic_add(nreq, &cinfo->mds->rpcs_out);
 
        list_for_each_entry_safe(data, tmp, &list, pages) {
                list_del_init(&data->pages);
                if (!data->lseg) {
-                       nfs_init_commit(data, mds_pages, NULL);
-                       nfs_initiate_commit(data, NFS_CLIENT(inode),
-                                           data->mds_ops, how);
+                       nfs_init_commit(data, mds_pages, NULL, cinfo);
+                       nfs_initiate_commit(NFS_CLIENT(inode), data,
+                                           data->mds_ops, how, 0);
                } else {
-                       nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
+                       struct pnfs_commit_bucket *buckets;
+
+                       buckets = cinfo->ds->buckets;
+                       nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
                        filelayout_initiate_commit(data, how);
                }
        }
 out:
+       cinfo->ds->ncommitting = 0;
        return PNFS_ATTEMPTED;
 }
 
@@ -1072,17 +1260,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
        nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
 }
 
+static struct pnfs_layout_hdr *
+filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
+{
+       struct nfs4_filelayout *flo = kzalloc(sizeof(*flo), gfp_flags);
+
+       /* Allocate a zeroed filelayout header; NULL on allocation failure. */
+       return flo != NULL ? &flo->generic_hdr : NULL;
+}
+
+/* Free the nfs4_filelayout that embeds the generic header @lo. */
+static void filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+       kfree(FILELAYOUT_FROM_HDR(lo));
+}
+
+static struct pnfs_ds_commit_info *
+filelayout_get_ds_info(struct inode *inode)
+{
+       struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
+
+       /* Without a layout header there is no DS commit state to return. */
+       if (!lo)
+               return NULL;
+       return &FILELAYOUT_FROM_HDR(lo)->commit_info;
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
        .id                     = LAYOUT_NFSV4_1_FILES,
        .name                   = "LAYOUT_NFSV4_1_FILES",
        .owner                  = THIS_MODULE,
+       .alloc_layout_hdr       = filelayout_alloc_layout_hdr,
+       .free_layout_hdr        = filelayout_free_layout_hdr,
        .alloc_lseg             = filelayout_alloc_lseg,
        .free_lseg              = filelayout_free_lseg,
        .pg_read_ops            = &filelayout_pg_read_ops,
        .pg_write_ops           = &filelayout_pg_write_ops,
+       .get_ds_info            = &filelayout_get_ds_info,
        .mark_request_commit    = filelayout_mark_request_commit,
        .clear_request_commit   = filelayout_clear_request_commit,
        .scan_commit_lists      = filelayout_scan_commit_lists,
+       .recover_commit_reqs    = filelayout_recover_commit_reqs,
        .commit_pagelist        = filelayout_commit_pagelist,
        .read_pagelist          = filelayout_read_pagelist,
        .write_pagelist         = filelayout_write_pagelist,
index 21190bb1f5e348c5549e5985afb8cdf896aa72dd..43fe802dd67855fbfe521825d0c24ef3acca7c38 100644 (file)
 
 #include "pnfs.h"
 
+/*
+ * Default data server connection timeout and retrans values.
+ * Set by module parameters dataserver_timeo and dataserver_retrans.
+ */
+#define NFS4_DEF_DS_TIMEO   60
+#define NFS4_DEF_DS_RETRANS 5
+
 /*
  * Field testing shows we need to support up to 4096 stripe indices.
  * We store each index as a u8 (u32 on the wire) to keep the memory footprint
@@ -41,6 +48,9 @@
 #define NFS4_PNFS_MAX_STRIPE_CNT 4096
 #define NFS4_PNFS_MAX_MULTI_CNT  256 /* 256 fit into a u8 stripe_index */
 
+/* error codes for internal use */
+#define NFS4ERR_RESET_TO_MDS   12001
+
 enum stripetype4 {
        STRIPE_SPARSE = 1,
        STRIPE_DENSE = 2
@@ -62,23 +72,14 @@ struct nfs4_pnfs_ds {
        atomic_t                ds_count;
 };
 
-/* nfs4_file_layout_dsaddr flags */
-#define NFS4_DEVICE_ID_NEG_ENTRY       0x00000001
-
 struct nfs4_file_layout_dsaddr {
        struct nfs4_deviceid_node       id_node;
-       unsigned long                   flags;
        u32                             stripe_count;
        u8                              *stripe_indices;
        u32                             ds_num;
        struct nfs4_pnfs_ds             *ds_list[1];
 };
 
-struct nfs4_fl_commit_bucket {
-       struct list_head written;
-       struct list_head committing;
-};
-
 struct nfs4_filelayout_segment {
        struct pnfs_layout_segment generic_hdr;
        u32 stripe_type;
@@ -89,10 +90,19 @@ struct nfs4_filelayout_segment {
        struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
        unsigned int num_fh;
        struct nfs_fh **fh_array;
-       struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
-       int number_of_buckets;
 };
 
+struct nfs4_filelayout {
+       struct pnfs_layout_hdr generic_hdr;
+       struct pnfs_ds_commit_info commit_info;
+};
+
+static inline struct nfs4_filelayout *
+FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
+{
+       return container_of(lo, struct nfs4_filelayout, generic_hdr);
+}
+
 static inline struct nfs4_filelayout_segment *
 FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
 {
@@ -107,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
        return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
 }
 
+static inline void
+filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
+{
+       u32 *p = (u32 *)&node->deviceid;
+
+       printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
+               p[0], p[1], p[2], p[3]);
+
+       set_bit(NFS_DEVICEID_INVALID, &node->flags);
+}
+
+static inline bool
+filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
+{
+       return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
+}
+
+static inline bool
+filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
+{
+       return test_bit(NFS_DEVICEID_INVALID, &node->flags);
+}
+
+static inline bool
+filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
+{
+       return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
+               filelayout_test_layout_invalid(lseg->pls_layout);
+}
+
 extern struct nfs_fh *
 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
 
@@ -119,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 struct nfs4_file_layout_dsaddr *
 get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
+void nfs4_ds_disconnect(struct nfs_client *clp);
 
 #endif /* FS_NFS_NFS4FILELAYOUT_H */
index c9cff9adb2d3f7c832f3e3bc7e6d76ec45a6e692..a1fab8da7f03c8819951af81d95e6268a71dc80f 100644 (file)
 
 #include <linux/nfs_fs.h>
 #include <linux/vmalloc.h>
+#include <linux/module.h>
 
 #include "internal.h"
 #include "nfs4filelayout.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PNFS_LD
 
+static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
+static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
+
 /*
  * Data server cache
  *
@@ -144,6 +148,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
        return NULL;
 }
 
+/*
+ * Find every DS using this nfs_client and zero its client pointer.
+ */
+void nfs4_ds_disconnect(struct nfs_client *clp)
+{
+       struct nfs4_pnfs_ds *ds;
+       struct nfs_client *found = NULL;
+
+       dprintk("%s clp %p\n", __func__, clp);
+       spin_lock(&nfs4_ds_cache_lock);
+       list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+               if (ds->ds_clp && ds->ds_clp == clp) {
+                       found = ds->ds_clp;
+                       ds->ds_clp = NULL;
+               }
+       spin_unlock(&nfs4_ds_cache_lock);
+       if (found) {
+               set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
+               nfs_put_client(clp);
+       }
+}
+
 /*
  * Create an rpc connection to the nfs4_pnfs_ds data server
  * Currently only supports IPv4 and IPv6 addresses
@@ -165,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
                        __func__, ds->ds_remotestr, da->da_remotestr);
 
                clp = nfs4_set_ds_client(mds_srv->nfs_client,
-                                (struct sockaddr *)&da->da_addr,
-                                da->da_addrlen, IPPROTO_TCP);
+                                       (struct sockaddr *)&da->da_addr,
+                                       da->da_addrlen, IPPROTO_TCP,
+                                       dataserver_timeo, dataserver_retrans);
                if (!IS_ERR(clp))
                        break;
        }
@@ -176,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
                goto out;
        }
 
-       if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
-               if (!is_ds_client(clp)) {
-                       status = -ENODEV;
-                       goto out_put;
-               }
-               ds->ds_clp = clp;
-               dprintk("%s [existing] server=%s\n", __func__,
-                       ds->ds_remotestr);
-               goto out;
-       }
-
-       /*
-        * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
-        * be equal to the MDS lease. Renewal is scheduled in create_session.
-        */
-       spin_lock(&mds_srv->nfs_client->cl_lock);
-       clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
-       spin_unlock(&mds_srv->nfs_client->cl_lock);
-       clp->cl_last_renewal = jiffies;
-
-       /* New nfs_client */
-       status = nfs4_init_ds_session(clp);
+       status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
        if (status)
                goto out_put;
 
@@ -602,7 +608,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 
                mp_count = be32_to_cpup(p); /* multipath count */
                for (j = 0; j < mp_count; j++) {
-                       da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net,
+                       da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
                                            &stream, gfp_flags);
                        if (da)
                                list_add_tail(&da->da_node, &dsaddrs);
@@ -791,48 +797,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
        return flseg->fh_array[i];
 }
 
-static void
-filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
-                              int err, const char *ds_remotestr)
-{
-       u32 *p = (u32 *)&dsaddr->id_node.deviceid;
-
-       printk(KERN_ERR "NFS: data server %s connection error %d."
-               " Deviceid [%x%x%x%x] marked out of use.\n",
-               ds_remotestr, err, p[0], p[1], p[2], p[3]);
-
-       spin_lock(&nfs4_ds_cache_lock);
-       dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
-       spin_unlock(&nfs4_ds_cache_lock);
-}
-
 struct nfs4_pnfs_ds *
 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 {
        struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
        struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
+       struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
+
+       if (filelayout_test_devid_invalid(devid))
+               return NULL;
 
        if (ds == NULL) {
                printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
                        __func__, ds_idx);
-               return NULL;
+               goto mark_dev_invalid;
        }
 
        if (!ds->ds_clp) {
                struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
                int err;
 
-               if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
-                       /* Already tried to connect, don't try again */
-                       dprintk("%s Deviceid marked out of use\n", __func__);
-                       return NULL;
-               }
                err = nfs4_ds_connect(s, ds);
-               if (err) {
-                       filelayout_mark_devid_negative(dsaddr, err,
-                                                      ds->ds_remotestr);
-                       return NULL;
-               }
+               if (err)
+                       goto mark_dev_invalid;
        }
        return ds;
+
+mark_dev_invalid:
+       filelayout_mark_devid_invalid(devid);
+       return NULL;
 }
+
+module_param(dataserver_retrans, uint, 0644);
+MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
+                       "retries a request before it attempts further "
+                       " recovery  action.");
+module_param(dataserver_timeo, uint, 0644);
+MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
+                       "NFSv4.1  client  waits for a response from a "
+                       " data server before it retries an NFS request.");
index a7f3dedc4ec7bade9df84ed6f0fc7524507b9c21..017b4b01a69c7a747b4afab8c7591b5f2ac59a20 100644 (file)
@@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,
        return ret;
 }
 
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+{
+       struct gss_api_mech *mech;
+       struct xdr_netobj oid;
+       int i;
+       rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
+
+       for (i = 0; i < flavors->num_flavors; i++) {
+               struct nfs4_secinfo_flavor *flavor;
+               flavor = &flavors->flavors[i];
+
+               if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
+                       pseudoflavor = flavor->flavor;
+                       break;
+               } else if (flavor->flavor == RPC_AUTH_GSS) {
+                       oid.len  = flavor->gss.sec_oid4.len;
+                       oid.data = flavor->gss.sec_oid4.data;
+                       mech = gss_mech_get_by_OID(&oid);
+                       if (!mech)
+                               continue;
+                       pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
+                       gss_mech_put(mech);
+                       break;
+               }
+       }
+
+       return pseudoflavor;
+}
+
 static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
 {
        struct page *page;
@@ -168,7 +197,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino
        rpc_authflavor_t flavor;
 
        flavor = nfs4_negotiate_security(inode, name);
-       if (flavor < 0)
+       if ((int)flavor < 0)
                return ERR_PTR(flavor);
 
        clone = rpc_clone_client(clnt);
@@ -300,7 +329,7 @@ out:
  * @dentry - dentry of referral
  *
  */
-struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
+static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
 {
        struct vfsmount *mnt = ERR_PTR(-ENOMEM);
        struct dentry *parent;
@@ -341,3 +370,25 @@ out:
        dprintk("%s: done\n", __func__);
        return mnt;
 }
+
+struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
+                              struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+       struct dentry *parent = dget_parent(dentry);
+       struct rpc_clnt *client;
+       struct vfsmount *mnt;
+
+       /* Look it up again to get its attributes and sec flavor */
+       client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
+       dput(parent);
+       if (IS_ERR(client))
+               return ERR_CAST(client);
+
+       if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+               mnt = nfs_do_refmount(client, dentry);
+       else
+               mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
+
+       rpc_shutdown_client(client);
+       return mnt;
+}
index ab985f6f0da8d93da67f625ba8027ff851678c55..d48dbefa0e71ebf6d9ac90edbb893364afd2d0a3 100644 (file)
@@ -64,6 +64,7 @@
 #include "iostat.h"
 #include "callback.h"
 #include "pnfs.h"
+#include "netns.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PROC
 
@@ -80,6 +81,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
 static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
+static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                            struct nfs_fattr *fattr, struct iattr *sattr,
@@ -101,6 +103,8 @@ static int nfs4_map_errors(int err)
        case -NFS4ERR_BADOWNER:
        case -NFS4ERR_BADNAME:
                return -EINVAL;
+       case -NFS4ERR_SHARE_DENIED:
+               return -EACCES;
        default:
                dprintk("%s could not handle NFSv4 error %d\n",
                                __func__, -err);
@@ -304,7 +308,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
                case -NFS4ERR_SEQ_MISORDERED:
                        dprintk("%s ERROR: %d Reset session\n", __func__,
                                errorcode);
-                       nfs4_schedule_session_recovery(clp->cl_session);
+                       nfs4_schedule_session_recovery(clp->cl_session, errorcode);
                        exception->retry = 1;
                        break;
 #endif /* defined(CONFIG_NFS_V4_1) */
@@ -772,7 +776,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
        struct nfs_inode *nfsi = NFS_I(dir);
 
        spin_lock(&dir->i_lock);
-       nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
+       nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
        if (!cinfo->atomic || cinfo->before != dir->i_version)
                nfs_force_lookup_revalidate(dir);
        dir->i_version = cinfo->after;
@@ -788,7 +792,6 @@ struct nfs4_opendata {
        struct nfs4_string owner_name;
        struct nfs4_string group_name;
        struct nfs_fattr f_attr;
-       struct nfs_fattr dir_attr;
        struct dentry *dir;
        struct dentry *dentry;
        struct nfs4_state_owner *owner;
@@ -804,12 +807,10 @@ struct nfs4_opendata {
 static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 {
        p->o_res.f_attr = &p->f_attr;
-       p->o_res.dir_attr = &p->dir_attr;
        p->o_res.seqid = p->o_arg.seqid;
        p->c_res.seqid = p->c_arg.seqid;
        p->o_res.server = p->o_arg.server;
        nfs_fattr_init(&p->f_attr);
-       nfs_fattr_init(&p->dir_attr);
        nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
 }
 
@@ -843,7 +844,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->o_arg.name = &dentry->d_name;
        p->o_arg.server = server;
        p->o_arg.bitmask = server->attr_bitmask;
-       p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
        p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
        if (attrs != NULL && attrs->ia_valid != 0) {
                __be32 verf[2];
@@ -1332,7 +1332,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
                        case -NFS4ERR_BAD_HIGH_SLOT:
                        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
                        case -NFS4ERR_DEADSESSION:
-                               nfs4_schedule_session_recovery(server->nfs_client->cl_session);
+                               nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
                                goto out;
                        case -NFS4ERR_STALE_CLIENTID:
                        case -NFS4ERR_STALE_STATEID:
@@ -1611,8 +1611,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
 
        nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
 
-       nfs_refresh_inode(dir, o_res->dir_attr);
-
        if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
                status = _nfs4_proc_open_confirm(data);
                if (status != 0)
@@ -1645,11 +1643,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 
        nfs_fattr_map_and_free_names(server, &data->f_attr);
 
-       if (o_arg->open_flags & O_CREAT) {
+       if (o_arg->open_flags & O_CREAT)
                update_changeattr(dir, &o_res->cinfo);
-               nfs_post_op_update_inode(dir, o_res->dir_attr);
-       } else
-               nfs_refresh_inode(dir, o_res->dir_attr);
        if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
                server->caps &= ~NFS_CAP_POSIX_LOCK;
        if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1789,7 +1784,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir,
+                       struct dentry *dentry,
+                       fmode_t fmode,
+                       int flags,
+                       struct iattr *sattr,
+                       struct rpc_cred *cred,
+                       struct nfs4_state **res,
+                       struct nfs4_threshold **ctx_th)
 {
        struct nfs4_state_owner  *sp;
        struct nfs4_state     *state = NULL;
@@ -1814,6 +1816,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
        if (opendata == NULL)
                goto err_put_state_owner;
 
+       if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+               opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+               if (!opendata->f_attr.mdsthreshold)
+                       goto err_opendata_put;
+       }
        if (dentry->d_inode != NULL)
                opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
 
@@ -1839,11 +1846,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
                        nfs_setattr_update_inode(state->inode, sattr);
                nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
        }
+
+       if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+               *ctx_th = opendata->f_attr.mdsthreshold;
+       else
+               kfree(opendata->f_attr.mdsthreshold);
+       opendata->f_attr.mdsthreshold = NULL;
+
        nfs4_opendata_put(opendata);
        nfs4_put_state_owner(sp);
        *res = state;
        return 0;
 err_opendata_put:
+       kfree(opendata->f_attr.mdsthreshold);
        nfs4_opendata_put(opendata);
 err_put_state_owner:
        nfs4_put_state_owner(sp);
@@ -1853,14 +1868,21 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir,
+                                       struct dentry *dentry,
+                                       fmode_t fmode,
+                                       int flags,
+                                       struct iattr *sattr,
+                                       struct rpc_cred *cred,
+                                       struct nfs4_threshold **ctx_th)
 {
        struct nfs4_exception exception = { };
        struct nfs4_state *res;
        int status;
 
        do {
-               status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
+               status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
+                                      &res, ctx_th);
                if (status == 0)
                        break;
                /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2184,7 +2206,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
        struct nfs4_state *state;
 
        /* Protect against concurrent sillydeletes */
-       state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
+       state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
+                            ctx->cred, &ctx->mdsthreshold);
        if (IS_ERR(state))
                return ERR_CAST(state);
        ctx->state = state;
@@ -2354,8 +2377,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 /*
  * get the file handle for the "/" directory on the server
  */
-static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
-                             struct nfs_fsinfo *info)
+int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
+                        struct nfs_fsinfo *info)
 {
        int minor_version = server->nfs_client->cl_minorversion;
        int status = nfs4_lookup_root(server, fhandle, info);
@@ -2372,6 +2395,31 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
        return nfs4_map_errors(status);
 }
 
+static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
+                             struct nfs_fsinfo *info)
+{
+       int error;
+       struct nfs_fattr *fattr = info->fattr;
+
+       error = nfs4_server_capabilities(server, mntfh);
+       if (error < 0) {
+               dprintk("nfs4_get_root: getcaps error = %d\n", -error);
+               return error;
+       }
+
+       error = nfs4_proc_getattr(server, mntfh, fattr);
+       if (error < 0) {
+               dprintk("nfs4_get_root: getattr error = %d\n", -error);
+               return error;
+       }
+
+       if (fattr->valid & NFS_ATTR_FATTR_FSID &&
+           !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+               memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+
+       return error;
+}
+
 /*
  * Get locations and (maybe) other attributes of a referral.
  * Note that we'll actually follow the referral later when
@@ -2578,7 +2626,7 @@ out:
        return err;
 }
 
-static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
                            struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
        int status;
@@ -2761,7 +2809,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                fmode = ctx->mode;
        }
        sattr->ia_mode &= ~current_umask();
-       state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
+       state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
        d_drop(dentry);
        if (IS_ERR(state)) {
                status = PTR_ERR(state);
@@ -2783,7 +2831,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
        struct nfs_removeargs args = {
                .fh = NFS_FH(dir),
                .name = *name,
-               .bitmask = server->attr_bitmask,
        };
        struct nfs_removeres res = {
                .server = server,
@@ -2793,19 +2840,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
                .rpc_argp = &args,
                .rpc_resp = &res,
        };
-       int status = -ENOMEM;
-
-       res.dir_attr = nfs_alloc_fattr();
-       if (res.dir_attr == NULL)
-               goto out;
+       int status;
 
        status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
-       if (status == 0) {
+       if (status == 0)
                update_changeattr(dir, &res.cinfo);
-               nfs_post_op_update_inode(dir, res.dir_attr);
-       }
-       nfs_free_fattr(res.dir_attr);
-out:
        return status;
 }
 
@@ -2827,7 +2866,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
        struct nfs_removeargs *args = msg->rpc_argp;
        struct nfs_removeres *res = msg->rpc_resp;
 
-       args->bitmask = server->cache_consistency_bitmask;
        res->server = server;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
        nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
@@ -2852,7 +2890,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
        if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
                return 0;
        update_changeattr(dir, &res->cinfo);
-       nfs_post_op_update_inode(dir, res->dir_attr);
        return 1;
 }
 
@@ -2863,7 +2900,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
        struct nfs_renameres *res = msg->rpc_resp;
 
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
-       arg->bitmask = server->attr_bitmask;
        res->server = server;
        nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
 }
@@ -2889,9 +2925,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
                return 0;
 
        update_changeattr(old_dir, &res->old_cinfo);
-       nfs_post_op_update_inode(old_dir, res->old_fattr);
        update_changeattr(new_dir, &res->new_cinfo);
-       nfs_post_op_update_inode(new_dir, res->new_fattr);
        return 1;
 }
 
@@ -2904,7 +2938,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
                .new_dir = NFS_FH(new_dir),
                .old_name = old_name,
                .new_name = new_name,
-               .bitmask = server->attr_bitmask,
        };
        struct nfs_renameres res = {
                .server = server,
@@ -2916,21 +2949,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
        };
        int status = -ENOMEM;
        
-       res.old_fattr = nfs_alloc_fattr();
-       res.new_fattr = nfs_alloc_fattr();
-       if (res.old_fattr == NULL || res.new_fattr == NULL)
-               goto out;
-
        status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
        if (!status) {
                update_changeattr(old_dir, &res.old_cinfo);
-               nfs_post_op_update_inode(old_dir, res.old_fattr);
                update_changeattr(new_dir, &res.new_cinfo);
-               nfs_post_op_update_inode(new_dir, res.new_fattr);
        }
-out:
-       nfs_free_fattr(res.new_fattr);
-       nfs_free_fattr(res.old_fattr);
        return status;
 }
 
@@ -2968,18 +2991,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
        int status = -ENOMEM;
 
        res.fattr = nfs_alloc_fattr();
-       res.dir_attr = nfs_alloc_fattr();
-       if (res.fattr == NULL || res.dir_attr == NULL)
+       if (res.fattr == NULL)
                goto out;
 
        status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
        if (!status) {
                update_changeattr(dir, &res.cinfo);
-               nfs_post_op_update_inode(dir, res.dir_attr);
                nfs_post_op_update_inode(inode, res.fattr);
        }
 out:
-       nfs_free_fattr(res.dir_attr);
        nfs_free_fattr(res.fattr);
        return status;
 }
@@ -3002,7 +3022,6 @@ struct nfs4_createdata {
        struct nfs4_create_res res;
        struct nfs_fh fh;
        struct nfs_fattr fattr;
-       struct nfs_fattr dir_fattr;
 };
 
 static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3026,9 +3045,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
                data->res.server = server;
                data->res.fh = &data->fh;
                data->res.fattr = &data->fattr;
-               data->res.dir_fattr = &data->dir_fattr;
                nfs_fattr_init(data->res.fattr);
-               nfs_fattr_init(data->res.dir_fattr);
        }
        return data;
 }
@@ -3039,7 +3056,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
                                    &data->arg.seq_args, &data->res.seq_res, 1);
        if (status == 0) {
                update_changeattr(dir, &data->res.dir_cinfo);
-               nfs_post_op_update_inode(dir, data->res.dir_fattr);
                status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
        }
        return status;
@@ -3335,12 +3351,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 void __nfs4_read_done_cb(struct nfs_read_data *data)
 {
-       nfs_invalidate_atime(data->inode);
+       nfs_invalidate_atime(data->header->inode);
 }
 
 static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
 {
-       struct nfs_server *server = NFS_SERVER(data->inode);
+       struct nfs_server *server = NFS_SERVER(data->header->inode);
 
        if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
                rpc_restart_call_prepare(task);
@@ -3375,7 +3391,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
 
 static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
-       if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+       if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
                                &data->args.seq_args,
                                &data->res.seq_res,
                                task))
@@ -3383,25 +3399,9 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
        rpc_call_start(task);
 }
 
-/* Reset the the nfs_read_data to send the read to the MDS. */
-void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
-{
-       dprintk("%s Reset task for i/o through\n", __func__);
-       put_lseg(data->lseg);
-       data->lseg = NULL;
-       /* offsets will differ in the dense stripe case */
-       data->args.offset = data->mds_offset;
-       data->ds_clp = NULL;
-       data->args.fh     = NFS_FH(data->inode);
-       data->read_done_cb = nfs4_read_done_cb;
-       task->tk_ops = data->mds_ops;
-       rpc_task_reset_client(task, NFS_CLIENT(data->inode));
-}
-EXPORT_SYMBOL_GPL(nfs4_reset_read);
-
 static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
 {
-       struct inode *inode = data->inode;
+       struct inode *inode = data->header->inode;
        
        if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
                rpc_restart_call_prepare(task);
@@ -3409,7 +3409,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
        }
        if (task->tk_status >= 0) {
                renew_lease(NFS_SERVER(inode), data->timestamp);
-               nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+               nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
        }
        return 0;
 }
@@ -3422,32 +3422,30 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
                nfs4_write_done_cb(task, data);
 }
 
-/* Reset the the nfs_write_data to send the write to the MDS. */
-void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
+static
+bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
 {
-       dprintk("%s Reset task for i/o through\n", __func__);
-       put_lseg(data->lseg);
-       data->lseg          = NULL;
-       data->ds_clp        = NULL;
-       data->write_done_cb = nfs4_write_done_cb;
-       data->args.fh       = NFS_FH(data->inode);
-       data->args.bitmask  = data->res.server->cache_consistency_bitmask;
-       data->args.offset   = data->mds_offset;
-       data->res.fattr     = &data->fattr;
-       task->tk_ops        = data->mds_ops;
-       rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+       const struct nfs_pgio_header *hdr = data->header;
+
+       /* Don't request attributes for pNFS or O_DIRECT writes */
+       if (data->ds_clp != NULL || hdr->dreq != NULL)
+               return false;
+       /* Otherwise, request attributes if and only if we don't hold
+        * a delegation
+        */
+       return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
 }
-EXPORT_SYMBOL_GPL(nfs4_reset_write);
 
 static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-       struct nfs_server *server = NFS_SERVER(data->inode);
+       struct nfs_server *server = NFS_SERVER(data->header->inode);
 
-       if (data->lseg) {
+       if (!nfs4_write_need_cache_consistency_data(data)) {
                data->args.bitmask = NULL;
                data->res.fattr = NULL;
        } else
                data->args.bitmask = server->cache_consistency_bitmask;
+
        if (!data->write_done_cb)
                data->write_done_cb = nfs4_write_done_cb;
        data->res.server = server;
@@ -3458,6 +3456,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 }
 
 static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+{
+       if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+                               &data->args.seq_args,
+                               &data->res.seq_res,
+                               task))
+               return;
+       rpc_call_start(task);
+}
+
+static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
        if (nfs4_setup_sequence(NFS_SERVER(data->inode),
                                &data->args.seq_args,
@@ -3467,7 +3475,7 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
        rpc_call_start(task);
 }
 
-static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
 {
        struct inode *inode = data->inode;
 
@@ -3475,28 +3483,22 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
                rpc_restart_call_prepare(task);
                return -EAGAIN;
        }
-       nfs_refresh_inode(inode, data->res.fattr);
        return 0;
 }
 
-static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
 {
        if (!nfs4_sequence_done(task, &data->res.seq_res))
                return -EAGAIN;
-       return data->write_done_cb(task, data);
+       return data->commit_done_cb(task, data);
 }
 
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
        struct nfs_server *server = NFS_SERVER(data->inode);
 
-       if (data->lseg) {
-               data->args.bitmask = NULL;
-               data->res.fattr = NULL;
-       } else
-               data->args.bitmask = server->cache_consistency_bitmask;
-       if (!data->write_done_cb)
-               data->write_done_cb = nfs4_commit_done_cb;
+       if (data->commit_done_cb == NULL)
+               data->commit_done_cb = nfs4_commit_done_cb;
        data->res.server = server;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
        nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -3905,7 +3907,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
                case -NFS4ERR_SEQ_MISORDERED:
                        dprintk("%s ERROR %d, Reset session\n", __func__,
                                task->tk_status);
-                       nfs4_schedule_session_recovery(clp->cl_session);
+                       nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
                        task->tk_status = 0;
                        return -EAGAIN;
 #endif /* CONFIG_NFS_V4_1 */
@@ -3931,13 +3933,21 @@ wait_on_recovery:
        return -EAGAIN;
 }
 
-static void nfs4_construct_boot_verifier(struct nfs_client *clp,
-                                        nfs4_verifier *bootverf)
+static void nfs4_init_boot_verifier(const struct nfs_client *clp,
+                                   nfs4_verifier *bootverf)
 {
        __be32 verf[2];
 
-       verf[0] = htonl((u32)clp->cl_boot_time.tv_sec);
-       verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec);
+       if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
+               /* An impossible timestamp guarantees this value
+                * will never match a generated boot time. */
+               verf[0] = 0;
+               verf[1] = (__be32)(NSEC_PER_SEC + 1);
+       } else {
+               struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+               verf[0] = (__be32)nn->boot_time.tv_sec;
+               verf[1] = (__be32)nn->boot_time.tv_nsec;
+       }
        memcpy(bootverf->data, verf, sizeof(bootverf->data));
 }
 
@@ -3960,7 +3970,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
        int loop = 0;
        int status;
 
-       nfs4_construct_boot_verifier(clp, &sc_verifier);
+       nfs4_init_boot_verifier(clp, &sc_verifier);
 
        for(;;) {
                rcu_read_lock();
@@ -4104,7 +4114,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
        nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
        data->args.fhandle = &data->fh;
        data->args.stateid = &data->stateid;
-       data->args.bitmask = server->attr_bitmask;
+       data->args.bitmask = server->cache_consistency_bitmask;
        nfs_copy_fh(&data->fh, NFS_FH(inode));
        nfs4_stateid_copy(&data->stateid, stateid);
        data->res.fattr = &data->fattr;
@@ -4125,9 +4135,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
        if (status != 0)
                goto out;
        status = data->rpc_status;
-       if (status != 0)
-               goto out;
-       nfs_refresh_inode(inode, &data->fattr);
+       if (status == 0)
+               nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+       else
+               nfs_refresh_inode(inode, &data->fattr);
 out:
        rpc_put_task(task);
        return status;
@@ -4837,7 +4848,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
                        case -NFS4ERR_BAD_HIGH_SLOT:
                        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
                        case -NFS4ERR_DEADSESSION:
-                               nfs4_schedule_session_recovery(server->nfs_client->cl_session);
+                               nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
                                goto out;
                        case -ERESTARTSYS:
                                /*
@@ -5079,7 +5090,8 @@ out_inval:
 }
 
 static bool
-nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+nfs41_same_server_scope(struct nfs41_server_scope *a,
+                       struct nfs41_server_scope *b)
 {
        if (a->server_scope_sz == b->server_scope_sz &&
            memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
@@ -5088,6 +5100,61 @@ nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
        return false;
 }
 
+/*
+ * nfs4_proc_bind_conn_to_session()
+ *
+ * The 4.1 client currently uses the same TCP connection for the
+ * fore and backchannel.
+ */
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+{
+       int status;
+       struct nfs41_bind_conn_to_session_res res;
+       struct rpc_message msg = {
+               .rpc_proc =
+                       &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION],
+               .rpc_argp = clp,
+               .rpc_resp = &res,
+               .rpc_cred = cred,
+       };
+
+       dprintk("--> %s\n", __func__);
+       BUG_ON(clp == NULL);
+
+       res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+       if (unlikely(res.session == NULL)) {
+               status = -ENOMEM;
+               goto out;
+       }
+
+       status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+       if (status == 0) {
+               if (memcmp(res.session->sess_id.data,
+                   clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
+                       dprintk("NFS: %s: Session ID mismatch\n", __func__);
+                       status = -EIO;
+                       goto out_session;
+               }
+               if (res.dir != NFS4_CDFS4_BOTH) {
+                       dprintk("NFS: %s: Unexpected direction from server\n",
+                               __func__);
+                       status = -EIO;
+                       goto out_session;
+               }
+               if (res.use_conn_in_rdma_mode) {
+                       dprintk("NFS: %s: Server returned RDMA mode = true\n",
+                               __func__);
+                       status = -EIO;
+                       goto out_session;
+               }
+       }
+out_session:
+       kfree(res.session);
+out:
+       dprintk("<-- %s status= %d\n", __func__, status);
+       return status;
+}
+
 /*
  * nfs4_proc_exchange_id()
  *
@@ -5105,7 +5172,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
        };
        struct nfs41_exchange_id_res res = {
-               .client = clp,
+               0
        };
        int status;
        struct rpc_message msg = {
@@ -5118,7 +5185,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
        dprintk("--> %s\n", __func__);
        BUG_ON(clp == NULL);
 
-       nfs4_construct_boot_verifier(clp, &verifier);
+       nfs4_init_boot_verifier(clp, &verifier);
 
        args.id_len = scnprintf(args.id, sizeof(args.id),
                                "%s/%s/%u",
@@ -5126,59 +5193,135 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                                clp->cl_rpcclient->cl_nodename,
                                clp->cl_rpcclient->cl_auth->au_flavor);
 
-       res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
-       if (unlikely(!res.server_scope)) {
+       res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
+                                       GFP_NOFS);
+       if (unlikely(res.server_owner == NULL)) {
                status = -ENOMEM;
                goto out;
        }
 
-       res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL);
-       if (unlikely(!res.impl_id)) {
+       res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
+                                       GFP_NOFS);
+       if (unlikely(res.server_scope == NULL)) {
+               status = -ENOMEM;
+               goto out_server_owner;
+       }
+
+       res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS);
+       if (unlikely(res.impl_id == NULL)) {
                status = -ENOMEM;
                goto out_server_scope;
        }
 
        status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
-       if (!status)
-               status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+       if (status == 0)
+               status = nfs4_check_cl_exchange_flags(res.flags);
+
+       if (status == 0) {
+               clp->cl_clientid = res.clientid;
+               clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
+               if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+                       clp->cl_seqid = res.seqid;
+
+               kfree(clp->cl_serverowner);
+               clp->cl_serverowner = res.server_owner;
+               res.server_owner = NULL;
 
-       if (!status) {
                /* use the most recent implementation id */
-               kfree(clp->impl_id);
-               clp->impl_id = res.impl_id;
-       } else
-               kfree(res.impl_id);
+               kfree(clp->cl_implid);
+               clp->cl_implid = res.impl_id;
 
-       if (!status) {
-               if (clp->server_scope &&
-                   !nfs41_same_server_scope(clp->server_scope,
+               if (clp->cl_serverscope != NULL &&
+                   !nfs41_same_server_scope(clp->cl_serverscope,
                                             res.server_scope)) {
                        dprintk("%s: server_scope mismatch detected\n",
                                __func__);
                        set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
-                       kfree(clp->server_scope);
-                       clp->server_scope = NULL;
+                       kfree(clp->cl_serverscope);
+                       clp->cl_serverscope = NULL;
                }
 
-               if (!clp->server_scope) {
-                       clp->server_scope = res.server_scope;
+               if (clp->cl_serverscope == NULL) {
+                       clp->cl_serverscope = res.server_scope;
                        goto out;
                }
-       }
+       } else
+               kfree(res.impl_id);
 
+out_server_owner:
+       kfree(res.server_owner);
 out_server_scope:
        kfree(res.server_scope);
 out:
-       if (clp->impl_id)
+       if (clp->cl_implid != NULL)
                dprintk("%s: Server Implementation ID: "
                        "domain: %s, name: %s, date: %llu,%u\n",
-                       __func__, clp->impl_id->domain, clp->impl_id->name,
-                       clp->impl_id->date.seconds,
-                       clp->impl_id->date.nseconds);
+                       __func__, clp->cl_implid->domain, clp->cl_implid->name,
+                       clp->cl_implid->date.seconds,
+                       clp->cl_implid->date.nseconds);
        dprintk("<-- %s status= %d\n", __func__, status);
        return status;
 }
 
+static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
+               struct rpc_cred *cred)
+{
+       struct rpc_message msg = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
+               .rpc_argp = clp,
+               .rpc_cred = cred,
+       };
+       int status;
+
+       status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+       if (status) /* log only; the error is returned to the caller unchanged */
+               pr_warn("NFS: Got error %d from the server %s on "
+                       "DESTROY_CLIENTID.\n", status, clp->cl_hostname);
+       return status;
+}
+
+static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
+               struct rpc_cred *cred)
+{
+       unsigned int loop;
+       int ret;
+
+       for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
+               ret = _nfs4_proc_destroy_clientid(clp, cred);
+               switch (ret) {
+               case -NFS4ERR_DELAY:
+               case -NFS4ERR_CLIENTID_BUSY:
+                       ssleep(1);
+                       break;
+               default:
+                       return ret;
+               }
+       }
+       return 0;
+}
+
+int nfs4_destroy_clientid(struct nfs_client *clp)
+{
+       struct rpc_cred *cred;
+       int ret = 0;
+
+       if (clp->cl_mvops->minor_version < 1)
+               goto out;
+       if (clp->cl_exchange_flags == 0)
+               goto out;
+       cred = nfs4_get_exchange_id_cred(clp);
+       ret = nfs4_proc_destroy_clientid(clp, cred);
+       if (cred)
+               put_rpccred(cred);
+       switch (ret) {
+       case 0:
+       case -NFS4ERR_STALE_CLIENTID:
+               clp->cl_exchange_flags = 0;
+       }
+out:
+       return ret;
+}
+
 struct nfs4_get_lease_time_data {
        struct nfs4_get_lease_time_args *args;
        struct nfs4_get_lease_time_res *res;
@@ -5399,8 +5542,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 void nfs4_destroy_session(struct nfs4_session *session)
 {
        struct rpc_xprt *xprt;
+       struct rpc_cred *cred;
 
-       nfs4_proc_destroy_session(session);
+       cred = nfs4_get_exchange_id_cred(session->clp);
+       nfs4_proc_destroy_session(session, cred);
+       if (cred)
+               put_rpccred(cred);
 
        rcu_read_lock();
        xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
@@ -5510,7 +5657,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
        return nfs4_verify_back_channel_attrs(args, session);
 }
 
-static int _nfs4_proc_create_session(struct nfs_client *clp)
+static int _nfs4_proc_create_session(struct nfs_client *clp,
+               struct rpc_cred *cred)
 {
        struct nfs4_session *session = clp->cl_session;
        struct nfs41_create_session_args args = {
@@ -5524,6 +5672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
                .rpc_argp = &args,
                .rpc_resp = &res,
+               .rpc_cred = cred,
        };
        int status;
 
@@ -5548,7 +5697,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
  * It is the responsibility of the caller to verify the session is
  * expired before calling this routine.
  */
-int nfs4_proc_create_session(struct nfs_client *clp)
+int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)
 {
        int status;
        unsigned *ptr;
@@ -5556,7 +5705,7 @@ int nfs4_proc_create_session(struct nfs_client *clp)
 
        dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
 
-       status = _nfs4_proc_create_session(clp);
+       status = _nfs4_proc_create_session(clp, cred);
        if (status)
                goto out;
 
@@ -5578,10 +5727,15 @@ out:
  * Issue the over-the-wire RPC DESTROY_SESSION.
  * The caller must serialize access to this routine.
  */
-int nfs4_proc_destroy_session(struct nfs4_session *session)
+int nfs4_proc_destroy_session(struct nfs4_session *session,
+               struct rpc_cred *cred)
 {
+       struct rpc_message msg = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION],
+               .rpc_argp = session,
+               .rpc_cred = cred,
+       };
        int status = 0;
-       struct rpc_message msg;
 
        dprintk("--> nfs4_proc_destroy_session\n");
 
@@ -5589,10 +5743,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
        if (session->clp->cl_cons_state != NFS_CS_READY)
                return status;
 
-       msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
-       msg.rpc_argp = session;
-       msg.rpc_resp = NULL;
-       msg.rpc_cred = NULL;
        status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 
        if (status)
@@ -5604,53 +5754,79 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
        return status;
 }
 
+/*
+ * With sessions, the client is not marked ready until after a
+ * successful EXCHANGE_ID and CREATE_SESSION.
+ *
+ * Map cl_cons_state errors to EPROTONOSUPPORT to indicate
+ * other versions of NFS can be tried.
+ */
+static int nfs41_check_session_ready(struct nfs_client *clp)
+{
+       int ret;
+       
+       if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
+               ret = nfs4_client_recover_expired_lease(clp);
+               if (ret)
+                       return ret;
+       }
+       if (clp->cl_cons_state < NFS_CS_READY)
+               return -EPROTONOSUPPORT;
+       smp_rmb();
+       return 0;
+}
+
 int nfs4_init_session(struct nfs_server *server)
 {
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_session *session;
        unsigned int rsize, wsize;
-       int ret;
 
        if (!nfs4_has_session(clp))
                return 0;
 
        session = clp->cl_session;
-       if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
-               return 0;
+       spin_lock(&clp->cl_lock);
+       if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
 
-       rsize = server->rsize;
-       if (rsize == 0)
-               rsize = NFS_MAX_FILE_IO_SIZE;
-       wsize = server->wsize;
-       if (wsize == 0)
-               wsize = NFS_MAX_FILE_IO_SIZE;
+               rsize = server->rsize;
+               if (rsize == 0)
+                       rsize = NFS_MAX_FILE_IO_SIZE;
+               wsize = server->wsize;
+               if (wsize == 0)
+                       wsize = NFS_MAX_FILE_IO_SIZE;
 
-       session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
-       session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+               session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+               session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+       }
+       spin_unlock(&clp->cl_lock);
 
-       ret = nfs4_recover_expired_lease(server);
-       if (!ret)
-               ret = nfs4_check_client_ready(clp);
-       return ret;
+       return nfs41_check_session_ready(clp);
 }
 
-int nfs4_init_ds_session(struct nfs_client *clp)
+int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
 {
        struct nfs4_session *session = clp->cl_session;
        int ret;
 
-       if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
-               return 0;
-
-       ret = nfs4_client_recover_expired_lease(clp);
-       if (!ret)
-               /* Test for the DS role */
-               if (!is_ds_client(clp))
-                       ret = -ENODEV;
-       if (!ret)
-               ret = nfs4_check_client_ready(clp);
-       return ret;
+       spin_lock(&clp->cl_lock);
+       if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
+               /*
+                * Do not set NFS_CS_CHECK_LEASE_TIME; instead set the
+                * DS lease to be equal to the MDS lease.
+                */
+               clp->cl_lease_time = lease_time;
+               clp->cl_last_renewal = jiffies;
+       }
+       spin_unlock(&clp->cl_lock);
 
+       ret = nfs41_check_session_ready(clp);
+       if (ret)
+               return ret;
+       /* Test for the DS role */
+       if (!is_ds_client(clp))
+               return -ENODEV;
+       return 0;
 }
 EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
 
@@ -6557,6 +6733,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .file_inode_ops = &nfs4_file_inode_operations,
        .file_ops       = &nfs4_file_operations,
        .getroot        = nfs4_proc_get_root,
+       .submount       = nfs4_submount,
        .getattr        = nfs4_proc_getattr,
        .setattr        = nfs4_proc_setattr,
        .lookup         = nfs4_proc_lookup,
@@ -6589,13 +6766,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
        .write_done     = nfs4_write_done,
        .commit_setup   = nfs4_proc_commit_setup,
+       .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
        .commit_done    = nfs4_commit_done,
        .lock           = nfs4_proc_lock,
        .clear_acl_cache = nfs4_zap_acl_attr,
        .close_context  = nfs4_close_context,
        .open_context   = nfs4_atomic_open,
        .init_client    = nfs4_init_client,
-       .secinfo        = nfs4_proc_secinfo,
 };
 
 static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
index dc484c0eae7f9706716e4a73cabe03857dd3ae15..6930bec91bca22a8f8f7cf0548dcfe9cc6964762 100644 (file)
@@ -49,7 +49,7 @@
 #include "nfs4_fs.h"
 #include "delegation.h"
 
-#define NFSDBG_FACILITY        NFSDBG_PROC
+#define NFSDBG_FACILITY                NFSDBG_STATE
 
 void
 nfs4_renew_state(struct work_struct *work)
index 7f0fcfc1fe9db51e9bc3748f511163dfed7cdce7..c679b9ecef634c80d4738e3cc2a9624f51c327c2 100644 (file)
@@ -57,6 +57,8 @@
 #include "internal.h"
 #include "pnfs.h"
 
+#define NFSDBG_FACILITY                NFSDBG_STATE
+
 #define OPENOWNER_POOL_SIZE    8
 
 const nfs4_stateid zero_stateid;
@@ -254,7 +256,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
                goto out;
        set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 do_confirm:
-       status = nfs4_proc_create_session(clp);
+       status = nfs4_proc_create_session(clp, cred);
        if (status != 0)
                goto out;
        clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
@@ -1106,6 +1108,8 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
                return;
        if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
                set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+       dprintk("%s: scheduling lease recovery for server %s\n", __func__,
+                       clp->cl_hostname);
        nfs4_schedule_state_manager(clp);
 }
 EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
@@ -1122,6 +1126,8 @@ static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
 {
        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
        nfs_expire_all_delegations(clp);
+       dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
+                       clp->cl_hostname);
 }
 
 void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
@@ -1158,6 +1164,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
        struct nfs_client *clp = server->nfs_client;
 
        nfs4_state_mark_reclaim_nograce(clp, state);
+       dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
+                       clp->cl_hostname);
        nfs4_schedule_state_manager(clp);
 }
 EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1491,19 +1499,25 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
                case -NFS4ERR_BADSLOT:
                case -NFS4ERR_BAD_HIGH_SLOT:
                case -NFS4ERR_DEADSESSION:
-               case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
                case -NFS4ERR_SEQ_FALSE_RETRY:
                case -NFS4ERR_SEQ_MISORDERED:
                        set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
                        /* Zero session reset errors */
                        break;
+               case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+                       set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+                       break;
                case -EKEYEXPIRED:
                        /* Nothing we can do */
                        nfs4_warn_keyexpired(clp->cl_hostname);
                        break;
                default:
+                       dprintk("%s: failed to handle error %d for server %s\n",
+                                       __func__, error, clp->cl_hostname);
                        return error;
        }
+       dprintk("%s: handled error %d for server %s\n", __func__, error,
+                       clp->cl_hostname);
        return 0;
 }
 
@@ -1572,34 +1586,82 @@ out:
        return nfs4_recovery_handle_error(clp, status);
 }
 
+/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
+ * on EXCHANGE_ID for v4.1
+ */
+static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
+{
+       switch (status) {
+       case -NFS4ERR_SEQ_MISORDERED:
+               if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
+                       return -ESERVERFAULT;
+               /* Lease confirmation error: retry after purging the lease */
+               ssleep(1); /* fall through */
+       case -NFS4ERR_CLID_INUSE:
+       case -NFS4ERR_STALE_CLIENTID:
+               clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+               break;
+       case -EACCES:
+               if (clp->cl_machine_cred == NULL)
+                       return -EACCES;
+               /* Handle case where the user hasn't set up machine creds */
+               nfs4_clear_machine_cred(clp); /* fall through */
+       case -NFS4ERR_DELAY:
+       case -ETIMEDOUT:
+       case -EAGAIN:
+               ssleep(1);
+               break;
+
+       case -NFS4ERR_MINOR_VERS_MISMATCH:
+               if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
+                       nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
+               dprintk("%s: exit with error %d for server %s\n",
+                               __func__, -EPROTONOSUPPORT, clp->cl_hostname);
+               return -EPROTONOSUPPORT;
+       case -EKEYEXPIRED:
+               nfs4_warn_keyexpired(clp->cl_hostname); /* fall through */
+       case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
+                                * in nfs4_exchange_id */
+       default:
+               dprintk("%s: exit with error %d for server %s\n", __func__,
+                               status, clp->cl_hostname);
+               return status;
+       }
+       set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); /* reached only via "break" above */
+       dprintk("%s: handled error %d for server %s\n", __func__, status,
+                       clp->cl_hostname);
+       return 0;
+}
+
 static int nfs4_reclaim_lease(struct nfs_client *clp)
 {
        struct rpc_cred *cred;
        const struct nfs4_state_recovery_ops *ops =
                clp->cl_mvops->reboot_recovery_ops;
-       int status = -ENOENT;
+       int status;
 
        cred = ops->get_clid_cred(clp);
-       if (cred != NULL) {
-               status = ops->establish_clid(clp, cred);
-               put_rpccred(cred);
-               /* Handle case where the user hasn't set up machine creds */
-               if (status == -EACCES && cred == clp->cl_machine_cred) {
-                       nfs4_clear_machine_cred(clp);
-                       status = -EAGAIN;
-               }
-               if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
-                       status = -EPROTONOSUPPORT;
-       }
-       return status;
+       if (cred == NULL)
+               return -ENOENT;
+       status = ops->establish_clid(clp, cred);
+       put_rpccred(cred);
+       if (status != 0)
+               return nfs4_handle_reclaim_lease_error(clp, status);
+       return 0;
 }
 
 #ifdef CONFIG_NFS_V4_1
-void nfs4_schedule_session_recovery(struct nfs4_session *session)
+void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
 {
        struct nfs_client *clp = session->clp;
 
-       set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+       switch (err) {
+       default:
+               set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+               break;
+       case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+               set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+       }
        nfs4_schedule_lease_recovery(clp);
 }
 EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
@@ -1607,14 +1669,19 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
 void nfs41_handle_recall_slot(struct nfs_client *clp)
 {
        set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+       dprintk("%s: scheduling slot recall for server %s\n", __func__,
+                       clp->cl_hostname);
        nfs4_schedule_state_manager(clp);
 }
 
 static void nfs4_reset_all_state(struct nfs_client *clp)
 {
        if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
-               clp->cl_boot_time = CURRENT_TIME;
+               set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+               clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
                nfs4_state_start_reclaim_nograce(clp);
+               dprintk("%s: scheduling reset of all state for server %s!\n",
+                               __func__, clp->cl_hostname);
                nfs4_schedule_state_manager(clp);
        }
 }
@@ -1623,33 +1690,50 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
 {
        if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
                nfs4_state_start_reclaim_reboot(clp);
+               dprintk("%s: server %s rebooted!\n", __func__,
+                               clp->cl_hostname);
                nfs4_schedule_state_manager(clp);
        }
 }
 
 static void nfs41_handle_state_revoked(struct nfs_client *clp)
 {
-       /* Temporary */
        nfs4_reset_all_state(clp);
+       dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
 }
 
 static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
 {
        /* This will need to handle layouts too */
        nfs_expire_all_delegations(clp);
+       dprintk("%s: Recallable state revoked on server %s!\n", __func__,
+                       clp->cl_hostname);
 }
 
-static void nfs41_handle_cb_path_down(struct nfs_client *clp)
+static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
 {
        nfs_expire_all_delegations(clp);
        if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
                nfs4_schedule_state_manager(clp);
+       dprintk("%s: server %s declared a backchannel fault\n", __func__,
+                       clp->cl_hostname);
+}
+
+static void nfs41_handle_cb_path_down(struct nfs_client *clp)
+{
+       if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
+               &clp->cl_state) == 0)
+               nfs4_schedule_state_manager(clp);
 }
 
 void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
 {
        if (!flags)
                return;
+
+       dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
+               __func__, clp->cl_hostname, clp->cl_clientid, flags);
+
        if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
                nfs41_handle_server_reboot(clp);
        if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
@@ -1659,18 +1743,21 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
                nfs41_handle_state_revoked(clp);
        if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
                nfs41_handle_recallable_state_revoked(clp);
-       if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
-                           SEQ4_STATUS_BACKCHANNEL_FAULT |
-                           SEQ4_STATUS_CB_PATH_DOWN_SESSION))
+       if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
+               nfs41_handle_backchannel_fault(clp);
+       else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
+                               SEQ4_STATUS_CB_PATH_DOWN_SESSION))
                nfs41_handle_cb_path_down(clp);
 }
 
 static int nfs4_reset_session(struct nfs_client *clp)
 {
+       struct rpc_cred *cred;
        int status;
 
        nfs4_begin_drain_session(clp);
-       status = nfs4_proc_destroy_session(clp->cl_session);
+       cred = nfs4_get_exchange_id_cred(clp);
+       status = nfs4_proc_destroy_session(clp->cl_session, cred);
        if (status && status != -NFS4ERR_BADSESSION &&
            status != -NFS4ERR_DEADSESSION) {
                status = nfs4_recovery_handle_error(clp, status);
@@ -1678,19 +1765,26 @@ static int nfs4_reset_session(struct nfs_client *clp)
        }
 
        memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
-       status = nfs4_proc_create_session(clp);
+       status = nfs4_proc_create_session(clp, cred);
        if (status) {
-               status = nfs4_recovery_handle_error(clp, status);
+               dprintk("%s: session reset failed with status %d for server %s!\n",
+                       __func__, status, clp->cl_hostname);
+               status = nfs4_handle_reclaim_lease_error(clp, status);
                goto out;
        }
        clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
        /* create_session negotiated new slot table */
        clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+       clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+       dprintk("%s: session reset was successful for server %s!\n",
+                       __func__, clp->cl_hostname);
 
         /* Let the state manager reestablish state */
        if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
                nfs41_setup_state_renewal(clp);
 out:
+       if (cred)
+               put_rpccred(cred);
        return status;
 }
 
@@ -1722,37 +1816,41 @@ static int nfs4_recall_slot(struct nfs_client *clp)
        return 0;
 }
 
-#else /* CONFIG_NFS_V4_1 */
-static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
-static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
-static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
-#endif /* CONFIG_NFS_V4_1 */
-
-/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
- * on EXCHANGE_ID for v4.1
- */
-static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
+static int nfs4_bind_conn_to_session(struct nfs_client *clp)
 {
-       switch (status) {
-       case -NFS4ERR_CLID_INUSE:
-       case -NFS4ERR_STALE_CLIENTID:
-               clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+       struct rpc_cred *cred;
+       int ret;
+
+       nfs4_begin_drain_session(clp);
+       cred = nfs4_get_exchange_id_cred(clp);
+       ret = nfs4_proc_bind_conn_to_session(clp, cred);
+       if (cred)
+               put_rpccred(cred);
+       clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+       switch (ret) {
+       case 0:
+               dprintk("%s: bind_conn_to_session was successful for server %s!\n",
+                       __func__, clp->cl_hostname);
                break;
        case -NFS4ERR_DELAY:
-       case -ETIMEDOUT:
-       case -EAGAIN:
                ssleep(1);
+               set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
                break;
-
-       case -EKEYEXPIRED:
-               nfs4_warn_keyexpired(clp->cl_hostname);
-       case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
-                                * in nfs4_exchange_id */
        default:
-               return;
+               return nfs4_recovery_handle_error(clp, ret);
        }
-       set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+       return 0;
 }
+#else /* CONFIG_NFS_V4_1 */
+static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
+static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
+static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
+
+static int nfs4_bind_conn_to_session(struct nfs_client *clp)
+{
+       return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
 
 static void nfs4_state_manager(struct nfs_client *clp)
 {
@@ -1760,19 +1858,21 @@ static void nfs4_state_manager(struct nfs_client *clp)
 
        /* Ensure exclusive access to NFSv4 state */
        do {
+               if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
+                       status = nfs4_reclaim_lease(clp);
+                       if (status < 0)
+                               goto out_error;
+                       clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+                       set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+               }
+
                if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
                        /* We're going to have to re-establish a clientid */
                        status = nfs4_reclaim_lease(clp);
-                       if (status) {
-                               nfs4_set_lease_expired(clp, status);
-                               if (test_bit(NFS4CLNT_LEASE_EXPIRED,
-                                                       &clp->cl_state))
-                                       continue;
-                               if (clp->cl_cons_state ==
-                                                       NFS_CS_SESSION_INITING)
-                                       nfs_mark_client_ready(clp, status);
+                       if (status < 0)
                                goto out_error;
-                       }
+                       if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+                               continue;
                        clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
 
                        if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
@@ -1803,6 +1903,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
                                goto out_error;
                }
 
+               /* Send BIND_CONN_TO_SESSION */
+               if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
+                               &clp->cl_state) && nfs4_has_session(clp)) {
+                       status = nfs4_bind_conn_to_session(clp);
+                       if (status < 0)
+                               goto out_error;
+                       continue;
+               }
+
                /* First recover reboot state... */
                if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
                        status = nfs4_do_reclaim(clp,
index c54aae364beebd38833151f97328c3edfe7c2337..ee4a74db95d0b1b7ea49e8dd1263f0504f2fffa8 100644 (file)
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
+
 #include "nfs4_fs.h"
 #include "internal.h"
 #include "pnfs.h"
+#include "netns.h"
 
 #define NFSDBG_FACILITY                NFSDBG_XDR
 
@@ -99,9 +101,12 @@ static int nfs4_stat_to_errno(int);
 #define nfs4_path_maxsz                (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
 #define nfs4_owner_maxsz       (1 + XDR_QUADLEN(IDMAP_NAMESZ))
 #define nfs4_group_maxsz       (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We support only one layout type per file system */
+#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-                               3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
+                               3 + 3 + 3 + nfs4_owner_maxsz + \
+                               nfs4_group_maxsz + decode_mdsthreshold_maxsz))
 #define nfs4_fattr_maxsz       (nfs4_fattr_bitmap_maxsz + \
                                nfs4_fattr_value_maxsz)
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -321,8 +326,20 @@ static int nfs4_stat_to_errno(int);
                                     1 /* csr_flags */ + \
                                     decode_channel_attrs_maxsz + \
                                     decode_channel_attrs_maxsz)
+#define encode_bind_conn_to_session_maxsz  (op_encode_hdr_maxsz + \
+                                    /* bctsa_sessid */ \
+                                    XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+                                    1 /* bctsa_dir */ + \
+                                    1 /* bctsa_use_conn_in_rdma_mode */)
+#define decode_bind_conn_to_session_maxsz  (op_decode_hdr_maxsz +      \
+                                    /* bctsr_sessid */ \
+                                    XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+                                    1 /* bctsr_dir */ + \
+                                    1 /* bctsr_use_conn_in_rdma_mode */)
 #define encode_destroy_session_maxsz    (op_encode_hdr_maxsz + 4)
 #define decode_destroy_session_maxsz    (op_decode_hdr_maxsz)
+#define encode_destroy_clientid_maxsz   (op_encode_hdr_maxsz + 2)
+#define decode_destroy_clientid_maxsz   (op_decode_hdr_maxsz)
 #define encode_sequence_maxsz  (op_encode_hdr_maxsz + \
                                XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
 #define decode_sequence_maxsz  (op_decode_hdr_maxsz + \
@@ -421,30 +438,22 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_commit_sz     (compound_encode_hdr_maxsz + \
                                encode_sequence_maxsz + \
                                encode_putfh_maxsz + \
-                               encode_commit_maxsz + \
-                               encode_getattr_maxsz)
+                               encode_commit_maxsz)
 #define NFS4_dec_commit_sz     (compound_decode_hdr_maxsz + \
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
-                               decode_commit_maxsz + \
-                               decode_getattr_maxsz)
+                               decode_commit_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
                                encode_sequence_maxsz + \
                                encode_putfh_maxsz + \
-                               encode_savefh_maxsz + \
                                encode_open_maxsz + \
                                encode_getfh_maxsz + \
-                               encode_getattr_maxsz + \
-                               encode_restorefh_maxsz + \
                                encode_getattr_maxsz)
 #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
-                               decode_savefh_maxsz + \
                                decode_open_maxsz + \
                                decode_getfh_maxsz + \
-                               decode_getattr_maxsz + \
-                               decode_restorefh_maxsz + \
                                decode_getattr_maxsz)
 #define NFS4_enc_open_confirm_sz \
                                (compound_encode_hdr_maxsz + \
@@ -595,47 +604,37 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_remove_sz     (compound_encode_hdr_maxsz + \
                                encode_sequence_maxsz + \
                                encode_putfh_maxsz + \
-                               encode_remove_maxsz + \
-                               encode_getattr_maxsz)
+                               encode_remove_maxsz)
 #define NFS4_dec_remove_sz     (compound_decode_hdr_maxsz + \
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
-                               decode_remove_maxsz + \
-                               decode_getattr_maxsz)
+                               decode_remove_maxsz)
 #define NFS4_enc_rename_sz     (compound_encode_hdr_maxsz + \
                                encode_sequence_maxsz + \
                                encode_putfh_maxsz + \
                                encode_savefh_maxsz + \
                                encode_putfh_maxsz + \
-                               encode_rename_maxsz + \
-                               encode_getattr_maxsz + \
-                               encode_restorefh_maxsz + \
-                               encode_getattr_maxsz)
+                               encode_rename_maxsz)
 #define NFS4_dec_rename_sz     (compound_decode_hdr_maxsz + \
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
                                decode_savefh_maxsz + \
                                decode_putfh_maxsz + \
-                               decode_rename_maxsz + \
-                               decode_getattr_maxsz + \
-                               decode_restorefh_maxsz + \
-                               decode_getattr_maxsz)
+                               decode_rename_maxsz)
 #define NFS4_enc_link_sz       (compound_encode_hdr_maxsz + \
                                encode_sequence_maxsz + \
                                encode_putfh_maxsz + \
                                encode_savefh_maxsz + \
                                encode_putfh_maxsz + \
                                encode_link_maxsz + \
-                               decode_getattr_maxsz + \
                                encode_restorefh_maxsz + \
-                               decode_getattr_maxsz)
+                               encode_getattr_maxsz)
 #define NFS4_dec_link_sz       (compound_decode_hdr_maxsz + \
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
                                decode_savefh_maxsz + \
                                decode_putfh_maxsz + \
                                decode_link_maxsz + \
-                               decode_getattr_maxsz + \
                                decode_restorefh_maxsz + \
                                decode_getattr_maxsz)
 #define NFS4_enc_symlink_sz    (compound_encode_hdr_maxsz + \
@@ -653,20 +652,14 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_create_sz     (compound_encode_hdr_maxsz + \
                                encode_sequence_maxsz + \
                                encode_putfh_maxsz + \
-                               encode_savefh_maxsz + \
                                encode_create_maxsz + \
                                encode_getfh_maxsz + \
-                               encode_getattr_maxsz + \
-                               encode_restorefh_maxsz + \
                                encode_getattr_maxsz)
 #define NFS4_dec_create_sz     (compound_decode_hdr_maxsz + \
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
-                               decode_savefh_maxsz + \
                                decode_create_maxsz + \
                                decode_getfh_maxsz + \
-                               decode_getattr_maxsz + \
-                               decode_restorefh_maxsz + \
                                decode_getattr_maxsz)
 #define NFS4_enc_pathconf_sz   (compound_encode_hdr_maxsz + \
                                encode_sequence_maxsz + \
@@ -738,6 +731,12 @@ static int nfs4_stat_to_errno(int);
                                decode_putfh_maxsz + \
                                decode_secinfo_maxsz)
 #if defined(CONFIG_NFS_V4_1)
+#define NFS4_enc_bind_conn_to_session_sz \
+                               (compound_encode_hdr_maxsz + \
+                                encode_bind_conn_to_session_maxsz)
+#define NFS4_dec_bind_conn_to_session_sz \
+                               (compound_decode_hdr_maxsz + \
+                                decode_bind_conn_to_session_maxsz)
 #define NFS4_enc_exchange_id_sz \
                                (compound_encode_hdr_maxsz + \
                                 encode_exchange_id_maxsz)
@@ -754,6 +753,10 @@ static int nfs4_stat_to_errno(int);
                                         encode_destroy_session_maxsz)
 #define NFS4_dec_destroy_session_sz    (compound_decode_hdr_maxsz + \
                                         decode_destroy_session_maxsz)
+#define NFS4_enc_destroy_clientid_sz   (compound_encode_hdr_maxsz + \
+                                        encode_destroy_clientid_maxsz)
+#define NFS4_dec_destroy_clientid_sz   (compound_decode_hdr_maxsz + \
+                                        decode_destroy_clientid_maxsz)
 #define NFS4_enc_sequence_sz \
                                (compound_decode_hdr_maxsz + \
                                 encode_sequence_maxsz)
@@ -1103,7 +1106,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
        encode_nfs4_stateid(xdr, arg->stateid);
 }
 
-static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
 {
        __be32 *p;
 
@@ -1194,6 +1197,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
                           bitmask[1] & nfs4_fattr_bitmap[1], hdr);
 }
 
+static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+                                struct compound_hdr *hdr)
+{
+       encode_getattr_three(xdr,
+                            bitmask[0] & nfs4_fattr_bitmap[0],
+                            bitmask[1] & nfs4_fattr_bitmap[1],
+                            bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
+                            hdr);
+}
+
 static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
 {
        encode_getattr_three(xdr,
@@ -1678,6 +1691,20 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
 
 #if defined(CONFIG_NFS_V4_1)
 /* NFSv4.1 operations */
+static void encode_bind_conn_to_session(struct xdr_stream *xdr,
+                                  struct nfs4_session *session,
+                                  struct compound_hdr *hdr)
+{
+       __be32 *p;
+
+       encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION,
+               decode_bind_conn_to_session_maxsz, hdr);
+       encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+       p = xdr_reserve_space(xdr, 8);
+       *p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH);
+       *p = 0; /* use_conn_in_rdma_mode = False */
+}
+
 static void encode_exchange_id(struct xdr_stream *xdr,
                               struct nfs41_exchange_id_args *args,
                               struct compound_hdr *hdr)
@@ -1726,6 +1753,7 @@ static void encode_create_session(struct xdr_stream *xdr,
        char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
        uint32_t len;
        struct nfs_client *clp = args->client;
+       struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
        u32 max_resp_sz_cached;
 
        /*
@@ -1767,7 +1795,7 @@ static void encode_create_session(struct xdr_stream *xdr,
        *p++ = cpu_to_be32(RPC_AUTH_UNIX);                      /* auth_sys */
 
        /* authsys_parms rfc1831 */
-       *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec);     /* stamp */
+       *p++ = (__be32)nn->boot_time.tv_nsec;           /* stamp */
        p = xdr_encode_opaque(p, machine_name, len);
        *p++ = cpu_to_be32(0);                          /* UID */
        *p++ = cpu_to_be32(0);                          /* GID */
@@ -1782,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr,
        encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
 }
 
+static void encode_destroy_clientid(struct xdr_stream *xdr,
+                                  uint64_t clientid,
+                                  struct compound_hdr *hdr)
+{
+       encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr);
+       encode_uint64(xdr, clientid);
+}
+
 static void encode_reclaim_complete(struct xdr_stream *xdr,
                                    struct nfs41_reclaim_complete_args *args,
                                    struct compound_hdr *hdr)
@@ -2064,7 +2100,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->fh, &hdr);
        encode_remove(xdr, &args->name, &hdr);
-       encode_getfattr(xdr, args->bitmask, &hdr);
        encode_nops(&hdr);
 }
 
@@ -2084,9 +2119,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_savefh(xdr, &hdr);
        encode_putfh(xdr, args->new_dir, &hdr);
        encode_rename(xdr, args->old_name, args->new_name, &hdr);
-       encode_getfattr(xdr, args->bitmask, &hdr);
-       encode_restorefh(xdr, &hdr);
-       encode_getfattr(xdr, args->bitmask, &hdr);
        encode_nops(&hdr);
 }
 
@@ -2106,7 +2138,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_savefh(xdr, &hdr);
        encode_putfh(xdr, args->dir_fh, &hdr);
        encode_link(xdr, args->name, &hdr);
-       encode_getfattr(xdr, args->bitmask, &hdr);
        encode_restorefh(xdr, &hdr);
        encode_getfattr(xdr, args->bitmask, &hdr);
        encode_nops(&hdr);
@@ -2125,12 +2156,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_compound_hdr(xdr, req, &hdr);
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->dir_fh, &hdr);
-       encode_savefh(xdr, &hdr);
        encode_create(xdr, args, &hdr);
        encode_getfh(xdr, &hdr);
        encode_getfattr(xdr, args->bitmask, &hdr);
-       encode_restorefh(xdr, &hdr);
-       encode_getfattr(xdr, args->bitmask, &hdr);
        encode_nops(&hdr);
 }
 
@@ -2191,12 +2219,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_compound_hdr(xdr, req, &hdr);
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->fh, &hdr);
-       encode_savefh(xdr, &hdr);
        encode_open(xdr, args, &hdr);
        encode_getfh(xdr, &hdr);
-       encode_getfattr(xdr, args->bitmask, &hdr);
-       encode_restorefh(xdr, &hdr);
-       encode_getfattr(xdr, args->dir_bitmask, &hdr);
+       encode_getfattr_open(xdr, args->bitmask, &hdr);
        encode_nops(&hdr);
 }
 
@@ -2448,7 +2473,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
  *  a COMMIT request
  */
 static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
-                               struct nfs_writeargs *args)
+                               struct nfs_commitargs *args)
 {
        struct compound_hdr hdr = {
                .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2458,8 +2483,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->fh, &hdr);
        encode_commit(xdr, args, &hdr);
-       if (args->bitmask)
-               encode_getfattr(xdr, args->bitmask, &hdr);
        encode_nops(&hdr);
 }
 
@@ -2602,8 +2625,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
        encode_compound_hdr(xdr, req, &hdr);
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->fhandle, &hdr);
-       encode_delegreturn(xdr, args->stateid, &hdr);
        encode_getfattr(xdr, args->bitmask, &hdr);
+       encode_delegreturn(xdr, args->stateid, &hdr);
        encode_nops(&hdr);
 }
 
@@ -2650,6 +2673,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
 }
 
 #if defined(CONFIG_NFS_V4_1)
+/*
+ * BIND_CONN_TO_SESSION request
+ */
+static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
+                               struct xdr_stream *xdr,
+                               struct nfs_client *clp)
+{
+       struct compound_hdr hdr = {
+               .minorversion = clp->cl_mvops->minor_version,
+       };
+
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_bind_conn_to_session(xdr, clp->cl_session, &hdr);
+       encode_nops(&hdr);
+}
+
 /*
  * EXCHANGE_ID request
  */
@@ -2698,6 +2737,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
        encode_nops(&hdr);
 }
 
+/*
+ * a DESTROY_CLIENTID request
+ */
+static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
+                                        struct xdr_stream *xdr,
+                                        struct nfs_client *clp)
+{
+       struct compound_hdr hdr = {
+               .minorversion = clp->cl_mvops->minor_version,
+       };
+
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_destroy_clientid(xdr, clp->cl_clientid, &hdr);
+       encode_nops(&hdr);
+}
+
 /*
  * a SEQUENCE request
  */
@@ -4102,7 +4157,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
        return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
 }
 
-static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
 {
        int status;
 
@@ -4220,6 +4275,110 @@ xdr_error:
        return status;
 }
 
+static int decode_threshold_hint(struct xdr_stream *xdr,
+                                 uint32_t *bitmap,
+                                 uint64_t *res,
+                                 uint32_t hint_bit)
+{
+       __be32 *p;
+
+       *res = 0;
+       if (likely(bitmap[0] & hint_bit)) {
+               p = xdr_inline_decode(xdr, 8);
+               if (unlikely(!p))
+                       goto out_overflow;
+               xdr_decode_hyper(p, res);
+       }
+       return 0;
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return -EIO;
+}
+
+static int decode_first_threshold_item4(struct xdr_stream *xdr,
+                                       struct nfs4_threshold *res)
+{
+       __be32 *p, *savep;
+       uint32_t bitmap[3] = {0,}, attrlen;
+       int status;
+
+       /* layout type */
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p)) {
+               print_overflow_msg(__func__, xdr);
+               return -EIO;
+       }
+       res->l_type = be32_to_cpup(p);
+
+       /* thi_hintset bitmap */
+       status = decode_attr_bitmap(xdr, bitmap);
+       if (status < 0)
+               goto xdr_error;
+
+       /* thi_hintlist length */
+       status = decode_attr_length(xdr, &attrlen, &savep);
+       if (status < 0)
+               goto xdr_error;
+       /* thi_hintlist */
+       status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
+       if (status < 0)
+               goto xdr_error;
+       status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
+       if (status < 0)
+               goto xdr_error;
+       status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
+                                      THRESHOLD_RD_IO);
+       if (status < 0)
+               goto xdr_error;
+       status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
+                                      THRESHOLD_WR_IO);
+       if (status < 0)
+               goto xdr_error;
+
+       status = verify_attr_len(xdr, savep, attrlen);
+       res->bm = bitmap[0];
+
+       dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+                __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
+               res->wr_io_sz);
+xdr_error:
+       dprintk("%s ret=%d!\n", __func__, status);
+       return status;
+}
+
+/*
+ * Thresholds on pNFS direct I/O vs. MDS I/O
+ */
+static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
+                                   uint32_t *bitmap,
+                                   struct nfs4_threshold *res)
+{
+       __be32 *p;
+       int status = 0;
+       uint32_t num;
+
+       if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
+               return -EIO;
+       if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
+               p = xdr_inline_decode(xdr, 4);
+               if (unlikely(!p))
+                       goto out_overflow;
+               num = be32_to_cpup(p);
+               if (num == 0)
+                       return 0;
+               if (num > 1)
+                       printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
+                               "drivers per filesystem not supported\n",
+                               __func__);
+
+               status = decode_first_threshold_item4(xdr, res);
+       }
+       return status;
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return -EIO;
+}
+
 static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
                struct nfs_fattr *fattr, struct nfs_fh *fh,
                struct nfs4_fs_locations *fs_loc,
@@ -4326,6 +4485,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
                goto xdr_error;
        fattr->valid |= status;
 
+       status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
+       if (status < 0)
+               goto xdr_error;
+
 xdr_error:
        dprintk("%s: xdr returned %d\n", __func__, -status);
        return status;
@@ -5156,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        uint32_t dummy;
        char *dummy_str;
        int status;
-       struct nfs_client *clp = res->client;
        uint32_t impl_id_count;
 
        status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
@@ -5166,36 +5328,39 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
                goto out_overflow;
-       xdr_decode_hyper(p, &clp->cl_clientid);
+       xdr_decode_hyper(p, &res->clientid);
        p = xdr_inline_decode(xdr, 12);
        if (unlikely(!p))
                goto out_overflow;
-       clp->cl_seqid = be32_to_cpup(p++);
-       clp->cl_exchange_flags = be32_to_cpup(p++);
+       res->seqid = be32_to_cpup(p++);
+       res->flags = be32_to_cpup(p++);
 
        /* We ask for SP4_NONE */
        dummy = be32_to_cpup(p);
        if (dummy != SP4_NONE)
                return -EIO;
 
-       /* Throw away minor_id */
+       /* server_owner4.so_minor_id */
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
                goto out_overflow;
+       p = xdr_decode_hyper(p, &res->server_owner->minor_id);
 
-       /* Throw away Major id */
+       /* server_owner4.so_major_id */
        status = decode_opaque_inline(xdr, &dummy, &dummy_str);
        if (unlikely(status))
                return status;
+       if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+               return -EIO;
+       memcpy(res->server_owner->major_id, dummy_str, dummy);
+       res->server_owner->major_id_sz = dummy;
 
-       /* Save server_scope */
+       /* server_scope4 */
        status = decode_opaque_inline(xdr, &dummy, &dummy_str);
        if (unlikely(status))
                return status;
-
        if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
                return -EIO;
-
        memcpy(res->server_scope->server_scope, dummy_str, dummy);
        res->server_scope->server_scope_sz = dummy;
 
@@ -5276,6 +5441,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
        return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
 }
 
+static int decode_bind_conn_to_session(struct xdr_stream *xdr,
+                               struct nfs41_bind_conn_to_session_res *res)
+{      /* Decode the BIND_CONN_TO_SESSION op result from xdr into res. */
+       __be32 *p;
+       int status;
+
+       status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION);
+       if (!status)
+               status = decode_sessionid(xdr, &res->session->sess_id);
+       if (unlikely(status))
+               return status;  /* op header or session id did not decode */
+
+       /* dir flags, rdma mode bool */
+       p = xdr_inline_decode(xdr, 8);
+       if (unlikely(!p))
+               goto out_overflow;
+
+       res->dir = be32_to_cpup(p++);
+       if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
+               return -EIO;    /* dir outside 1..NFS4_CDFS4_BOTH is rejected */
+       if (be32_to_cpup(p) == 0)       /* any non-zero word means true */
+               res->use_conn_in_rdma_mode = false;
+       else
+               res->use_conn_in_rdma_mode = true;
+
+       return 0;
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return -EIO;
+}
+
 static int decode_create_session(struct xdr_stream *xdr,
                                 struct nfs41_create_session_res *res)
 {
@@ -5312,6 +5508,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
        return decode_op_hdr(xdr, OP_DESTROY_SESSION);
 }
 
+static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy)
+{      /* Only the DESTROY_CLIENTID op header is decoded; dummy is unused. */
+       return decode_op_hdr(xdr, OP_DESTROY_CLIENTID);
+}
+
 static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
 {
        return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
@@ -5800,9 +6001,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
        if (status)
                goto out;
        status = decode_remove(xdr, &res->cinfo);
-       if (status)
-               goto out;
-       decode_getfattr(xdr, res->dir_attr, res->server);
 out:
        return status;
 }
@@ -5832,15 +6030,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
        if (status)
                goto out;
        status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
-       if (status)
-               goto out;
-       /* Current FH is target directory */
-       if (decode_getfattr(xdr, res->new_fattr, res->server))
-               goto out;
-       status = decode_restorefh(xdr);
-       if (status)
-               goto out;
-       decode_getfattr(xdr, res->old_fattr, res->server);
 out:
        return status;
 }
@@ -5876,8 +6065,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
         * Note order: OP_LINK leaves the directory as the current
         *             filehandle.
         */
-       if (decode_getfattr(xdr, res->dir_attr, res->server))
-               goto out;
        status = decode_restorefh(xdr);
        if (status)
                goto out;
@@ -5902,9 +6089,6 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
        if (status)
                goto out;
        status = decode_putfh(xdr);
-       if (status)
-               goto out;
-       status = decode_savefh(xdr);
        if (status)
                goto out;
        status = decode_create(xdr, &res->dir_cinfo);
@@ -5913,12 +6097,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
        status = decode_getfh(xdr, res->fh);
        if (status)
                goto out;
-       if (decode_getfattr(xdr, res->fattr, res->server))
-               goto out;
-       status = decode_restorefh(xdr);
-       if (status)
-               goto out;
-       decode_getfattr(xdr, res->dir_fattr, res->server);
+       decode_getfattr(xdr, res->fattr, res->server);
 out:
        return status;
 }
@@ -6073,9 +6252,6 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
        if (status)
                goto out;
        status = decode_putfh(xdr);
-       if (status)
-               goto out;
-       status = decode_savefh(xdr);
        if (status)
                goto out;
        status = decode_open(xdr, res);
@@ -6083,11 +6259,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
                goto out;
        if (decode_getfh(xdr, &res->fh) != 0)
                goto out;
-       if (decode_getfattr(xdr, res->f_attr, res->server) != 0)
-               goto out;
-       if (decode_restorefh(xdr) != 0)
-               goto out;
-       decode_getfattr(xdr, res->dir_attr, res->server);
+       decode_getfattr(xdr, res->f_attr, res->server);
 out:
        return status;
 }
@@ -6353,7 +6525,7 @@ out:
  * Decode COMMIT response
  */
 static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-                              struct nfs_writeres *res)
+                              struct nfs_commitres *res)
 {
        struct compound_hdr hdr;
        int status;
@@ -6368,10 +6540,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
        if (status)
                goto out;
        status = decode_commit(xdr, res);
-       if (status)
-               goto out;
-       if (res->fattr)
-               decode_getfattr(xdr, res->fattr, res->server);
 out:
        return status;
 }
@@ -6527,10 +6695,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
        status = decode_putfh(xdr);
        if (status != 0)
                goto out;
-       status = decode_delegreturn(xdr);
+       status = decode_getfattr(xdr, res->fattr, res->server);
        if (status != 0)
                goto out;
-       decode_getfattr(xdr, res->fattr, res->server);
+       status = decode_delegreturn(xdr);
 out:
        return status;
 }
@@ -6590,6 +6758,22 @@ out:
 }
 
 #if defined(CONFIG_NFS_V4_1)
+/*
+ * Decode BIND_CONN_TO_SESSION response
+ *
+ * Decodes the compound header and, only if that returns 0, the
+ * BIND_CONN_TO_SESSION result into res.  Returns the compound header
+ * status when it is non-zero, otherwise the status of
+ * decode_bind_conn_to_session().  rqstp is not used.
+ */
+static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp,
+                                       struct xdr_stream *xdr,
+                                       void *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (!status)
+               status = decode_bind_conn_to_session(xdr, res);
+       return status;
+}
+
 /*
  * Decode EXCHANGE_ID response
  */
@@ -6638,6 +6822,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
        return status;
 }
 
+/*
+ * Decode DESTROY_CLIENTID response
+ *
+ * Decodes the compound header and, only if that returns 0, the
+ * DESTROY_CLIENTID op header.  Returns the compound header status when it
+ * is non-zero, otherwise the status of decode_destroy_clientid().
+ * rqstp is not used; res is passed through to decode_destroy_clientid().
+ */
+static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
+                                       struct xdr_stream *xdr,
+                                       void *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (!status)
+               status = decode_destroy_clientid(xdr, res);
+       return status;
+}
+
 /*
  * Decode SEQUENCE response
  */
@@ -7085,6 +7285,9 @@ struct rpc_procinfo       nfs4_procedures[] = {
        PROC(TEST_STATEID,      enc_test_stateid,       dec_test_stateid),
        PROC(FREE_STATEID,      enc_free_stateid,       dec_free_stateid),
        PROC(GETDEVICELIST,     enc_getdevicelist,      dec_getdevicelist),
+       PROC(BIND_CONN_TO_SESSION,
+                       enc_bind_conn_to_session, dec_bind_conn_to_session),
+       PROC(DESTROY_CLIENTID,  enc_destroy_clientid,   dec_destroy_clientid),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
index 4bff4a3dab4602ffa8fe1f48df5d3adc3e8709c3..b47277baebab92930bee6c1fbac445fd8978a6b9 100644 (file)
@@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
        memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
 }
 
-int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
+static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
                       struct objio_segment **pseg)
 {
 /*     This is the in memory structure of the objio_segment
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
 
 int objio_read_pagelist(struct nfs_read_data *rdata)
 {
+       struct nfs_pgio_header *hdr = rdata->header;
        struct objio_state *objios;
        int ret;
 
-       ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
-                       rdata->lseg, rdata->args.pages, rdata->args.pgbase,
+       ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
+                       hdr->lseg, rdata->args.pages, rdata->args.pgbase,
                        rdata->args.offset, rdata->args.count, rdata,
                        GFP_KERNEL, &objios);
        if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 {
        struct objio_state *objios = priv;
        struct nfs_write_data *wdata = objios->oir.rpcdata;
+       struct address_space *mapping = wdata->header->inode->i_mapping;
        pgoff_t index = offset / PAGE_SIZE;
-       struct page *page = find_get_page(wdata->inode->i_mapping, index);
+       struct page *page = find_get_page(mapping, index);
 
        if (!page) {
-               page = find_or_create_page(wdata->inode->i_mapping,
-                                               index, GFP_NOFS);
+               page = find_or_create_page(mapping, index, GFP_NOFS);
                if (unlikely(!page)) {
                        dprintk("%s: grab_cache_page Failed index=0x%lx\n",
                                __func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
 
 int objio_write_pagelist(struct nfs_write_data *wdata, int how)
 {
+       struct nfs_pgio_header *hdr = wdata->header;
        struct objio_state *objios;
        int ret;
 
-       ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
-                       wdata->lseg, wdata->args.pages, wdata->args.pgbase,
+       ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
+                       hdr->lseg, wdata->args.pages, wdata->args.pgbase,
                        wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
                        &objios);
        if (unlikely(ret))
index 595c5fc21a19d15efaab48bff059336d7762c1b3..8746135453011dc70d30ebbd5b70e2b813f7b15d 100644 (file)
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
        if (status >= 0)
                rdata->res.count = status;
        else
-               rdata->pnfs_error = status;
+               rdata->header->pnfs_error = status;
        objlayout_iodone(oir);
        /* must not use oir after this point */
 
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 enum pnfs_try_status
 objlayout_read_pagelist(struct nfs_read_data *rdata)
 {
+       struct nfs_pgio_header *hdr = rdata->header;
+       struct inode *inode = hdr->inode;
        loff_t offset = rdata->args.offset;
        size_t count = rdata->args.count;
        int err;
        loff_t eof;
 
-       eof = i_size_read(rdata->inode);
+       eof = i_size_read(inode);
        if (unlikely(offset + count > eof)) {
                if (offset >= eof) {
                        err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
        }
 
        rdata->res.eof = (offset + count) >= eof;
-       _fix_verify_io_params(rdata->lseg, &rdata->args.pages,
+       _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
                              &rdata->args.pgbase,
                              rdata->args.offset, rdata->args.count);
 
        dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
-               __func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
+               __func__, inode->i_ino, offset, count, rdata->res.eof);
 
        err = objio_read_pagelist(rdata);
  out:
        if (unlikely(err)) {
-               rdata->pnfs_error = err;
+               hdr->pnfs_error = err;
                dprintk("%s: Returned Error %d\n", __func__, err);
                return PNFS_NOT_ATTEMPTED;
        }
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
                wdata->res.count = status;
                wdata->verf.committed = oir->committed;
        } else {
-               wdata->pnfs_error = status;
+               wdata->header->pnfs_error = status;
        }
        objlayout_iodone(oir);
        /* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
 objlayout_write_pagelist(struct nfs_write_data *wdata,
                         int how)
 {
+       struct nfs_pgio_header *hdr = wdata->header;
        int err;
 
-       _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
+       _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
                              &wdata->args.pgbase,
                              wdata->args.offset, wdata->args.count);
 
        err = objio_write_pagelist(wdata, how);
        if (unlikely(err)) {
-               wdata->pnfs_error = err;
+               hdr->pnfs_error = err;
                dprintk("%s: Returned Error %d\n", __func__, err);
                return PNFS_NOT_ATTEMPTED;
        }
index d21fceaa9f6263fecff450506653c21ba055872f..aed913c833f422bbf6a88e2726be5eec6d9bbc40 100644 (file)
 
 static struct kmem_cache *nfs_page_cachep;
 
+bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+{      /* Point p->pagevec at room for pagecount page pointers. */
+       p->npages = pagecount;
+       if (pagecount <= ARRAY_SIZE(p->page_array))
+               p->pagevec = p->page_array;     /* embedded array is big enough */
+       else {
+               p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+               if (!p->pagevec)
+                       p->npages = 0;  /* allocation failed: record no pages */
+       }
+       return p->pagevec != NULL;      /* false only when kcalloc() failed */
+}
+
+void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+                      struct nfs_pgio_header *hdr,
+                      void (*release)(struct nfs_pgio_header *hdr))
+{      /* Fill in hdr from the descriptor desc and the first request on it. */
+       hdr->req = nfs_list_entry(desc->pg_list.next);  /* entry at the head of pg_list */
+       hdr->inode = desc->pg_inode;
+       hdr->cred = hdr->req->wb_context->cred; /* credential of that request's context */
+       hdr->io_start = req_offset(hdr->req);
+       hdr->good_bytes = desc->pg_count;
+       hdr->dreq = desc->pg_dreq;
+       hdr->release = release; /* caller-supplied release callback */
+       hdr->completion_ops = desc->pg_completion_ops;
+       if (hdr->completion_ops->init_hdr)      /* init_hdr hook is optional */
+               hdr->completion_ops->init_hdr(hdr);
+}
+
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
+{      /* Record error at offset pos in hdr, under hdr->lock. */
+       spin_lock(&hdr->lock);
+       if (pos < hdr->io_start + hdr->good_bytes) {    /* ignore errors at or past the current good range */
+               set_bit(NFS_IOHDR_ERROR, &hdr->flags);
+               clear_bit(NFS_IOHDR_EOF, &hdr->flags);
+               hdr->good_bytes = pos - hdr->io_start;  /* good data now ends at pos */
+               hdr->error = error;
+       }
+       spin_unlock(&hdr->lock);
+}
+
 static inline struct nfs_page *
 nfs_page_alloc(void)
 {
@@ -76,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
         * long write-back delay. This will be adjusted in
         * update_nfs_request below if the region is not locked. */
        req->wb_page    = page;
-       atomic_set(&req->wb_complete, 0);
        req->wb_index   = page->index;
        page_cache_get(page);
-       BUG_ON(PagePrivate(page));
-       BUG_ON(!PageLocked(page));
-       BUG_ON(page->mapping->host != inode);
        req->wb_offset  = offset;
        req->wb_pgbase  = offset;
        req->wb_bytes   = count;
@@ -104,6 +141,15 @@ void nfs_unlock_request(struct nfs_page *req)
        clear_bit(PG_BUSY, &req->wb_flags);
        smp_mb__after_clear_bit();
        wake_up_bit(&req->wb_flags, PG_BUSY);
+}
+
+/**
+ * nfs_unlock_and_release_request - Unlock request and release the nfs_page
+ * @req: request to unlock and then release
+ *
+ * Calls nfs_unlock_request() on @req, followed by nfs_release_request().
+ */
+void nfs_unlock_and_release_request(struct nfs_page *req)
+{
+       nfs_unlock_request(req);
        nfs_release_request(req);
 }
 
@@ -203,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                     struct inode *inode,
                     const struct nfs_pageio_ops *pg_ops,
+                    const struct nfs_pgio_completion_ops *compl_ops,
                     size_t bsize,
                     int io_flags)
 {
@@ -215,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
        desc->pg_recoalesce = 0;
        desc->pg_inode = inode;
        desc->pg_ops = pg_ops;
+       desc->pg_completion_ops = compl_ops;
        desc->pg_ioflags = io_flags;
        desc->pg_error = 0;
        desc->pg_lseg = NULL;
+       desc->pg_dreq = NULL;
 }
 
 /**
@@ -241,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
                return false;
        if (req->wb_context->state != prev->wb_context->state)
                return false;
-       if (req->wb_index != (prev->wb_index + 1))
-               return false;
        if (req->wb_pgbase != 0)
                return false;
        if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
                return false;
+       if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+               return false;
        return pgio->pg_ops->pg_test(pgio, prev, req);
 }
 
index 38512bcd2e98b4c82e3b03e2592061c06897abe5..b8323aa7b54384af8f51b84b3077d98b8f22d951 100644 (file)
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
        dprintk("%s:Begin lo %p\n", __func__, lo);
 
        if (list_empty(&lo->plh_segs)) {
+               /* Reset MDS Threshold I/O counters */
+               NFS_I(lo->plh_inode)->write_io = 0;
+               NFS_I(lo->plh_inode)->read_io = 0;
                if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
                        put_layout_hdr_locked(lo);
                return 0;
@@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
        spin_unlock(&nfsi->vfs_inode.i_lock);
        pnfs_free_lseg_list(&tmp_list);
 }
+EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
 
 /*
  * Called by the state manager to remove all layouts established under an
@@ -692,6 +696,7 @@ out:
        dprintk("<-- %s status: %d\n", __func__, status);
        return status;
 }
+EXPORT_SYMBOL_GPL(_pnfs_return_layout);
 
 bool pnfs_roc(struct inode *ino)
 {
@@ -930,6 +935,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
        return ret;
 }
 
+/*
+ * Use mdsthreshold hints set at each OPEN to determine if I/O should go
+ * to the MDS or over pNFS
+ *
+ * The nfs_inode read_io and write_io fields are cumulative counters reset
+ * when there are no layout segments. Note that in pnfs_update_layout iomode
+ * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
+ * WRITE request.
+ *
+ * A return of true means use MDS I/O.
+ *
+ * From rfc 5661:
+ * If a file's size is smaller than the file size threshold, data accesses
+ * SHOULD be sent to the metadata server.  If an I/O request has a length that
+ * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
+ * server.  If both file size and I/O size are provided, the client SHOULD
+ * reach or exceed  both thresholds before sending its read or write
+ * requests to the data server.
+ *
+ * How the hints are combined by this function: for the given iomode, each
+ * hint flagged in t->bm is compared against the inode size (rd_sz/wr_sz)
+ * or against the cumulative counter nfsi->read_io/nfsi->write_io
+ * (rd_io_sz/wr_io_sz).  When both a size hint and an I/O hint are present,
+ * true is returned only if both values are below their thresholds; when
+ * only one is present, that comparison alone decides.  If the open context
+ * carries no thresholds (ctx->mdsthreshold == NULL), or iomode is neither
+ * IOMODE_READ nor IOMODE_RW, the result is false.
+ */
+static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
+                                    struct inode *ino, int iomode)
+{
+       struct nfs4_threshold *t = ctx->mdsthreshold;
+       struct nfs_inode *nfsi = NFS_I(ino);
+       loff_t fsize = i_size_read(ino);
+       bool size = false, size_set = false, io = false, io_set = false, ret = false;
+
+       if (t == NULL)
+               return ret;     /* no hints: never force MDS I/O */
+
+       dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+               __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
+
+       switch (iomode) {
+       case IOMODE_READ:
+               if (t->bm & THRESHOLD_RD) {
+                       dprintk("%s fsize %llu\n", __func__, fsize);
+                       size_set = true;
+                       if (fsize < t->rd_sz)
+                               size = true;    /* file is below the read size hint */
+               }
+               if (t->bm & THRESHOLD_RD_IO) {
+                       dprintk("%s nfsi->read_io %llu\n", __func__,
+                               nfsi->read_io);
+                       io_set = true;
+                       if (nfsi->read_io < t->rd_io_sz)
+                               io = true;      /* read counter is below the I/O hint */
+               }
+               break;
+       case IOMODE_RW:
+               if (t->bm & THRESHOLD_WR) {
+                       dprintk("%s fsize %llu\n", __func__, fsize);
+                       size_set = true;
+                       if (fsize < t->wr_sz)
+                               size = true;    /* file is below the write size hint */
+               }
+               if (t->bm & THRESHOLD_WR_IO) {
+                       dprintk("%s nfsi->write_io %llu\n", __func__,
+                               nfsi->write_io);
+                       io_set = true;
+                       if (nfsi->write_io < t->wr_io_sz)
+                               io = true;      /* write counter is below the I/O hint */
+               }
+               break;
+       }
+       if (size_set && io_set) {       /* both hints present: both must be below */
+               if (size && io)
+                       ret = true;
+       } else if (size || io)          /* at most one hint present: it decides */
+               ret = true;
+
+       dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
+       return ret;
+}
+
 /*
  * Layout segment is retrieved from the server if not cached.
  * The appropriate layout segment is referenced and returned to the caller.
@@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
 
        if (!pnfs_enabled_sb(NFS_SERVER(ino)))
                return NULL;
+
+       if (pnfs_within_mdsthreshold(ctx, ino, iomode))
+               return NULL;
+
        spin_lock(&ino->i_lock);
        lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
        if (lo == NULL) {
@@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
 {
        BUG_ON(pgio->pg_lseg != NULL);
 
+       if (req->wb_offset != req->wb_pgbase) {
+               nfs_pageio_reset_read_mds(pgio);
+               return;
+       }
        pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                           req->wb_context,
                                           req_offset(req),
@@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
 {
        BUG_ON(pgio->pg_lseg != NULL);
 
+       if (req->wb_offset != req->wb_pgbase) {
+               nfs_pageio_reset_write_mds(pgio);
+               return;
+       }
        pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                           req->wb_context,
                                           req_offset(req),
@@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
 bool
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+                     const struct nfs_pgio_completion_ops *compl_ops)
 {
        struct nfs_server *server = NFS_SERVER(inode);
        struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
        if (ld == NULL)
                return false;
-       nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+       nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
+                       server->rsize, 0);
        return true;
 }
 
 bool
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+                      int ioflags,
+                      const struct nfs_pgio_completion_ops *compl_ops)
 {
        struct nfs_server *server = NFS_SERVER(inode);
        struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
        if (ld == NULL)
                return false;
-       nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+       nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
+                       server->wsize, ioflags);
        return true;
 }
 
@@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
-static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head)
+int pnfs_write_done_resend_to_mds(struct inode *inode,
+                               struct list_head *head,
+                               const struct nfs_pgio_completion_ops *compl_ops)
 {
        struct nfs_pageio_descriptor pgio;
        LIST_HEAD(failed);
 
        /* Resend all requests through the MDS */
-       nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE);
+       nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
        while (!list_empty(head)) {
                struct nfs_page *req = nfs_list_entry(head->next);
 
@@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
+
+static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+{      /* Error path of pnfs_ld_write_done(): resend the header's pages via the MDS. */
+       struct nfs_pgio_header *hdr = data->header;
+
+       dprintk("pnfs write error = %d\n", hdr->pnfs_error);
+       if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
+           PNFS_LAYOUTRET_ON_ERROR) {
+               clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);     /* no layoutcommit after an error */
+               pnfs_return_layout(hdr->inode);
+       }
+       if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))     /* only the first caller to set REDO resends */
+               data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+                                                       &hdr->pages,
+                                                       hdr->completion_ops);
+}
 
 /*
  * Called by non rpc-based layout drivers
  */
 void pnfs_ld_write_done(struct nfs_write_data *data)
 {
-       if (likely(!data->pnfs_error)) {
+       struct nfs_pgio_header *hdr = data->header;
+
+       if (!hdr->pnfs_error) {
                pnfs_set_layoutcommit(data);
-               data->mds_ops->rpc_call_done(&data->task, data);
-       } else {
-               dprintk("pnfs write error = %d\n", data->pnfs_error);
-               if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
-                                               PNFS_LAYOUTRET_ON_ERROR) {
-                       /* Don't lo_commit on error, Server will needs to
-                        * preform a file recovery.
-                        */
-                       clear_bit(NFS_INO_LAYOUTCOMMIT,
-                                 &NFS_I(data->inode)->flags);
-                       pnfs_return_layout(data->inode);
-               }
-               data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
-       }
-       put_lseg(data->lseg);
-       data->mds_ops->rpc_release(data);
+               hdr->mds_ops->rpc_call_done(&data->task, data);
+       } else
+               pnfs_ld_handle_write_error(data);
+       hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
@@ -1219,12 +1325,13 @@ static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
                struct nfs_write_data *data)
 {
-       list_splice_tail_init(&data->pages, &desc->pg_list);
-       if (data->req && list_empty(&data->req->wb_list))
-               nfs_list_add_request(data->req, &desc->pg_list);
-       nfs_pageio_reset_write_mds(desc);
-       desc->pg_recoalesce = 1;
-       put_lseg(data->lseg);
+       struct nfs_pgio_header *hdr = data->header;
+
+       if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+               list_splice_tail_init(&hdr->pages, &desc->pg_list);
+               nfs_pageio_reset_write_mds(desc);
+               desc->pg_recoalesce = 1;
+       }
        nfs_writedata_release(data);
 }
 
@@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
                        struct pnfs_layout_segment *lseg,
                        int how)
 {
-       struct inode *inode = wdata->inode;
+       struct nfs_pgio_header *hdr = wdata->header;
+       struct inode *inode = hdr->inode;
        enum pnfs_try_status trypnfs;
        struct nfs_server *nfss = NFS_SERVER(inode);
 
-       wdata->mds_ops = call_ops;
-       wdata->lseg = get_lseg(lseg);
+       hdr->mds_ops = call_ops;
 
        dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
                inode->i_ino, wdata->args.count, wdata->args.offset, how);
-
        trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
-       if (trypnfs == PNFS_NOT_ATTEMPTED) {
-               put_lseg(wdata->lseg);
-               wdata->lseg = NULL;
-       } else
+       if (trypnfs != PNFS_NOT_ATTEMPTED)
                nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
-
        dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
        return trypnfs;
 }
@@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
        while (!list_empty(head)) {
                enum pnfs_try_status trypnfs;
 
-               data = list_entry(head->next, struct nfs_write_data, list);
+               data = list_first_entry(head, struct nfs_write_data, list);
                list_del_init(&data->list);
 
                trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
        put_lseg(lseg);
 }
 
+static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
+{
+       put_lseg(hdr->lseg);
+       nfs_writehdr_free(hdr);
+}
+
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-       LIST_HEAD(head);
+       struct nfs_write_header *whdr;
+       struct nfs_pgio_header *hdr;
        int ret;
 
-       ret = nfs_generic_flush(desc, &head);
-       if (ret != 0) {
+       whdr = nfs_writehdr_alloc();
+       if (!whdr) {
+               desc->pg_completion_ops->error_cleanup(&desc->pg_list);
                put_lseg(desc->pg_lseg);
                desc->pg_lseg = NULL;
-               return ret;
+               return -ENOMEM;
        }
-       pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
-       return 0;
+       hdr = &whdr->header;
+       nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
+       hdr->lseg = get_lseg(desc->pg_lseg);
+       atomic_inc(&hdr->refcnt);
+       ret = nfs_generic_flush(desc, hdr);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+       } else
+               pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+       if (atomic_dec_and_test(&hdr->refcnt))
+               hdr->completion_ops->completion(hdr);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+int pnfs_read_done_resend_to_mds(struct inode *inode,
+                               struct list_head *head,
+                               const struct nfs_pgio_completion_ops *compl_ops)
 {
        struct nfs_pageio_descriptor pgio;
+       LIST_HEAD(failed);
 
-       put_lseg(data->lseg);
-       data->lseg = NULL;
-       dprintk("pnfs write error = %d\n", data->pnfs_error);
-       if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
-                                               PNFS_LAYOUTRET_ON_ERROR)
-               pnfs_return_layout(data->inode);
-
-       nfs_pageio_init_read_mds(&pgio, data->inode);
-
-       while (!list_empty(&data->pages)) {
-               struct nfs_page *req = nfs_list_entry(data->pages.next);
+       /* Resend all requests through the MDS */
+       nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
+       while (!list_empty(head)) {
+               struct nfs_page *req = nfs_list_entry(head->next);
 
                nfs_list_remove_request(req);
-               nfs_pageio_add_request(&pgio, req);
+               if (!nfs_pageio_add_request(&pgio, req))
+                       nfs_list_add_request(req, &failed);
        }
        nfs_pageio_complete(&pgio);
+
+       if (!list_empty(&failed)) {
+               list_move(&failed, head);
+               return -EIO;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
+
+static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+{
+       struct nfs_pgio_header *hdr = data->header;
+
+       dprintk("pnfs read error = %d\n", hdr->pnfs_error);
+       if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
+           PNFS_LAYOUTRET_ON_ERROR) {
+               clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+               pnfs_return_layout(hdr->inode);
+       }
+       if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+               data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+                                                       &hdr->pages,
+                                                       hdr->completion_ops);
 }
 
 /*
@@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
  */
 void pnfs_ld_read_done(struct nfs_read_data *data)
 {
-       if (likely(!data->pnfs_error)) {
+       struct nfs_pgio_header *hdr = data->header;
+
+       if (likely(!hdr->pnfs_error)) {
                __nfs4_read_done_cb(data);
-               data->mds_ops->rpc_call_done(&data->task, data);
+               hdr->mds_ops->rpc_call_done(&data->task, data);
        } else
                pnfs_ld_handle_read_error(data);
-       put_lseg(data->lseg);
-       data->mds_ops->rpc_release(data);
+       hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
@@ -1334,11 +1476,13 @@ static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
                struct nfs_read_data *data)
 {
-       list_splice_tail_init(&data->pages, &desc->pg_list);
-       if (data->req && list_empty(&data->req->wb_list))
-               nfs_list_add_request(data->req, &desc->pg_list);
-       nfs_pageio_reset_read_mds(desc);
-       desc->pg_recoalesce = 1;
+       struct nfs_pgio_header *hdr = data->header;
+
+       if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+               list_splice_tail_init(&hdr->pages, &desc->pg_list);
+               nfs_pageio_reset_read_mds(desc);
+               desc->pg_recoalesce = 1;
+       }
        nfs_readdata_release(data);
 }
 
@@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
                       const struct rpc_call_ops *call_ops,
                       struct pnfs_layout_segment *lseg)
 {
-       struct inode *inode = rdata->inode;
+       struct nfs_pgio_header *hdr = rdata->header;
+       struct inode *inode = hdr->inode;
        struct nfs_server *nfss = NFS_SERVER(inode);
        enum pnfs_try_status trypnfs;
 
-       rdata->mds_ops = call_ops;
-       rdata->lseg = get_lseg(lseg);
+       hdr->mds_ops = call_ops;
 
        dprintk("%s: Reading ino:%lu %u@%llu\n",
                __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
 
        trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
-       if (trypnfs == PNFS_NOT_ATTEMPTED) {
-               put_lseg(rdata->lseg);
-               rdata->lseg = NULL;
-       } else {
+       if (trypnfs != PNFS_NOT_ATTEMPTED)
                nfs_inc_stats(inode, NFSIOS_PNFS_READ);
-       }
        dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
        return trypnfs;
 }
@@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
        while (!list_empty(head)) {
                enum pnfs_try_status trypnfs;
 
-               data = list_entry(head->next, struct nfs_read_data, list);
+               data = list_first_entry(head, struct nfs_read_data, list);
                list_del_init(&data->list);
 
                trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
        put_lseg(lseg);
 }
 
+static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
+{
+       put_lseg(hdr->lseg);
+       nfs_readhdr_free(hdr);
+}
+
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-       LIST_HEAD(head);
+       struct nfs_read_header *rhdr;
+       struct nfs_pgio_header *hdr;
        int ret;
 
-       ret = nfs_generic_pagein(desc, &head);
-       if (ret != 0) {
+       rhdr = nfs_readhdr_alloc();
+       if (!rhdr) {
+               desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+               ret = -ENOMEM;
                put_lseg(desc->pg_lseg);
                desc->pg_lseg = NULL;
                return ret;
        }
-       pnfs_do_multiple_reads(desc, &head);
-       return 0;
+       hdr = &rhdr->header;
+       nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
+       hdr->lseg = get_lseg(desc->pg_lseg);
+       atomic_inc(&hdr->refcnt);
+       ret = nfs_generic_pagein(desc, hdr);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+       } else
+               pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+       if (atomic_dec_and_test(&hdr->refcnt))
+               hdr->completion_ops->completion(hdr);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
 
@@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 void
 pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 {
-       struct nfs_inode *nfsi = NFS_I(wdata->inode);
+       struct nfs_pgio_header *hdr = wdata->header;
+       struct inode *inode = hdr->inode;
+       struct nfs_inode *nfsi = NFS_I(inode);
        loff_t end_pos = wdata->mds_offset + wdata->res.count;
        bool mark_as_dirty = false;
 
-       spin_lock(&nfsi->vfs_inode.i_lock);
+       spin_lock(&inode->i_lock);
        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
                mark_as_dirty = true;
                dprintk("%s: Set layoutcommit for inode %lu ",
-                       __func__, wdata->inode->i_ino);
+                       __func__, inode->i_ino);
        }
-       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) {
+       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
                /* references matched in nfs4_layoutcommit_release */
-               get_lseg(wdata->lseg);
+               get_lseg(hdr->lseg);
        }
        if (end_pos > nfsi->layout->plh_lwb)
                nfsi->layout->plh_lwb = end_pos;
-       spin_unlock(&nfsi->vfs_inode.i_lock);
+       spin_unlock(&inode->i_lock);
        dprintk("%s: lseg %p end_pos %llu\n",
-               __func__, wdata->lseg, nfsi->layout->plh_lwb);
+               __func__, hdr->lseg, nfsi->layout->plh_lwb);
 
        /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
         * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
        if (mark_as_dirty)
-               mark_inode_dirty_sync(wdata->inode);
+               mark_inode_dirty_sync(inode);
 }
 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
@@ -1550,3 +1712,15 @@ out_free:
        kfree(data);
        goto out;
 }
+
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+       struct nfs4_threshold *thp;
+
+       thp = kzalloc(sizeof(*thp), GFP_NOFS);
+       if (!thp) {
+               dprintk("%s mdsthreshold allocation failed\n", __func__);
+               return NULL;
+       }
+       return thp;
+}
index 442ebf68eeecf51dfaa6b8835318b53010eefe19..29fd23c0efdcb07c699c5e2e94c1e23dad8de103 100644 (file)
@@ -63,6 +63,7 @@ enum {
        NFS_LAYOUT_BULK_RECALL,         /* bulk recall affecting layout */
        NFS_LAYOUT_ROC,                 /* some lseg had roc bit set */
        NFS_LAYOUT_DESTROYED,           /* no new use of layout allowed */
+       NFS_LAYOUT_INVALID,             /* layout is being destroyed */
 };
 
 enum layoutdriver_policy_flags {
@@ -94,11 +95,20 @@ struct pnfs_layoutdriver_type {
        const struct nfs_pageio_ops *pg_read_ops;
        const struct nfs_pageio_ops *pg_write_ops;
 
+       struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
        void (*mark_request_commit) (struct nfs_page *req,
-                                       struct pnfs_layout_segment *lseg);
-       void (*clear_request_commit) (struct nfs_page *req);
-       int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
-       int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
+                                    struct pnfs_layout_segment *lseg,
+                                    struct nfs_commit_info *cinfo);
+       void (*clear_request_commit) (struct nfs_page *req,
+                                     struct nfs_commit_info *cinfo);
+       int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
+                                 int max);
+       void (*recover_commit_reqs) (struct list_head *list,
+                                    struct nfs_commit_info *cinfo);
+       int (*commit_pagelist)(struct inode *inode,
+                              struct list_head *mds_pages,
+                              int how,
+                              struct nfs_commit_info *cinfo);
 
        /*
         * Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -168,8 +178,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
 
-bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
-bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
+                          const struct nfs_pgio_completion_ops *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
+                           int, const struct nfs_pgio_completion_ops *);
 
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -211,6 +223,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
                                               gfp_t gfp_flags);
 
 void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
+int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
+                       const struct nfs_pgio_completion_ops *compl_ops);
+int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
+                       const struct nfs_pgio_completion_ops *compl_ops);
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
 
 /* nfs4_deviceid_flags */
 enum {
@@ -261,49 +278,66 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
 }
 
 static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+                struct nfs_commit_info *cinfo)
 {
-       if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
+       if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
                return PNFS_NOT_ATTEMPTED;
-       return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
+       return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
+}
+
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+       if (ld == NULL || ld->get_ds_info == NULL)
+               return NULL;
+       return ld->get_ds_info(inode);
 }
 
 static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+                        struct nfs_commit_info *cinfo)
 {
        struct inode *inode = req->wb_context->dentry->d_inode;
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
        if (lseg == NULL || ld->mark_request_commit == NULL)
                return false;
-       ld->mark_request_commit(req, lseg);
+       ld->mark_request_commit(req, lseg, cinfo);
        return true;
 }
 
 static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
        struct inode *inode = req->wb_context->dentry->d_inode;
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
        if (ld == NULL || ld->clear_request_commit == NULL)
                return false;
-       ld->clear_request_commit(req);
+       ld->clear_request_commit(req, cinfo);
        return true;
 }
 
 static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+                      int max)
 {
-       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-       int ret;
-
-       if (ld == NULL || ld->scan_commit_lists == NULL)
+       if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
                return 0;
-       ret = ld->scan_commit_lists(inode, max, lock);
-       if (ret != 0)
-               set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
-       return ret;
+       else
+               return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
+}
+
+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+                        struct nfs_commit_info *cinfo)
+{
+       if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
+               return;
+       NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
 }
 
 /* Should the pNFS client commit and return the layout upon a setattr */
@@ -327,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
        return 0;
 }
 
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+                  struct nfs_server *nfss)
+{
+       return (dst && src && src->bm != 0 &&
+                                       nfss->pnfs_curr_ld->id == src->l_type);
+}
+
 #ifdef NFS_DEBUG
 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 #else
@@ -396,45 +438,74 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
 {
 }
 
-static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+                                        const struct nfs_pgio_completion_ops *compl_ops)
 {
        return false;
 }
 
-static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
+                                         const struct nfs_pgio_completion_ops *compl_ops)
 {
        return false;
 }
 
 static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+                struct nfs_commit_info *cinfo)
 {
        return PNFS_NOT_ATTEMPTED;
 }
 
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+       return NULL;
+}
+
 static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+                        struct nfs_commit_info *cinfo)
 {
        return false;
 }
 
 static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
        return false;
 }
 
 static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+                      int max)
 {
        return 0;
 }
 
+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+                        struct nfs_commit_info *cinfo)
+{
+}
+
 static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 {
        return 0;
 }
 
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+                  struct nfs_server *nfss)
+{
+       return false;
+}
+
+static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
index d6408b6437de4f9f0c55f9f211f61df129976b7c..a706b6bcc286a5a401318e868b0d1fbab2a206a4 100644 (file)
@@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 }
 
 static int
-nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+nfs_proc_lookup(struct inode *dir, struct qstr *name,
                struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
        struct nfs_diropargs    arg = {
@@ -640,12 +640,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
+       struct inode *inode = data->header->inode;
+
        if (nfs_async_handle_expired_key(task))
                return -EAGAIN;
 
-       nfs_invalidate_atime(data->inode);
+       nfs_invalidate_atime(inode);
        if (task->tk_status >= 0) {
-               nfs_refresh_inode(data->inode, data->res.fattr);
+               nfs_refresh_inode(inode, data->res.fattr);
                /* Emulate the eof flag, which isn't normally needed in NFSv2
                 * as it is guaranteed to always return the file attributes
                 */
@@ -667,11 +669,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
+       struct inode *inode = data->header->inode;
+
        if (nfs_async_handle_expired_key(task))
                return -EAGAIN;
 
        if (task->tk_status >= 0)
-               nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+               nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
        return 0;
 }
 
@@ -687,8 +691,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
        rpc_call_start(task);
 }
 
+static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+       BUG();
+}
+
 static void
-nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
        BUG();
 }
@@ -732,6 +741,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
        .file_inode_ops = &nfs_file_inode_operations,
        .file_ops       = &nfs_file_operations,
        .getroot        = nfs_proc_get_root,
+       .submount       = nfs_submount,
        .getattr        = nfs_proc_getattr,
        .setattr        = nfs_proc_setattr,
        .lookup         = nfs_proc_lookup,
@@ -763,6 +773,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
        .write_rpc_prepare = nfs_proc_write_rpc_prepare,
        .write_done     = nfs_write_done,
        .commit_setup   = nfs_proc_commit_setup,
+       .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
        .lock           = nfs_proc_lock,
        .lock_check_bounds = nfs_lock_check_bounds,
        .close_context  = nfs_close_context,
index 0a4be28c2ea3c76f57321bf765708924c4a2fdcf..86ced78362142119328ad827138c0c716668aeac 100644 (file)
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
 
 static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_partial_ops;
-static const struct rpc_call_ops nfs_read_full_ops;
+static const struct rpc_call_ops nfs_read_common_ops;
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc(void)
 {
-       struct nfs_read_data *p;
-
-       p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
-       if (p) {
-               INIT_LIST_HEAD(&p->pages);
-               p->npages = pagecount;
-               if (pagecount <= ARRAY_SIZE(p->page_array))
-                       p->pagevec = p->page_array;
-               else {
-                       p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
-                       if (!p->pagevec) {
-                               kmem_cache_free(nfs_rdata_cachep, p);
-                               p = NULL;
-                       }
-               }
+       struct nfs_read_header *rhdr;
+
+       rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+       if (rhdr) {
+               struct nfs_pgio_header *hdr = &rhdr->header;
+
+               INIT_LIST_HEAD(&hdr->pages);
+               INIT_LIST_HEAD(&hdr->rpc_list);
+               spin_lock_init(&hdr->lock);
+               atomic_set(&hdr->refcnt, 0);
+       }
+       return rhdr;
+}
+
+static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+                                               unsigned int pagecount)
+{
+       struct nfs_read_data *data, *prealloc;
+
+       prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
+       if (prealloc->header == NULL)
+               data = prealloc;
+       else
+               data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               goto out;
+
+       if (nfs_pgarray_set(&data->pages, pagecount)) {
+               data->header = hdr;
+               atomic_inc(&hdr->refcnt);
+       } else {
+               if (data != prealloc)
+                       kfree(data);
+               data = NULL;
        }
-       return p;
+out:
+       return data;
 }
 
-void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
-       if (p && (p->pagevec != &p->page_array[0]))
-               kfree(p->pagevec);
-       kmem_cache_free(nfs_rdata_cachep, p);
+       struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
+
+       kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
 
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
+       struct nfs_pgio_header *hdr = rdata->header;
+       struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
+
        put_nfs_open_context(rdata->args.context);
-       nfs_readdata_free(rdata);
+       if (rdata->pages.pagevec != rdata->pages.page_array)
+               kfree(rdata->pages.pagevec);
+       if (rdata != &read_header->rpc_data)
+               kfree(rdata);
+       else
+               rdata->header = NULL;
+       if (atomic_dec_and_test(&hdr->refcnt))
+               hdr->completion_ops->completion(hdr);
 }
 
 static
@@ -78,39 +108,11 @@ int nfs_return_empty_page(struct page *page)
        return 0;
 }
 
-static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
-{
-       unsigned int remainder = data->args.count - data->res.count;
-       unsigned int base = data->args.pgbase + data->res.count;
-       unsigned int pglen;
-       struct page **pages;
-
-       if (data->res.eof == 0 || remainder == 0)
-               return;
-       /*
-        * Note: "remainder" can never be negative, since we check for
-        *      this in the XDR code.
-        */
-       pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-       base &= ~PAGE_CACHE_MASK;
-       pglen = PAGE_CACHE_SIZE - base;
-       for (;;) {
-               if (remainder <= pglen) {
-                       zero_user(*pages, base, remainder);
-                       break;
-               }
-               zero_user(*pages, base, pglen);
-               pages++;
-               remainder -= pglen;
-               pglen = PAGE_CACHE_SIZE;
-               base = 0;
-       }
-}
-
 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
-               struct inode *inode)
+                             struct inode *inode,
+                             const struct nfs_pgio_completion_ops *compl_ops)
 {
-       nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+       nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
                        NFS_SERVER(inode)->rsize, 0);
 }
 
@@ -121,11 +123,12 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
-static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-               struct inode *inode)
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+                         struct inode *inode,
+                         const struct nfs_pgio_completion_ops *compl_ops)
 {
-       if (!pnfs_pageio_init_read(pgio, inode))
-               nfs_pageio_init_read_mds(pgio, inode);
+       if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
+               nfs_pageio_init_read_mds(pgio, inode, compl_ops);
 }
 
 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,9 +149,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
        if (len < PAGE_CACHE_SIZE)
                zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-       nfs_pageio_init_read(&pgio, inode);
+       nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
        nfs_pageio_add_request(&pgio, new);
        nfs_pageio_complete(&pgio);
+       NFS_I(inode)->read_io += pgio.pg_bytes_written;
        return 0;
 }
 
@@ -169,16 +173,49 @@ static void nfs_readpage_release(struct nfs_page *req)
        nfs_release_request(req);
 }
 
-int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
-                     const struct rpc_call_ops *call_ops)
+/* Note io was page aligned */
+static void nfs_read_completion(struct nfs_pgio_header *hdr)
+{
+       unsigned long bytes = 0;
+
+       if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+               goto out;
+       while (!list_empty(&hdr->pages)) {
+               struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+               struct page *page = req->wb_page;
+
+               if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+                       if (bytes > hdr->good_bytes)
+                               zero_user(page, 0, PAGE_SIZE);
+                       else if (hdr->good_bytes - bytes < PAGE_SIZE)
+                               zero_user_segment(page,
+                                       hdr->good_bytes & ~PAGE_MASK,
+                                       PAGE_SIZE);
+               }
+               bytes += req->wb_bytes;
+               if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+                       if (bytes <= hdr->good_bytes)
+                               SetPageUptodate(page);
+               } else
+                       SetPageUptodate(page);
+               nfs_list_remove_request(req);
+               nfs_readpage_release(req);
+       }
+out:
+       hdr->release(hdr);
+}
+
+int nfs_initiate_read(struct rpc_clnt *clnt,
+                     struct nfs_read_data *data,
+                     const struct rpc_call_ops *call_ops, int flags)
 {
-       struct inode *inode = data->inode;
+       struct inode *inode = data->header->inode;
        int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
        struct rpc_task *task;
        struct rpc_message msg = {
                .rpc_argp = &data->args,
                .rpc_resp = &data->res,
-               .rpc_cred = data->cred,
+               .rpc_cred = data->header->cred,
        };
        struct rpc_task_setup task_setup_data = {
                .task = &data->task,
@@ -187,7 +224,7 @@ int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
                .callback_ops = call_ops,
                .callback_data = data,
                .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC | swap_flags,
+               .flags = RPC_TASK_ASYNC | swap_flags | flags,
        };
 
        /* Set up the initial task struct. */
@@ -212,19 +249,15 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 /*
  * Set up the NFS read request struct
  */
-static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+static void nfs_read_rpcsetup(struct nfs_read_data *data,
                unsigned int count, unsigned int offset)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
-
-       data->req         = req;
-       data->inode       = inode;
-       data->cred        = req->wb_context->cred;
+       struct nfs_page *req = data->header->req;
 
-       data->args.fh     = NFS_FH(inode);
+       data->args.fh     = NFS_FH(data->header->inode);
        data->args.offset = req_offset(req) + offset;
        data->args.pgbase = req->wb_pgbase + offset;
-       data->args.pages  = data->pagevec;
+       data->args.pages  = data->pages.pagevec;
        data->args.count  = count;
        data->args.context = get_nfs_open_context(req->wb_context);
        data->args.lock_context = req->wb_lock_context;
@@ -238,9 +271,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 static int nfs_do_read(struct nfs_read_data *data,
                const struct rpc_call_ops *call_ops)
 {
-       struct inode *inode = data->args.context->dentry->d_inode;
+       struct inode *inode = data->header->inode;
 
-       return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+       return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
 }
 
 static int
@@ -253,7 +286,7 @@ nfs_do_multiple_reads(struct list_head *head,
        while (!list_empty(head)) {
                int ret2;
 
-               data = list_entry(head->next, struct nfs_read_data, list);
+               data = list_first_entry(head, struct nfs_read_data, list);
                list_del_init(&data->list);
 
                ret2 = nfs_do_read(data, call_ops);
@@ -275,6 +308,24 @@ nfs_async_read_error(struct list_head *head)
        }
 }
 
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
+       .error_cleanup = nfs_async_read_error,
+       .completion = nfs_read_completion,
+};
+
+static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
+               struct nfs_pgio_header *hdr)
+{
+       set_bit(NFS_IOHDR_REDO, &hdr->flags);
+       while (!list_empty(&hdr->rpc_list)) {
+               struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
+                               struct nfs_read_data, list);
+               list_del(&data->list);
+               nfs_readdata_release(data);
+       }
+       desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+}
+
 /*
  * Generate multiple requests to fill a single page.
  *
@@ -288,93 +339,95 @@ nfs_async_read_error(struct list_head *head)
  * won't see the new data until our attribute cache is updated.  This is more
  * or less conventional NFS client behavior.
  */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
+                           struct nfs_pgio_header *hdr)
 {
-       struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+       struct nfs_page *req = hdr->req;
        struct page *page = req->wb_page;
        struct nfs_read_data *data;
        size_t rsize = desc->pg_bsize, nbytes;
        unsigned int offset;
-       int requests = 0;
-       int ret = 0;
-
-       nfs_list_remove_request(req);
 
        offset = 0;
        nbytes = desc->pg_count;
        do {
                size_t len = min(nbytes,rsize);
 
-               data = nfs_readdata_alloc(1);
-               if (!data)
-                       goto out_bad;
-               data->pagevec[0] = page;
-               nfs_read_rpcsetup(req, data, len, offset);
-               list_add(&data->list, res);
-               requests++;
+               data = nfs_readdata_alloc(hdr, 1);
+               if (!data) {
+                       nfs_pagein_error(desc, hdr);
+                       return -ENOMEM;
+               }
+               data->pages.pagevec[0] = page;
+               nfs_read_rpcsetup(data, len, offset);
+               list_add(&data->list, &hdr->rpc_list);
                nbytes -= len;
                offset += len;
-       } while(nbytes != 0);
-       atomic_set(&req->wb_complete, requests);
-       desc->pg_rpc_callops = &nfs_read_partial_ops;
-       return ret;
-out_bad:
-       while (!list_empty(res)) {
-               data = list_entry(res->next, struct nfs_read_data, list);
-               list_del(&data->list);
-               nfs_readdata_release(data);
-       }
-       nfs_readpage_release(req);
-       return -ENOMEM;
+       } while (nbytes != 0);
+
+       nfs_list_remove_request(req);
+       nfs_list_add_request(req, &hdr->pages);
+       desc->pg_rpc_callops = &nfs_read_common_ops;
+       return 0;
 }
 
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
+                         struct nfs_pgio_header *hdr)
 {
        struct nfs_page         *req;
        struct page             **pages;
-       struct nfs_read_data    *data;
+       struct nfs_read_data    *data;
        struct list_head *head = &desc->pg_list;
-       int ret = 0;
 
-       data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
-                                                    desc->pg_count));
+       data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+                                                         desc->pg_count));
        if (!data) {
-               nfs_async_read_error(head);
-               ret = -ENOMEM;
-               goto out;
+               nfs_pagein_error(desc, hdr);
+               return -ENOMEM;
        }
 
-       pages = data->pagevec;
+       pages = data->pages.pagevec;
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
-               nfs_list_add_request(req, &data->pages);
+               nfs_list_add_request(req, &hdr->pages);
                *pages++ = req->wb_page;
        }
-       req = nfs_list_entry(data->pages.next);
 
-       nfs_read_rpcsetup(req, data, desc->pg_count, 0);
-       list_add(&data->list, res);
-       desc->pg_rpc_callops = &nfs_read_full_ops;
-out:
-       return ret;
+       nfs_read_rpcsetup(data, desc->pg_count, 0);
+       list_add(&data->list, &hdr->rpc_list);
+       desc->pg_rpc_callops = &nfs_read_common_ops;
+       return 0;
 }
 
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+                      struct nfs_pgio_header *hdr)
 {
        if (desc->pg_bsize < PAGE_CACHE_SIZE)
-               return nfs_pagein_multi(desc, head);
-       return nfs_pagein_one(desc, head);
+               return nfs_pagein_multi(desc, hdr);
+       return nfs_pagein_one(desc, hdr);
 }
 
 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-       LIST_HEAD(head);
+       struct nfs_read_header *rhdr;
+       struct nfs_pgio_header *hdr;
        int ret;
 
-       ret = nfs_generic_pagein(desc, &head);
+       rhdr = nfs_readhdr_alloc();
+       if (!rhdr) {
+               desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+               return -ENOMEM;
+       }
+       hdr = &rhdr->header;
+       nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
+       atomic_inc(&hdr->refcnt);
+       ret = nfs_generic_pagein(desc, hdr);
        if (ret == 0)
-               ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+               ret = nfs_do_multiple_reads(&hdr->rpc_list,
+                                           desc->pg_rpc_callops);
+       if (atomic_dec_and_test(&hdr->refcnt))
+               hdr->completion_ops->completion(hdr);
        return ret;
 }
 
@@ -389,20 +442,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
  */
 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
+       struct inode *inode = data->header->inode;
        int status;
 
        dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
                        task->tk_status);
 
-       status = NFS_PROTO(data->inode)->read_done(task, data);
+       status = NFS_PROTO(inode)->read_done(task, data);
        if (status != 0)
                return status;
 
-       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+       nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
 
        if (task->tk_status == -ESTALE) {
-               set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
-               nfs_mark_for_revalidate(data->inode);
+               set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+               nfs_mark_for_revalidate(inode);
        }
        return 0;
 }
@@ -412,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
        struct nfs_readargs *argp = &data->args;
        struct nfs_readres *resp = &data->res;
 
-       if (resp->eof || resp->count == argp->count)
-               return;
-
        /* This is a short read! */
-       nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+       nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
        /* Has the server at least made some progress? */
-       if (resp->count == 0)
+       if (resp->count == 0) {
+               nfs_set_pgio_error(data->header, -EIO, argp->offset);
                return;
-
+       }
        /* Yes, so retry the read at the end of the data */
        data->mds_offset += resp->count;
        argp->offset += resp->count;
@@ -429,114 +481,46 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
        rpc_restart_call_prepare(task);
 }
 
-/*
- * Handle a read reply that fills part of a page.
- */
-static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 {
        struct nfs_read_data *data = calldata;
+       struct nfs_pgio_header *hdr = data->header;
+
+       /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
        if (nfs_readpage_result(task, data) != 0)
                return;
        if (task->tk_status < 0)
-               return;
-
-       nfs_readpage_truncate_uninitialised_page(data);
-       nfs_readpage_retry(task, data);
+               nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
+       else if (data->res.eof) {
+               loff_t bound;
+
+               bound = data->args.offset + data->res.count;
+               spin_lock(&hdr->lock);
+               if (bound < hdr->io_start + hdr->good_bytes) {
+                       set_bit(NFS_IOHDR_EOF, &hdr->flags);
+                       clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
+                       hdr->good_bytes = bound - hdr->io_start;
+               }
+               spin_unlock(&hdr->lock);
+       } else if (data->res.count != data->args.count)
+               nfs_readpage_retry(task, data);
 }
 
-static void nfs_readpage_release_partial(void *calldata)
+static void nfs_readpage_release_common(void *calldata)
 {
-       struct nfs_read_data *data = calldata;
-       struct nfs_page *req = data->req;
-       struct page *page = req->wb_page;
-       int status = data->task.tk_status;
-
-       if (status < 0)
-               set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
-
-       if (atomic_dec_and_test(&req->wb_complete)) {
-               if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
-                       SetPageUptodate(page);
-               nfs_readpage_release(req);
-       }
        nfs_readdata_release(calldata);
 }
 
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfs_read_data *data = calldata;
-       NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
-}
-
-static const struct rpc_call_ops nfs_read_partial_ops = {
-       .rpc_call_prepare = nfs_read_prepare,
-       .rpc_call_done = nfs_readpage_result_partial,
-       .rpc_release = nfs_readpage_release_partial,
-};
-
-static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
-{
-       unsigned int count = data->res.count;
-       unsigned int base = data->args.pgbase;
-       struct page **pages;
-
-       if (data->res.eof)
-               count = data->args.count;
-       if (unlikely(count == 0))
-               return;
-       pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-       base &= ~PAGE_CACHE_MASK;
-       count += base;
-       for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-               SetPageUptodate(*pages);
-       if (count == 0)
-               return;
-       /* Was this a short read? */
-       if (data->res.eof || data->res.count == data->args.count)
-               SetPageUptodate(*pages);
-}
-
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
-{
-       struct nfs_read_data *data = calldata;
-
-       if (nfs_readpage_result(task, data) != 0)
-               return;
-       if (task->tk_status < 0)
-               return;
-       /*
-        * Note: nfs_readpage_retry may change the values of
-        * data->args. In the multi-page case, we therefore need
-        * to ensure that we call nfs_readpage_set_pages_uptodate()
-        * first.
-        */
-       nfs_readpage_truncate_uninitialised_page(data);
-       nfs_readpage_set_pages_uptodate(data);
-       nfs_readpage_retry(task, data);
-}
-
-static void nfs_readpage_release_full(void *calldata)
-{
-       struct nfs_read_data *data = calldata;
-
-       while (!list_empty(&data->pages)) {
-               struct nfs_page *req = nfs_list_entry(data->pages.next);
-
-               nfs_list_remove_request(req);
-               nfs_readpage_release(req);
-       }
-       nfs_readdata_release(calldata);
+       NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
 }
 
-static const struct rpc_call_ops nfs_read_full_ops = {
+static const struct rpc_call_ops nfs_read_common_ops = {
        .rpc_call_prepare = nfs_read_prepare,
-       .rpc_call_done = nfs_readpage_result_full,
-       .rpc_release = nfs_readpage_release_full,
+       .rpc_call_done = nfs_readpage_result_common,
+       .rpc_release = nfs_readpage_release_common,
 };
 
 /*
@@ -668,11 +652,12 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
        if (ret == 0)
                goto read_complete; /* all pages were read */
 
-       nfs_pageio_init_read(&pgio, inode);
+       nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
 
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
        nfs_pageio_complete(&pgio);
+       NFS_I(inode)->read_io += pgio.pg_bytes_written;
        npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        nfs_add_stats(inode, NFSIOS_READPAGES, npages);
 read_complete:
@@ -684,7 +669,7 @@ out:
 int __init nfs_init_readpagecache(void)
 {
        nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-                                            sizeof(struct nfs_read_data),
+                                            sizeof(struct nfs_read_header),
                                             0, SLAB_HWCACHE_ALIGN,
                                             NULL);
        if (nfs_rdata_cachep == NULL)
index 4ac7fca7e4bf32fc01ac980c26dfcb255325f3b1..ff656c022684e9e2b0d94587cf9d807d670bd715 100644 (file)
@@ -66,6 +66,7 @@
 #include "pnfs.h"
 
 #define NFSDBG_FACILITY                NFSDBG_VFS
+#define NFS_TEXT_DATA          1
 
 #ifdef CONFIG_NFS_V3
 #define NFS_DEFAULT_VERSION 3
@@ -277,12 +278,22 @@ static match_table_t nfs_vers_tokens = {
        { Opt_vers_err, NULL }
 };
 
+struct nfs_mount_info {
+       void (*fill_super)(struct super_block *, struct nfs_mount_info *);
+       int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
+       struct nfs_parsed_mount_data *parsed;
+       struct nfs_clone_mount *cloned;
+       struct nfs_fh *mntfh;
+};
+
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct dentry *);
 static int  nfs_show_devname(struct seq_file *, struct dentry *);
 static int  nfs_show_path(struct seq_file *, struct dentry *);
 static int  nfs_show_stats(struct seq_file *, struct dentry *);
+static struct dentry *nfs_fs_mount_common(struct file_system_type *,
+               struct nfs_server *, int, const char *, struct nfs_mount_info *);
 static struct dentry *nfs_fs_mount(struct file_system_type *,
                int, const char *, void *);
 static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -323,12 +334,11 @@ static const struct super_operations nfs_sops = {
 };
 
 #ifdef CONFIG_NFS_V4
-static int nfs4_validate_text_mount_data(void *options,
+static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
+static int nfs4_validate_mount_data(void *options,
        struct nfs_parsed_mount_data *args, const char *dev_name);
 static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
-       struct nfs_parsed_mount_data *data);
-static struct dentry *nfs4_mount(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *raw_data);
+       struct nfs_mount_info *mount_info);
 static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *raw_data);
 static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
@@ -342,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb);
 static struct file_system_type nfs4_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "nfs4",
-       .mount          = nfs4_mount,
+       .mount          = nfs_fs_mount,
        .kill_sb        = nfs4_kill_super,
        .fs_flags       = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -786,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
 
 static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
 {
-       if (nfss->nfs_client && nfss->nfs_client->impl_id) {
-               struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id;
+       if (nfss->nfs_client && nfss->nfs_client->cl_implid) {
+               struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid;
                seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
                           "date='%llu,%u'",
                           impl_id->name, impl_id->domain,
@@ -938,7 +948,7 @@ static void nfs_umount_begin(struct super_block *sb)
                rpc_killall_tasks(rpc);
 }
 
-static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version)
+static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
 {
        struct nfs_parsed_mount_data *data;
 
@@ -953,8 +963,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
                data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
                data->auth_flavors[0]   = RPC_AUTH_UNIX;
                data->auth_flavor_len   = 1;
-               data->version           = version;
                data->minorversion      = 0;
+               data->need_mount        = true;
                data->net               = current->nsproxy->net_ns;
                security_init_mnt_opts(&data->lsm_opts);
        }
@@ -1674,8 +1684,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
  * Use the remote server's MOUNT service to request the NFS file handle
  * corresponding to the provided path.
  */
-static int nfs_try_mount(struct nfs_parsed_mount_data *args,
-                        struct nfs_fh *root_fh)
+static int nfs_request_mount(struct nfs_parsed_mount_data *args,
+                            struct nfs_fh *root_fh)
 {
        rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
        unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
@@ -1738,6 +1748,26 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
        return nfs_walk_authlist(args, &request);
 }
 
+static struct dentry *nfs_try_mount(int flags, const char *dev_name,
+                                   struct nfs_mount_info *mount_info)
+{
+       int status;
+       struct nfs_server *server;
+
+       if (mount_info->parsed->need_mount) {
+               status = nfs_request_mount(mount_info->parsed, mount_info->mntfh);
+               if (status)
+                       return ERR_PTR(status);
+       }
+
+       /* Get a volume representation */
+       server = nfs_create_server(mount_info->parsed, mount_info->mntfh);
+       if (IS_ERR(server))
+               return ERR_CAST(server);
+
+       return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info);
+}
+
 /*
  * Split "dev_name" into "hostname:export_path".
  *
@@ -1826,10 +1856,10 @@ out_path:
  * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
  *   mountproto=tcp after mountproto=udp, and so on
  */
-static int nfs_validate_mount_data(void *options,
-                                  struct nfs_parsed_mount_data *args,
-                                  struct nfs_fh *mntfh,
-                                  const char *dev_name)
+static int nfs23_validate_mount_data(void *options,
+                                    struct nfs_parsed_mount_data *args,
+                                    struct nfs_fh *mntfh,
+                                    const char *dev_name)
 {
        struct nfs_mount_data *data = (struct nfs_mount_data *)options;
        struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
@@ -1883,6 +1913,7 @@ static int nfs_validate_mount_data(void *options,
                args->acregmax          = data->acregmax;
                args->acdirmin          = data->acdirmin;
                args->acdirmax          = data->acdirmax;
+               args->need_mount        = false;
 
                memcpy(sap, &data->addr, sizeof(data->addr));
                args->nfs_server.addrlen = sizeof(data->addr);
@@ -1934,43 +1965,8 @@ static int nfs_validate_mount_data(void *options,
                }
 
                break;
-       default: {
-               int status;
-
-               if (nfs_parse_mount_options((char *)options, args) == 0)
-                       return -EINVAL;
-
-               if (!nfs_verify_server_address(sap))
-                       goto out_no_address;
-
-               if (args->version == 4)
-#ifdef CONFIG_NFS_V4
-                       return nfs4_validate_text_mount_data(options,
-                                                            args, dev_name);
-#else
-                       goto out_v4_not_compiled;
-#endif
-
-               nfs_set_port(sap, &args->nfs_server.port, 0);
-
-               nfs_set_mount_transport_protocol(args);
-
-               status = nfs_parse_devname(dev_name,
-                                          &args->nfs_server.hostname,
-                                          PAGE_SIZE,
-                                          &args->nfs_server.export_path,
-                                          NFS_MAXPATHLEN);
-               if (!status)
-                       status = nfs_try_mount(args, mntfh);
-
-               kfree(args->nfs_server.export_path);
-               args->nfs_server.export_path = NULL;
-
-               if (status)
-                       return status;
-
-               break;
-               }
+       default:
+               return NFS_TEXT_DATA;
        }
 
 #ifndef CONFIG_NFS_V3
@@ -1999,12 +1995,6 @@ out_v3_not_compiled:
        return -EPROTONOSUPPORT;
 #endif /* !CONFIG_NFS_V3 */
 
-#ifndef CONFIG_NFS_V4
-out_v4_not_compiled:
-       dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
-       return -EPROTONOSUPPORT;
-#endif /* !CONFIG_NFS_V4 */
-
 out_nomem:
        dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
        return -ENOMEM;
@@ -2018,6 +2008,82 @@ out_invalid_fh:
        return -EINVAL;
 }
 
+#ifdef CONFIG_NFS_V4
+static int nfs_validate_mount_data(struct file_system_type *fs_type,
+                                  void *options,
+                                  struct nfs_parsed_mount_data *args,
+                                  struct nfs_fh *mntfh,
+                                  const char *dev_name)
+{
+       if (fs_type == &nfs_fs_type)
+               return nfs23_validate_mount_data(options, args, mntfh, dev_name);
+       return nfs4_validate_mount_data(options, args, dev_name);
+}
+#else
+static int nfs_validate_mount_data(struct file_system_type *fs_type,
+                                  void *options,
+                                  struct nfs_parsed_mount_data *args,
+                                  struct nfs_fh *mntfh,
+                                  const char *dev_name)
+{
+       return nfs23_validate_mount_data(options, args, mntfh, dev_name);
+}
+#endif
+
+static int nfs_validate_text_mount_data(void *options,
+                                       struct nfs_parsed_mount_data *args,
+                                       const char *dev_name)
+{
+       int port = 0;
+       int max_namelen = PAGE_SIZE;
+       int max_pathlen = NFS_MAXPATHLEN;
+       struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
+
+       if (nfs_parse_mount_options((char *)options, args) == 0)
+               return -EINVAL;
+
+       if (!nfs_verify_server_address(sap))
+               goto out_no_address;
+
+       if (args->version == 4) {
+#ifdef CONFIG_NFS_V4
+               port = NFS_PORT;
+               max_namelen = NFS4_MAXNAMLEN;
+               max_pathlen = NFS4_MAXPATHLEN;
+               nfs_validate_transport_protocol(args);
+               nfs4_validate_mount_flags(args);
+#else
+               goto out_v4_not_compiled;
+#endif /* CONFIG_NFS_V4 */
+       } else
+               nfs_set_mount_transport_protocol(args);
+
+       nfs_set_port(sap, &args->nfs_server.port, port);
+
+       if (args->auth_flavor_len > 1)
+               goto out_bad_auth;
+
+       return nfs_parse_devname(dev_name,
+                                  &args->nfs_server.hostname,
+                                  max_namelen,
+                                  &args->nfs_server.export_path,
+                                  max_pathlen);
+
+#ifndef CONFIG_NFS_V4
+out_v4_not_compiled:
+       dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
+       return -EPROTONOSUPPORT;
+#endif /* !CONFIG_NFS_V4 */
+
+out_no_address:
+       dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
+       return -EINVAL;
+
+out_bad_auth:
+       dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
+       return -EINVAL;
+}
+
 static int
 nfs_compare_remount_data(struct nfs_server *nfss,
                         struct nfs_parsed_mount_data *data)
@@ -2129,8 +2195,9 @@ static inline void nfs_initialise_sb(struct super_block *sb)
  * Finish setting up an NFS2/3 superblock
  */
 static void nfs_fill_super(struct super_block *sb,
-                          struct nfs_parsed_mount_data *data)
+                          struct nfs_mount_info *mount_info)
 {
+       struct nfs_parsed_mount_data *data = mount_info->parsed;
        struct nfs_server *server = NFS_SB(sb);
 
        sb->s_blocksize_bits = 0;
@@ -2154,8 +2221,9 @@ static void nfs_fill_super(struct super_block *sb,
  * Finish setting up a cloned NFS2/3 superblock
  */
 static void nfs_clone_super(struct super_block *sb,
-                           const struct super_block *old_sb)
+                           struct nfs_mount_info *mount_info)
 {
+       const struct super_block *old_sb = mount_info->cloned->sb;
        struct nfs_server *server = NFS_SB(sb);
 
        sb->s_blocksize_bits = old_sb->s_blocksize_bits;
@@ -2278,52 +2346,70 @@ static int nfs_compare_super(struct super_block *sb, void *data)
        return nfs_compare_mount_options(sb, server, mntflags);
 }
 
+#ifdef CONFIG_NFS_FSCACHE
+static void nfs_get_cache_cookie(struct super_block *sb,
+                                struct nfs_parsed_mount_data *parsed,
+                                struct nfs_clone_mount *cloned)
+{
+       char *uniq = NULL;
+       int ulen = 0;
+
+       if (parsed && parsed->fscache_uniq) {
+               uniq = parsed->fscache_uniq;
+               ulen = strlen(parsed->fscache_uniq);
+       } else if (cloned) {
+               struct nfs_server *mnt_s = NFS_SB(cloned->sb);
+               if (mnt_s->fscache_key) {
+                       uniq = mnt_s->fscache_key->key.uniquifier;
+                       ulen = mnt_s->fscache_key->key.uniq_len;
+               };
+       }
+
+       nfs_fscache_get_super_cookie(sb, uniq, ulen);
+}
+#else
+static void nfs_get_cache_cookie(struct super_block *sb,
+                                struct nfs_parsed_mount_data *parsed,
+                                struct nfs_clone_mount *cloned)
+{
+}
+#endif
+
 static int nfs_bdi_register(struct nfs_server *server)
 {
        return bdi_register_dev(&server->backing_dev_info, server->s_dev);
 }
 
-static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *raw_data)
+static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
+                              struct nfs_mount_info *mount_info)
+{
+       return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
+}
+
+static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
+                                struct nfs_mount_info *mount_info)
+{
+       /* clone any lsm security options from the parent to the new sb */
+       security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
+       if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
+               return -ESTALE;
+       return 0;
+}
+
+static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
+                                         struct nfs_server *server,
+                                         int flags, const char *dev_name,
+                                         struct nfs_mount_info *mount_info)
 {
-       struct nfs_server *server = NULL;
        struct super_block *s;
-       struct nfs_parsed_mount_data *data;
-       struct nfs_fh *mntfh;
        struct dentry *mntroot = ERR_PTR(-ENOMEM);
        int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
        struct nfs_sb_mountdata sb_mntdata = {
                .mntflags = flags,
+               .server = server,
        };
        int error;
 
-       data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
-       mntfh = nfs_alloc_fhandle();
-       if (data == NULL || mntfh == NULL)
-               goto out;
-
-       /* Validate the mount data */
-       error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
-       if (error < 0) {
-               mntroot = ERR_PTR(error);
-               goto out;
-       }
-
-#ifdef CONFIG_NFS_V4
-       if (data->version == 4) {
-               mntroot = nfs4_try_mount(flags, dev_name, data);
-               goto out;
-       }
-#endif /* CONFIG_NFS_V4 */
-
-       /* Get a volume representation */
-       server = nfs_create_server(data, mntfh);
-       if (IS_ERR(server)) {
-               mntroot = ERR_CAST(server);
-               goto out;
-       }
-       sb_mntdata.server = server;
-
        if (server->flags & NFS_MOUNT_UNSHARED)
                compare_super = NULL;
 
@@ -2351,23 +2437,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
 
        if (!s->s_root) {
                /* initial superblock/root creation */
-               nfs_fill_super(s, data);
-               nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
+               mount_info->fill_super(s, mount_info);
+               nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
        }
 
-       mntroot = nfs_get_root(s, mntfh, dev_name);
+       mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
        if (IS_ERR(mntroot))
                goto error_splat_super;
 
-       error = security_sb_set_mnt_opts(s, &data->lsm_opts);
+       error = mount_info->set_security(s, mntroot, mount_info);
        if (error)
                goto error_splat_root;
 
        s->s_flags |= MS_ACTIVE;
 
 out:
-       nfs_free_parsed_mount_data(data);
-       nfs_free_fhandle(mntfh);
        return mntroot;
 
 out_err_nosb:
@@ -2385,6 +2469,43 @@ error_splat_bdi:
        goto out;
 }
 
+static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
+       int flags, const char *dev_name, void *raw_data)
+{
+       struct nfs_mount_info mount_info = {
+               .fill_super = nfs_fill_super,
+               .set_security = nfs_set_sb_security,
+       };
+       struct dentry *mntroot = ERR_PTR(-ENOMEM);
+       int error;
+
+       mount_info.parsed = nfs_alloc_parsed_mount_data();
+       mount_info.mntfh = nfs_alloc_fhandle();
+       if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
+               goto out;
+
+       /* Validate the mount data */
+       error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
+       if (error == NFS_TEXT_DATA)
+               error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
+       if (error < 0) {
+               mntroot = ERR_PTR(error);
+               goto out;
+       }
+
+#ifdef CONFIG_NFS_V4
+       if (mount_info.parsed->version == 4)
+               mntroot = nfs4_try_mount(flags, dev_name, &mount_info);
+       else
+#endif /* CONFIG_NFS_V4 */
+               mntroot = nfs_try_mount(flags, dev_name, &mount_info);
+
+out:
+       nfs_free_parsed_mount_data(mount_info.parsed);
+       nfs_free_fhandle(mount_info.mntfh);
+       return mntroot;
+}
+
 /*
  * Ensure that we unregister the bdi before kill_anon_super
  * releases the device name
@@ -2409,93 +2530,51 @@ static void nfs_kill_super(struct super_block *s)
 }
 
 /*
- * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
  */
 static struct dentry *
-nfs_xdev_mount(struct file_system_type *fs_type, int flags,
-               const char *dev_name, void *raw_data)
+nfs_xdev_mount_common(struct file_system_type *fs_type, int flags,
+               const char *dev_name, struct nfs_mount_info *mount_info)
 {
-       struct nfs_clone_mount *data = raw_data;
-       struct super_block *s;
+       struct nfs_clone_mount *data = mount_info->cloned;
        struct nfs_server *server;
-       struct dentry *mntroot;
-       int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
-       struct nfs_sb_mountdata sb_mntdata = {
-               .mntflags = flags,
-       };
+       struct dentry *mntroot = ERR_PTR(-ENOMEM);
        int error;
 
-       dprintk("--> nfs_xdev_mount()\n");
+       dprintk("--> nfs_xdev_mount_common()\n");
+
+       mount_info->mntfh = data->fh;
 
        /* create a new volume representation */
        server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
        if (IS_ERR(server)) {
                error = PTR_ERR(server);
-               goto out_err_noserver;
-       }
-       sb_mntdata.server = server;
-
-       if (server->flags & NFS_MOUNT_UNSHARED)
-               compare_super = NULL;
-
-       /* -o noac implies -o sync */
-       if (server->flags & NFS_MOUNT_NOAC)
-               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
-       /* Get a superblock - note that we may end up sharing one that already exists */
-       s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
-       if (IS_ERR(s)) {
-               error = PTR_ERR(s);
-               goto out_err_nosb;
-       }
-
-       if (s->s_fs_info != server) {
-               nfs_free_server(server);
-               server = NULL;
-       } else {
-               error = nfs_bdi_register(server);
-               if (error)
-                       goto error_splat_bdi;
-       }
-
-       if (!s->s_root) {
-               /* initial superblock/root creation */
-               nfs_clone_super(s, data->sb);
-               nfs_fscache_get_super_cookie(s, NULL, data);
-       }
-
-       mntroot = nfs_get_root(s, data->fh, dev_name);
-       if (IS_ERR(mntroot)) {
-               error = PTR_ERR(mntroot);
-               goto error_splat_super;
-       }
-       if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
-               dput(mntroot);
-               error = -ESTALE;
-               goto error_splat_super;
+               goto out_err;
        }
 
-       s->s_flags |= MS_ACTIVE;
-
-       /* clone any lsm security options from the parent to the new sb */
-       security_sb_clone_mnt_opts(data->sb, s);
-
-       dprintk("<-- nfs_xdev_mount() = 0\n");
+       mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
+       dprintk("<-- nfs_xdev_mount_common() = 0\n");
+out:
        return mntroot;
 
-out_err_nosb:
-       nfs_free_server(server);
-out_err_noserver:
-       dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
-       return ERR_PTR(error);
+out_err:
+       dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
+       goto out;
+}
 
-error_splat_super:
-       if (server && !s->s_root)
-               bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
-       deactivate_locked_super(s);
-       dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
-       return ERR_PTR(error);
+/*
+ * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ */
+static struct dentry *
+nfs_xdev_mount(struct file_system_type *fs_type, int flags,
+               const char *dev_name, void *raw_data)
+{
+       struct nfs_mount_info mount_info = {
+               .fill_super = nfs_clone_super,
+               .set_security = nfs_clone_sb_security,
+               .cloned   = raw_data,
+       };
+       return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);
 }
 
 #ifdef CONFIG_NFS_V4
@@ -2504,8 +2583,9 @@ error_splat_bdi:
  * Finish setting up a cloned NFS4 superblock
  */
 static void nfs4_clone_super(struct super_block *sb,
-                           const struct super_block *old_sb)
+                            struct nfs_mount_info *mount_info)
 {
+       const struct super_block *old_sb = mount_info->cloned->sb;
        sb->s_blocksize_bits = old_sb->s_blocksize_bits;
        sb->s_blocksize = old_sb->s_blocksize;
        sb->s_maxbytes = old_sb->s_maxbytes;
@@ -2523,7 +2603,8 @@ static void nfs4_clone_super(struct super_block *sb,
 /*
  * Set up an NFS4 superblock
  */
-static void nfs4_fill_super(struct super_block *sb)
+static void nfs4_fill_super(struct super_block *sb,
+                           struct nfs_mount_info *mount_info)
 {
        sb->s_time_gran = 1;
        sb->s_op = &nfs4_sops;
@@ -2542,37 +2623,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
                         NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
 }
 
-static int nfs4_validate_text_mount_data(void *options,
-                                        struct nfs_parsed_mount_data *args,
-                                        const char *dev_name)
-{
-       struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
-
-       nfs_set_port(sap, &args->nfs_server.port, NFS_PORT);
-
-       nfs_validate_transport_protocol(args);
-
-       nfs4_validate_mount_flags(args);
-
-       if (args->version != 4) {
-               dfprintk(MOUNT,
-                        "NFS4: Illegal mount version\n");
-               return -EINVAL;
-       }
-
-       if (args->auth_flavor_len > 1) {
-               dfprintk(MOUNT,
-                        "NFS4: Too many RPC auth flavours specified\n");
-               return -EINVAL;
-       }
-
-       return nfs_parse_devname(dev_name,
-                                  &args->nfs_server.hostname,
-                                  NFS4_MAXNAMLEN,
-                                  &args->nfs_server.export_path,
-                                  NFS4_MAXPATHLEN);
-}
-
 /*
  * Validate NFSv4 mount options
  */
@@ -2643,13 +2693,7 @@ static int nfs4_validate_mount_data(void *options,
 
                break;
        default:
-               if (nfs_parse_mount_options((char *)options, args) == 0)
-                       return -EINVAL;
-
-               if (!nfs_verify_server_address(sap))
-                       return -EINVAL;
-
-               return nfs4_validate_text_mount_data(options, args, dev_name);
+               return NFS_TEXT_DATA;
        }
 
        return 0;
@@ -2673,91 +2717,26 @@ out_no_address:
  */
 static struct dentry *
 nfs4_remote_mount(struct file_system_type *fs_type, int flags,
-                 const char *dev_name, void *raw_data)
+                 const char *dev_name, void *info)
 {
-       struct nfs_parsed_mount_data *data = raw_data;
-       struct super_block *s;
+       struct nfs_mount_info *mount_info = info;
        struct nfs_server *server;
-       struct nfs_fh *mntfh;
-       struct dentry *mntroot;
-       int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
-       struct nfs_sb_mountdata sb_mntdata = {
-               .mntflags = flags,
-       };
-       int error = -ENOMEM;
+       struct dentry *mntroot = ERR_PTR(-ENOMEM);
 
-       mntfh = nfs_alloc_fhandle();
-       if (data == NULL || mntfh == NULL)
-               goto out;
+       mount_info->fill_super = nfs4_fill_super;
+       mount_info->set_security = nfs_set_sb_security;
 
        /* Get a volume representation */
-       server = nfs4_create_server(data, mntfh);
+       server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);
        if (IS_ERR(server)) {
-               error = PTR_ERR(server);
+               mntroot = ERR_CAST(server);
                goto out;
        }
-       sb_mntdata.server = server;
 
-       if (server->flags & NFS4_MOUNT_UNSHARED)
-               compare_super = NULL;
-
-       /* -o noac implies -o sync */
-       if (server->flags & NFS_MOUNT_NOAC)
-               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
-       /* Get a superblock - note that we may end up sharing one that already exists */
-       s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
-       if (IS_ERR(s)) {
-               error = PTR_ERR(s);
-               goto out_free;
-       }
-
-       if (s->s_fs_info != server) {
-               nfs_free_server(server);
-               server = NULL;
-       } else {
-               error = nfs_bdi_register(server);
-               if (error)
-                       goto error_splat_bdi;
-       }
-
-       if (!s->s_root) {
-               /* initial superblock/root creation */
-               nfs4_fill_super(s);
-               nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
-       }
-
-       mntroot = nfs4_get_root(s, mntfh, dev_name);
-       if (IS_ERR(mntroot)) {
-               error = PTR_ERR(mntroot);
-               goto error_splat_super;
-       }
-
-       error = security_sb_set_mnt_opts(s, &data->lsm_opts);
-       if (error)
-               goto error_splat_root;
-
-       s->s_flags |= MS_ACTIVE;
-
-       nfs_free_fhandle(mntfh);
-       return mntroot;
+       mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
 
 out:
-       nfs_free_fhandle(mntfh);
-       return ERR_PTR(error);
-
-out_free:
-       nfs_free_server(server);
-       goto out;
-
-error_splat_root:
-       dput(mntroot);
-error_splat_super:
-       if (server && !s->s_root)
-               bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
-       deactivate_locked_super(s);
-       goto out;
+       return mntroot;
 }
 
 static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
@@ -2869,17 +2848,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
 }
 
 static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
-                        struct nfs_parsed_mount_data *data)
+                        struct nfs_mount_info *mount_info)
 {
        char *export_path;
        struct vfsmount *root_mnt;
        struct dentry *res;
+       struct nfs_parsed_mount_data *data = mount_info->parsed;
 
        dfprintk(MOUNT, "--> nfs4_try_mount()\n");
 
        export_path = data->nfs_server.export_path;
        data->nfs_server.export_path = "/";
-       root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
+       root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
                        data->nfs_server.hostname);
        data->nfs_server.export_path = export_path;
 
@@ -2891,38 +2871,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
        return res;
 }
 
-/*
- * Get the superblock for an NFS4 mountpoint
- */
-static struct dentry *nfs4_mount(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *raw_data)
-{
-       struct nfs_parsed_mount_data *data;
-       int error = -ENOMEM;
-       struct dentry *res = ERR_PTR(-ENOMEM);
-
-       data = nfs_alloc_parsed_mount_data(4);
-       if (data == NULL)
-               goto out;
-
-       /* Validate the mount data */
-       error = nfs4_validate_mount_data(raw_data, data, dev_name);
-       if (error < 0) {
-               res = ERR_PTR(error);
-               goto out;
-       }
-
-       res = nfs4_try_mount(flags, dev_name, data);
-       if (IS_ERR(res))
-               error = PTR_ERR(res);
-
-out:
-       nfs_free_parsed_mount_data(data);
-       dprintk("<-- nfs4_mount() = %d%s\n", error,
-                       error != 0 ? " [error]" : "");
-       return res;
-}
-
 static void nfs4_kill_super(struct super_block *sb)
 {
        struct nfs_server *server = NFS_SB(sb);
@@ -2942,181 +2890,43 @@ static struct dentry *
 nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
                 const char *dev_name, void *raw_data)
 {
-       struct nfs_clone_mount *data = raw_data;
-       struct super_block *s;
-       struct nfs_server *server;
-       struct dentry *mntroot;
-       int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
-       struct nfs_sb_mountdata sb_mntdata = {
-               .mntflags = flags,
+       struct nfs_mount_info mount_info = {
+               .fill_super = nfs4_clone_super,
+               .set_security = nfs_clone_sb_security,
+               .cloned = raw_data,
        };
-       int error;
-
-       dprintk("--> nfs4_xdev_mount()\n");
-
-       /* create a new volume representation */
-       server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
-       if (IS_ERR(server)) {
-               error = PTR_ERR(server);
-               goto out_err_noserver;
-       }
-       sb_mntdata.server = server;
-
-       if (server->flags & NFS4_MOUNT_UNSHARED)
-               compare_super = NULL;
-
-       /* -o noac implies -o sync */
-       if (server->flags & NFS_MOUNT_NOAC)
-               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
-       /* Get a superblock - note that we may end up sharing one that already exists */
-       s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
-       if (IS_ERR(s)) {
-               error = PTR_ERR(s);
-               goto out_err_nosb;
-       }
-
-       if (s->s_fs_info != server) {
-               nfs_free_server(server);
-               server = NULL;
-       } else {
-               error = nfs_bdi_register(server);
-               if (error)
-                       goto error_splat_bdi;
-       }
-
-       if (!s->s_root) {
-               /* initial superblock/root creation */
-               nfs4_clone_super(s, data->sb);
-               nfs_fscache_get_super_cookie(s, NULL, data);
-       }
-
-       mntroot = nfs4_get_root(s, data->fh, dev_name);
-       if (IS_ERR(mntroot)) {
-               error = PTR_ERR(mntroot);
-               goto error_splat_super;
-       }
-       if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
-               dput(mntroot);
-               error = -ESTALE;
-               goto error_splat_super;
-       }
-
-       s->s_flags |= MS_ACTIVE;
-
-       security_sb_clone_mnt_opts(data->sb, s);
-
-       dprintk("<-- nfs4_xdev_mount() = 0\n");
-       return mntroot;
-
-out_err_nosb:
-       nfs_free_server(server);
-out_err_noserver:
-       dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
-       return ERR_PTR(error);
-
-error_splat_super:
-       if (server && !s->s_root)
-               bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
-       deactivate_locked_super(s);
-       dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
-       return ERR_PTR(error);
+       return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);
 }
 
 static struct dentry *
 nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
                           const char *dev_name, void *raw_data)
 {
-       struct nfs_clone_mount *data = raw_data;
-       struct super_block *s;
-       struct nfs_server *server;
-       struct dentry *mntroot;
-       struct nfs_fh *mntfh;
-       int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
-       struct nfs_sb_mountdata sb_mntdata = {
-               .mntflags = flags,
+       struct nfs_mount_info mount_info = {
+               .fill_super = nfs4_fill_super,
+               .set_security = nfs_clone_sb_security,
+               .cloned = raw_data,
        };
-       int error = -ENOMEM;
+       struct nfs_server *server;
+       struct dentry *mntroot = ERR_PTR(-ENOMEM);
 
        dprintk("--> nfs4_referral_get_sb()\n");
 
-       mntfh = nfs_alloc_fhandle();
-       if (mntfh == NULL)
-               goto out_err_nofh;
+       mount_info.mntfh = nfs_alloc_fhandle();
+       if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
+               goto out;
 
        /* create a new volume representation */
-       server = nfs4_create_referral_server(data, mntfh);
+       server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
        if (IS_ERR(server)) {
-               error = PTR_ERR(server);
-               goto out_err_noserver;
-       }
-       sb_mntdata.server = server;
-
-       if (server->flags & NFS4_MOUNT_UNSHARED)
-               compare_super = NULL;
-
-       /* -o noac implies -o sync */
-       if (server->flags & NFS_MOUNT_NOAC)
-               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
-       /* Get a superblock - note that we may end up sharing one that already exists */
-       s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
-       if (IS_ERR(s)) {
-               error = PTR_ERR(s);
-               goto out_err_nosb;
-       }
-
-       if (s->s_fs_info != server) {
-               nfs_free_server(server);
-               server = NULL;
-       } else {
-               error = nfs_bdi_register(server);
-               if (error)
-                       goto error_splat_bdi;
-       }
-
-       if (!s->s_root) {
-               /* initial superblock/root creation */
-               nfs4_fill_super(s);
-               nfs_fscache_get_super_cookie(s, NULL, data);
-       }
-
-       mntroot = nfs4_get_root(s, mntfh, dev_name);
-       if (IS_ERR(mntroot)) {
-               error = PTR_ERR(mntroot);
-               goto error_splat_super;
-       }
-       if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
-               dput(mntroot);
-               error = -ESTALE;
-               goto error_splat_super;
+               mntroot = ERR_CAST(server);
+               goto out;
        }
 
-       s->s_flags |= MS_ACTIVE;
-
-       security_sb_clone_mnt_opts(data->sb, s);
-
-       nfs_free_fhandle(mntfh);
-       dprintk("<-- nfs4_referral_get_sb() = 0\n");
+       mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info);
+out:
+       nfs_free_fhandle(mount_info.mntfh);
        return mntroot;
-
-out_err_nosb:
-       nfs_free_server(server);
-out_err_noserver:
-       nfs_free_fhandle(mntfh);
-out_err_nofh:
-       dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
-       return ERR_PTR(error);
-
-error_splat_super:
-       if (server && !s->s_root)
-               bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
-       deactivate_locked_super(s);
-       nfs_free_fhandle(mntfh);
-       dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
-       return ERR_PTR(error);
 }
 
 /*
index c07462320f6b5c41c09ba2ff054e75048951691f..e6fe3d69d14cbe0a5b75fc2cc5905c875f4c0181 100644 (file)
 /*
  * Local function declarations
  */
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
-                                 struct inode *inode, int ioflags);
 static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_partial_ops;
-static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
+static struct kmem_cache *nfs_cdata_cachep;
 static mempool_t *nfs_commit_mempool;
 
-struct nfs_write_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(void)
 {
-       struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
+       struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
 
        if (p) {
                memset(p, 0, sizeof(*p));
@@ -62,46 +62,73 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
 
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_free(struct nfs_commit_data *p)
 {
-       if (p && (p->pagevec != &p->page_array[0]))
-               kfree(p->pagevec);
        mempool_free(p, nfs_commit_mempool);
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(void)
 {
-       struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
+       struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
        if (p) {
+               struct nfs_pgio_header *hdr = &p->header;
+
                memset(p, 0, sizeof(*p));
-               INIT_LIST_HEAD(&p->pages);
-               p->npages = pagecount;
-               if (pagecount <= ARRAY_SIZE(p->page_array))
-                       p->pagevec = p->page_array;
-               else {
-                       p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-                       if (!p->pagevec) {
-                               mempool_free(p, nfs_wdata_mempool);
-                               p = NULL;
-                       }
-               }
+               INIT_LIST_HEAD(&hdr->pages);
+               INIT_LIST_HEAD(&hdr->rpc_list);
+               spin_lock_init(&hdr->lock);
+               atomic_set(&hdr->refcnt, 0);
        }
        return p;
 }
 
-void nfs_writedata_free(struct nfs_write_data *p)
+static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+                                                 unsigned int pagecount)
+{
+       struct nfs_write_data *data, *prealloc;
+
+       prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
+       if (prealloc->header == NULL)
+               data = prealloc;
+       else
+               data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               goto out;
+
+       if (nfs_pgarray_set(&data->pages, pagecount)) {
+               data->header = hdr;
+               atomic_inc(&hdr->refcnt);
+       } else {
+               if (data != prealloc)
+                       kfree(data);
+               data = NULL;
+       }
+out:
+       return data;
+}
+
+void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
-       if (p && (p->pagevec != &p->page_array[0]))
-               kfree(p->pagevec);
-       mempool_free(p, nfs_wdata_mempool);
+       struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+       mempool_free(whdr, nfs_wdata_mempool);
 }
 
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
+       struct nfs_pgio_header *hdr = wdata->header;
+       struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
+
        put_nfs_open_context(wdata->args.context);
-       nfs_writedata_free(wdata);
+       if (wdata->pages.pagevec != wdata->pages.page_array)
+               kfree(wdata->pages.pagevec);
+       if (wdata != &write_header->rpc_data)
+               kfree(wdata);
+       else
+               wdata->header = NULL;
+       if (atomic_dec_and_test(&hdr->refcnt))
+               hdr->completion_ops->completion(hdr);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -203,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page)
                struct inode *inode = page->mapping->host;
                struct nfs_server *nfss = NFS_SERVER(inode);
 
-               page_cache_get(page);
                if (atomic_long_inc_return(&nfss->writeback) >
                                NFS_CONGESTION_ON_THRESH) {
                        set_bdi_congested(&nfss->backing_dev_info,
@@ -219,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page)
        struct nfs_server *nfss = NFS_SERVER(inode);
 
        end_page_writeback(page);
-       page_cache_release(page);
        if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
                clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
@@ -235,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
                req = nfs_page_find_request_locked(page);
                if (req == NULL)
                        break;
-               if (nfs_lock_request_dontget(req))
+               if (nfs_lock_request(req))
                        break;
                /* Note: If we hold the page lock, as is the case in nfs_writepage,
-                *       then the call to nfs_lock_request_dontget() will always
+                *       then the call to nfs_lock_request() will always
                 *       succeed provided that someone hasn't already marked the
                 *       request as dirty (in which case we don't care).
                 */
@@ -310,7 +335,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
        struct nfs_pageio_descriptor pgio;
        int err;
 
-       nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
+       nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+                             &nfs_async_write_completion_ops);
        err = nfs_do_writepage(page, wbc, &pgio);
        nfs_pageio_complete(&pgio);
        if (err < 0)
@@ -353,7 +379,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-       nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
+       nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+                             &nfs_async_write_completion_ops);
        err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
        nfs_pageio_complete(&pgio);
 
@@ -379,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
        struct nfs_inode *nfsi = NFS_I(inode);
 
        /* Lock the request! */
-       nfs_lock_request_dontget(req);
+       nfs_lock_request(req);
 
        spin_lock(&inode->i_lock);
        if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
@@ -421,65 +448,88 @@ nfs_mark_request_dirty(struct nfs_page *req)
 /**
  * nfs_request_add_commit_list - add request to a commit list
  * @req: pointer to a struct nfs_page
- * @head: commit list head
+ * @dst: commit list head
+ * @cinfo: holds list lock and accounting info
  *
- * This sets the PG_CLEAN bit, updates the inode global count of
+ * This sets the PG_CLEAN bit, updates the cinfo count of
  * number of outstanding requests requiring a commit as well as
  * the MM page stats.
  *
- * The caller must _not_ hold the inode->i_lock, but must be
+ * The caller must _not_ hold the cinfo->lock, but must be
  * holding the nfs_page lock.
  */
 void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
+nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+                           struct nfs_commit_info *cinfo)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
-
        set_bit(PG_CLEAN, &(req)->wb_flags);
-       spin_lock(&inode->i_lock);
-       nfs_list_add_request(req, head);
-       NFS_I(inode)->ncommit++;
-       spin_unlock(&inode->i_lock);
-       inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-       inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
-       __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+       spin_lock(cinfo->lock);
+       nfs_list_add_request(req, dst);
+       cinfo->mds->ncommit++;
+       spin_unlock(cinfo->lock);
+       if (!cinfo->dreq) {
+               inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+               inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
+                            BDI_RECLAIMABLE);
+               __mark_inode_dirty(req->wb_context->dentry->d_inode,
+                                  I_DIRTY_DATASYNC);
+       }
 }
 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
 
 /**
  * nfs_request_remove_commit_list - Remove request from a commit list
  * @req: pointer to a nfs_page
+ * @cinfo: holds list lock and accounting info
  *
- * This clears the PG_CLEAN bit, and updates the inode global count of
+ * This clears the PG_CLEAN bit, and updates the cinfo's count of
  * number of outstanding requests requiring a commit
  * It does not update the MM page stats.
  *
- * The caller _must_ hold the inode->i_lock and the nfs_page lock.
+ * The caller _must_ hold the cinfo->lock and the nfs_page lock.
  */
 void
-nfs_request_remove_commit_list(struct nfs_page *req)
+nfs_request_remove_commit_list(struct nfs_page *req,
+                              struct nfs_commit_info *cinfo)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
-
        if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
                return;
        nfs_list_remove_request(req);
-       NFS_I(inode)->ncommit--;
+       cinfo->mds->ncommit--;
 }
 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
 
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+                                     struct inode *inode)
+{
+       cinfo->lock = &inode->i_lock;
+       cinfo->mds = &NFS_I(inode)->commit_info;
+       cinfo->ds = pnfs_get_ds_info(inode);
+       cinfo->dreq = NULL;
+       cinfo->completion_ops = &nfs_commit_completion_ops;
+}
+
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+                   struct inode *inode,
+                   struct nfs_direct_req *dreq)
+{
+       if (dreq)
+               nfs_init_cinfo_from_dreq(cinfo, dreq);
+       else
+               nfs_init_cinfo_from_inode(cinfo, inode);
+}
+EXPORT_SYMBOL_GPL(nfs_init_cinfo);
 
 /*
  * Add a request to the inode's commit list.
  */
-static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+void
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+                       struct nfs_commit_info *cinfo)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
-
-       if (pnfs_mark_request_commit(req, lseg))
+       if (pnfs_mark_request_commit(req, lseg, cinfo))
                return;
-       nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
+       nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
 }
 
 static void
@@ -494,11 +544,13 @@ nfs_clear_request_commit(struct nfs_page *req)
 {
        if (test_bit(PG_CLEAN, &req->wb_flags)) {
                struct inode *inode = req->wb_context->dentry->d_inode;
+               struct nfs_commit_info cinfo;
 
-               if (!pnfs_clear_request_commit(req)) {
-                       spin_lock(&inode->i_lock);
-                       nfs_request_remove_commit_list(req);
-                       spin_unlock(&inode->i_lock);
+               nfs_init_cinfo_from_inode(&cinfo, inode);
+               if (!pnfs_clear_request_commit(req, &cinfo)) {
+                       spin_lock(cinfo.lock);
+                       nfs_request_remove_commit_list(req, &cinfo);
+                       spin_unlock(cinfo.lock);
                }
                nfs_clear_page_commit(req->wb_page);
        }
@@ -508,28 +560,25 @@ static inline
 int nfs_write_need_commit(struct nfs_write_data *data)
 {
        if (data->verf.committed == NFS_DATA_SYNC)
-               return data->lseg == NULL;
-       else
-               return data->verf.committed != NFS_FILE_SYNC;
+               return data->header->lseg == NULL;
+       return data->verf.committed != NFS_FILE_SYNC;
 }
 
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
-                                 struct nfs_write_data *data)
+#else
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+                                     struct inode *inode)
 {
-       if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-               nfs_mark_request_commit(req, data->lseg);
-               return 1;
-       }
-       if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
-               nfs_mark_request_dirty(req);
-               return 1;
-       }
-       return 0;
 }
-#else
-static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+                   struct inode *inode,
+                   struct nfs_direct_req *dreq)
+{
+}
+
+void
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+                       struct nfs_commit_info *cinfo)
 {
 }
 
@@ -544,25 +593,57 @@ int nfs_write_need_commit(struct nfs_write_data *data)
        return 0;
 }
 
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
-                                 struct nfs_write_data *data)
+#endif
+
+static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
-       return 0;
+       struct nfs_commit_info cinfo;
+       unsigned long bytes = 0;
+
+       if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+               goto out;
+       nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
+       while (!list_empty(&hdr->pages)) {
+               struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+               bytes += req->wb_bytes;
+               nfs_list_remove_request(req);
+               if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+                   (hdr->good_bytes < bytes)) {
+                       nfs_set_pageerror(req->wb_page);
+                       nfs_context_set_write_error(req->wb_context, hdr->error);
+                       goto remove_req;
+               }
+               if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+                       nfs_mark_request_dirty(req);
+                       goto next;
+               }
+               if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+                       nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+                       goto next;
+               }
+remove_req:
+               nfs_inode_remove_request(req);
+next:
+               nfs_unlock_request(req);
+               nfs_end_page_writeback(req->wb_page);
+               nfs_release_request(req);
+       }
+out:
+       hdr->release(hdr);
 }
-#endif
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-static int
-nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long
+nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
-       return nfsi->ncommit > 0;
+       return cinfo->mds->ncommit;
 }
 
-/* i_lock held by caller */
-static int
-nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
-               spinlock_t *lock)
+/* cinfo->lock held by caller */
+int
+nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+                    struct nfs_commit_info *cinfo, int max)
 {
        struct nfs_page *req, *tmp;
        int ret = 0;
@@ -570,12 +651,13 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
        list_for_each_entry_safe(req, tmp, src, wb_list) {
                if (!nfs_lock_request(req))
                        continue;
-               if (cond_resched_lock(lock))
+               kref_get(&req->wb_kref);
+               if (cond_resched_lock(cinfo->lock))
                        list_safe_reset_next(req, tmp, wb_list);
-               nfs_request_remove_commit_list(req);
+               nfs_request_remove_commit_list(req, cinfo);
                nfs_list_add_request(req, dst);
                ret++;
-               if (ret == max)
+               if ((ret == max) && !cinfo->dreq)
                        break;
        }
        return ret;
@@ -584,37 +666,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
 /*
  * nfs_scan_commit - Scan an inode for commit requests
  * @inode: NFS inode to scan
- * @dst: destination list
+ * @dst: mds destination list
+ * @cinfo: mds and ds lists of reqs ready to commit
  *
  * Moves requests from the inode's 'commit' request list.
  * The requests are *not* checked to ensure that they form a contiguous set.
  */
-static int
-nfs_scan_commit(struct inode *inode, struct list_head *dst)
+int
+nfs_scan_commit(struct inode *inode, struct list_head *dst,
+               struct nfs_commit_info *cinfo)
 {
-       struct nfs_inode *nfsi = NFS_I(inode);
        int ret = 0;
 
-       spin_lock(&inode->i_lock);
-       if (nfsi->ncommit > 0) {
+       spin_lock(cinfo->lock);
+       if (cinfo->mds->ncommit > 0) {
                const int max = INT_MAX;
 
-               ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
-                               &inode->i_lock);
-               ret += pnfs_scan_commit_lists(inode, max - ret,
-                               &inode->i_lock);
+               ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
+                                          cinfo, max);
+               ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(cinfo->lock);
        return ret;
 }
 
 #else
-static inline int nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
        return 0;
 }
 
-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+                   struct nfs_commit_info *cinfo)
 {
        return 0;
 }
@@ -659,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
                    || end < req->wb_offset)
                        goto out_flushme;
 
-               if (nfs_lock_request_dontget(req))
+               if (nfs_lock_request(req))
                        break;
 
                /* The request is locked, so wait and then retry */
@@ -729,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
        nfs_grow_file(page, offset, count);
        nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
        nfs_mark_request_dirty(req);
-       nfs_unlock_request(req);
+       nfs_unlock_and_release_request(req);
        return 0;
 }
 
@@ -766,10 +849,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
  * the PageUptodate() flag. In this case, we will need to turn off
  * write optimisations that depend on the page contents being correct.
  */
-static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
+static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
 {
-       return PageUptodate(page) &&
-               !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
+       if (nfs_have_delegated_attributes(inode))
+               goto out;
+       if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
+               return false;
+out:
+       return PageUptodate(page) != 0;
 }
 
 /*
@@ -815,17 +902,6 @@ int nfs_updatepage(struct file *file, struct page *page,
        return status;
 }
 
-static void nfs_writepage_release(struct nfs_page *req,
-                                 struct nfs_write_data *data)
-{
-       struct page *page = req->wb_page;
-
-       if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
-               nfs_inode_remove_request(req);
-       nfs_unlock_request(req);
-       nfs_end_page_writeback(page);
-}
-
 static int flush_task_priority(int how)
 {
        switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -837,18 +913,18 @@ static int flush_task_priority(int how)
        return RPC_PRIORITY_NORMAL;
 }
 
-int nfs_initiate_write(struct nfs_write_data *data,
-                      struct rpc_clnt *clnt,
+int nfs_initiate_write(struct rpc_clnt *clnt,
+                      struct nfs_write_data *data,
                       const struct rpc_call_ops *call_ops,
-                      int how)
+                      int how, int flags)
 {
-       struct inode *inode = data->inode;
+       struct inode *inode = data->header->inode;
        int priority = flush_task_priority(how);
        struct rpc_task *task;
        struct rpc_message msg = {
                .rpc_argp = &data->args,
                .rpc_resp = &data->res,
-               .rpc_cred = data->cred,
+               .rpc_cred = data->header->cred,
        };
        struct rpc_task_setup task_setup_data = {
                .rpc_client = clnt,
@@ -857,7 +933,7 @@ int nfs_initiate_write(struct nfs_write_data *data,
                .callback_ops = call_ops,
                .callback_data = data,
                .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | flags,
                .priority = priority,
        };
        int ret = 0;
@@ -892,26 +968,21 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static void nfs_write_rpcsetup(struct nfs_page *req,
-               struct nfs_write_data *data,
+static void nfs_write_rpcsetup(struct nfs_write_data *data,
                unsigned int count, unsigned int offset,
-               int how)
+               int how, struct nfs_commit_info *cinfo)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct nfs_page *req = data->header->req;
 
        /* Set up the RPC argument and reply structs
         * NB: take care not to mess about with data->commit et al. */
 
-       data->req = req;
-       data->inode = inode = req->wb_context->dentry->d_inode;
-       data->cred = req->wb_context->cred;
-
-       data->args.fh     = NFS_FH(inode);
+       data->args.fh     = NFS_FH(data->header->inode);
        data->args.offset = req_offset(req) + offset;
        /* pnfs_set_layoutcommit needs this */
        data->mds_offset = data->args.offset;
        data->args.pgbase = req->wb_pgbase + offset;
-       data->args.pages  = data->pagevec;
+       data->args.pages  = data->pages.pagevec;
        data->args.count  = count;
        data->args.context = get_nfs_open_context(req->wb_context);
        data->args.lock_context = req->wb_lock_context;
@@ -920,7 +991,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
        case 0:
                break;
        case FLUSH_COND_STABLE:
-               if (nfs_need_commit(NFS_I(inode)))
+               if (nfs_reqs_to_commit(cinfo))
                        break;
        default:
                data->args.stable = NFS_FILE_SYNC;
@@ -936,9 +1007,9 @@ static int nfs_do_write(struct nfs_write_data *data,
                const struct rpc_call_ops *call_ops,
                int how)
 {
-       struct inode *inode = data->args.context->dentry->d_inode;
+       struct inode *inode = data->header->inode;
 
-       return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+       return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
 }
 
 static int nfs_do_multiple_writes(struct list_head *head,
@@ -951,7 +1022,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
        while (!list_empty(head)) {
                int ret2;
 
-               data = list_entry(head->next, struct nfs_write_data, list);
+               data = list_first_entry(head, struct nfs_write_data, list);
                list_del_init(&data->list);
                
                ret2 = nfs_do_write(data, call_ops, how);
@@ -967,31 +1038,60 @@ static int nfs_do_multiple_writes(struct list_head *head,
  */
 static void nfs_redirty_request(struct nfs_page *req)
 {
-       struct page *page = req->wb_page;
-
        nfs_mark_request_dirty(req);
        nfs_unlock_request(req);
-       nfs_end_page_writeback(page);
+       nfs_end_page_writeback(req->wb_page);
+       nfs_release_request(req);
+}
+
+static void nfs_async_write_error(struct list_head *head)
+{
+       struct nfs_page *req;
+
+       while (!list_empty(head)) {
+               req = nfs_list_entry(head->next);
+               nfs_list_remove_request(req);
+               nfs_redirty_request(req);
+       }
+}
+
+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+       .error_cleanup = nfs_async_write_error,
+       .completion = nfs_write_completion,
+};
+
+static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
+               struct nfs_pgio_header *hdr)
+{
+       set_bit(NFS_IOHDR_REDO, &hdr->flags);
+       while (!list_empty(&hdr->rpc_list)) {
+               struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
+                               struct nfs_write_data, list);
+               list_del(&data->list);
+               nfs_writedata_release(data);
+       }
+       desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 }
 
 /*
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
+                          struct nfs_pgio_header *hdr)
 {
-       struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+       struct nfs_page *req = hdr->req;
        struct page *page = req->wb_page;
        struct nfs_write_data *data;
        size_t wsize = desc->pg_bsize, nbytes;
        unsigned int offset;
        int requests = 0;
-       int ret = 0;
+       struct nfs_commit_info cinfo;
 
-       nfs_list_remove_request(req);
+       nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
 
        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-           (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
+           (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
             desc->pg_count > wsize))
                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
@@ -1001,28 +1101,22 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
        do {
                size_t len = min(nbytes, wsize);
 
-               data = nfs_writedata_alloc(1);
-               if (!data)
-                       goto out_bad;
-               data->pagevec[0] = page;
-               nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
-               list_add(&data->list, res);
+               data = nfs_writedata_alloc(hdr, 1);
+               if (!data) {
+                       nfs_flush_error(desc, hdr);
+                       return -ENOMEM;
+               }
+               data->pages.pagevec[0] = page;
+               nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
+               list_add(&data->list, &hdr->rpc_list);
                requests++;
                nbytes -= len;
                offset += len;
        } while (nbytes != 0);
-       atomic_set(&req->wb_complete, requests);
-       desc->pg_rpc_callops = &nfs_write_partial_ops;
-       return ret;
-
-out_bad:
-       while (!list_empty(res)) {
-               data = list_entry(res->next, struct nfs_write_data, list);
-               list_del(&data->list);
-               nfs_writedata_release(data);
-       }
-       nfs_redirty_request(req);
-       return -ENOMEM;
+       nfs_list_remove_request(req);
+       nfs_list_add_request(req, &hdr->pages);
+       desc->pg_rpc_callops = &nfs_write_common_ops;
+       return 0;
 }
 
 /*
@@ -1033,62 +1127,71 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
+                        struct nfs_pgio_header *hdr)
 {
        struct nfs_page         *req;
        struct page             **pages;
        struct nfs_write_data   *data;
        struct list_head *head = &desc->pg_list;
-       int ret = 0;
+       struct nfs_commit_info cinfo;
 
-       data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
-                                                     desc->pg_count));
+       data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+                                                          desc->pg_count));
        if (!data) {
-               while (!list_empty(head)) {
-                       req = nfs_list_entry(head->next);
-                       nfs_list_remove_request(req);
-                       nfs_redirty_request(req);
-               }
-               ret = -ENOMEM;
-               goto out;
+               nfs_flush_error(desc, hdr);
+               return -ENOMEM;
        }
-       pages = data->pagevec;
+
+       nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
+       pages = data->pages.pagevec;
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
-               nfs_list_add_request(req, &data->pages);
+               nfs_list_add_request(req, &hdr->pages);
                *pages++ = req->wb_page;
        }
-       req = nfs_list_entry(data->pages.next);
 
        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-           (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
+           (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
        /* Set up the argument struct */
-       nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
-       list_add(&data->list, res);
-       desc->pg_rpc_callops = &nfs_write_full_ops;
-out:
-       return ret;
+       nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+       list_add(&data->list, &hdr->rpc_list);
+       desc->pg_rpc_callops = &nfs_write_common_ops;
+       return 0;
 }
 
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+                     struct nfs_pgio_header *hdr)
 {
        if (desc->pg_bsize < PAGE_CACHE_SIZE)
-               return nfs_flush_multi(desc, head);
-       return nfs_flush_one(desc, head);
+               return nfs_flush_multi(desc, hdr);
+       return nfs_flush_one(desc, hdr);
 }
 
 static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-       LIST_HEAD(head);
+       struct nfs_write_header *whdr;
+       struct nfs_pgio_header *hdr;
        int ret;
 
-       ret = nfs_generic_flush(desc, &head);
+       whdr = nfs_writehdr_alloc();
+       if (!whdr) {
+               desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+               return -ENOMEM;
+       }
+       hdr = &whdr->header;
+       nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
+       atomic_inc(&hdr->refcnt);
+       ret = nfs_generic_flush(desc, hdr);
        if (ret == 0)
-               ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
-                               desc->pg_ioflags);
+               ret = nfs_do_multiple_writes(&hdr->rpc_list,
+                                            desc->pg_rpc_callops,
+                                            desc->pg_ioflags);
+       if (atomic_dec_and_test(&hdr->refcnt))
+               hdr->completion_ops->completion(hdr);
        return ret;
 }
 
@@ -1098,9 +1201,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
 };
 
 void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
-                                 struct inode *inode, int ioflags)
+                              struct inode *inode, int ioflags,
+                              const struct nfs_pgio_completion_ops *compl_ops)
 {
-       nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+       nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
                                NFS_SERVER(inode)->wsize, ioflags);
 }
 
@@ -1111,80 +1215,27 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-                                 struct inode *inode, int ioflags)
+void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+                          struct inode *inode, int ioflags,
+                          const struct nfs_pgio_completion_ops *compl_ops)
 {
-       if (!pnfs_pageio_init_write(pgio, inode, ioflags))
-               nfs_pageio_init_write_mds(pgio, inode, ioflags);
+       if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
+               nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
 }
 
-/*
- * Handle a write reply that flushed part of a page.
- */
-static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
+void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
-       struct nfs_write_data   *data = calldata;
-
-       dprintk("NFS: %5u write(%s/%lld %d@%lld)",
-               task->tk_pid,
-               data->req->wb_context->dentry->d_inode->i_sb->s_id,
-               (long long)
-                 NFS_FILEID(data->req->wb_context->dentry->d_inode),
-               data->req->wb_bytes, (long long)req_offset(data->req));
-
-       nfs_writeback_done(task, data);
+       struct nfs_write_data *data = calldata;
+       NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
 }
 
-static void nfs_writeback_release_partial(void *calldata)
+void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 {
-       struct nfs_write_data   *data = calldata;
-       struct nfs_page         *req = data->req;
-       struct page             *page = req->wb_page;
-       int status = data->task.tk_status;
+       struct nfs_commit_data *data = calldata;
 
-       if (status < 0) {
-               nfs_set_pageerror(page);
-               nfs_context_set_write_error(req->wb_context, status);
-               dprintk(", error = %d\n", status);
-               goto out;
-       }
-
-       if (nfs_write_need_commit(data)) {
-               struct inode *inode = page->mapping->host;
-
-               spin_lock(&inode->i_lock);
-               if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
-                       /* Do nothing we need to resend the writes */
-               } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-                       memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-                       dprintk(" defer commit\n");
-               } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
-                       set_bit(PG_NEED_RESCHED, &req->wb_flags);
-                       clear_bit(PG_NEED_COMMIT, &req->wb_flags);
-                       dprintk(" server reboot detected\n");
-               }
-               spin_unlock(&inode->i_lock);
-       } else
-               dprintk(" OK\n");
-
-out:
-       if (atomic_dec_and_test(&req->wb_complete))
-               nfs_writepage_release(req, data);
-       nfs_writedata_release(calldata);
+       NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
-       struct nfs_write_data *data = calldata;
-       NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
-}
-
-static const struct rpc_call_ops nfs_write_partial_ops = {
-       .rpc_call_prepare = nfs_write_prepare,
-       .rpc_call_done = nfs_writeback_done_partial,
-       .rpc_release = nfs_writeback_release_partial,
-};
-
 /*
  * Handle a write reply that flushes a whole page.
  *
@@ -1192,59 +1243,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
  *       writebacks since the page->count is kept > 1 for as long
  *       as the page has a write request pending.
  */
-static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
+static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
 {
        struct nfs_write_data   *data = calldata;
 
        nfs_writeback_done(task, data);
 }
 
-static void nfs_writeback_release_full(void *calldata)
+static void nfs_writeback_release_common(void *calldata)
 {
        struct nfs_write_data   *data = calldata;
+       struct nfs_pgio_header *hdr = data->header;
        int status = data->task.tk_status;
+       struct nfs_page *req = hdr->req;
 
-       /* Update attributes as result of writeback. */
-       while (!list_empty(&data->pages)) {
-               struct nfs_page *req = nfs_list_entry(data->pages.next);
-               struct page *page = req->wb_page;
-
-               nfs_list_remove_request(req);
-
-               dprintk("NFS: %5u write (%s/%lld %d@%lld)",
-                       data->task.tk_pid,
-                       req->wb_context->dentry->d_inode->i_sb->s_id,
-                       (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
-                       req->wb_bytes,
-                       (long long)req_offset(req));
-
-               if (status < 0) {
-                       nfs_set_pageerror(page);
-                       nfs_context_set_write_error(req->wb_context, status);
-                       dprintk(", error = %d\n", status);
-                       goto remove_request;
-               }
-
-               if (nfs_write_need_commit(data)) {
+       if ((status >= 0) && nfs_write_need_commit(data)) {
+               spin_lock(&hdr->lock);
+               if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
+                       ; /* Do nothing */
+               else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
                        memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-                       nfs_mark_request_commit(req, data->lseg);
-                       dprintk(" marked for commit\n");
-                       goto next;
-               }
-               dprintk(" OK\n");
-remove_request:
-               nfs_inode_remove_request(req);
-       next:
-               nfs_unlock_request(req);
-               nfs_end_page_writeback(page);
+               else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+                       set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
+               spin_unlock(&hdr->lock);
        }
-       nfs_writedata_release(calldata);
+       nfs_writedata_release(data);
 }
 
-static const struct rpc_call_ops nfs_write_full_ops = {
+static const struct rpc_call_ops nfs_write_common_ops = {
        .rpc_call_prepare = nfs_write_prepare,
-       .rpc_call_done = nfs_writeback_done_full,
-       .rpc_release = nfs_writeback_release_full,
+       .rpc_call_done = nfs_writeback_done_common,
+       .rpc_release = nfs_writeback_release_common,
 };
 
 
@@ -1255,6 +1284,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
        struct nfs_writeargs    *argp = &data->args;
        struct nfs_writeres     *resp = &data->res;
+       struct inode            *inode = data->header->inode;
        int status;
 
        dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1267,10 +1297,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
         * another writer had changed the file, but some applications
         * depend on tighter cache coherency when writing.
         */
-       status = NFS_PROTO(data->inode)->write_done(task, data);
+       status = NFS_PROTO(inode)->write_done(task, data);
        if (status != 0)
                return;
-       nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+       nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
        if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1288,46 +1318,47 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
                if (time_before(complain, jiffies)) {
                        dprintk("NFS:       faulty NFS server %s:"
                                " (committed = %d) != (stable = %d)\n",
-                               NFS_SERVER(data->inode)->nfs_client->cl_hostname,
+                               NFS_SERVER(inode)->nfs_client->cl_hostname,
                                resp->verf->committed, argp->stable);
                        complain = jiffies + 300 * HZ;
                }
        }
 #endif
-       /* Is this a short write? */
-       if (task->tk_status >= 0 && resp->count < argp->count) {
+       if (task->tk_status < 0)
+               nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
+       else if (resp->count < argp->count) {
                static unsigned long    complain;
 
-               nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+               /* This is a short write! */
+               nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
 
                /* Has the server at least made some progress? */
-               if (resp->count != 0) {
-                       /* Was this an NFSv2 write or an NFSv3 stable write? */
-                       if (resp->verf->committed != NFS_UNSTABLE) {
-                               /* Resend from where the server left off */
-                               data->mds_offset += resp->count;
-                               argp->offset += resp->count;
-                               argp->pgbase += resp->count;
-                               argp->count -= resp->count;
-                       } else {
-                               /* Resend as a stable write in order to avoid
-                                * headaches in the case of a server crash.
-                                */
-                               argp->stable = NFS_FILE_SYNC;
+               if (resp->count == 0) {
+                       if (time_before(complain, jiffies)) {
+                               printk(KERN_WARNING
+                                      "NFS: Server wrote zero bytes, expected %u.\n",
+                                      argp->count);
+                               complain = jiffies + 300 * HZ;
                        }
-                       rpc_restart_call_prepare(task);
+                       nfs_set_pgio_error(data->header, -EIO, argp->offset);
+                       task->tk_status = -EIO;
                        return;
                }
-               if (time_before(complain, jiffies)) {
-                       printk(KERN_WARNING
-                              "NFS: Server wrote zero bytes, expected %u.\n",
-                                       argp->count);
-                       complain = jiffies + 300 * HZ;
+               /* Was this an NFSv2 write or an NFSv3 stable write? */
+               if (resp->verf->committed != NFS_UNSTABLE) {
+                       /* Resend from where the server left off */
+                       data->mds_offset += resp->count;
+                       argp->offset += resp->count;
+                       argp->pgbase += resp->count;
+                       argp->count -= resp->count;
+               } else {
+                       /* Resend as a stable write in order to avoid
+                        * headaches in the case of a server crash.
+                        */
+                       argp->stable = NFS_FILE_SYNC;
                }
-               /* Can't do anything about it except throw an error. */
-               task->tk_status = -EIO;
+               rpc_restart_call_prepare(task);
        }
-       return;
 }
 
 
@@ -1347,26 +1378,23 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
        return (ret < 0) ? ret : 1;
 }
 
-void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
 {
        clear_bit(NFS_INO_COMMIT, &nfsi->flags);
        smp_mb__after_clear_bit();
        wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
 }
-EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
 
-void nfs_commitdata_release(void *data)
+void nfs_commitdata_release(struct nfs_commit_data *data)
 {
-       struct nfs_write_data *wdata = data;
-
-       put_nfs_open_context(wdata->args.context);
-       nfs_commit_free(wdata);
+       put_nfs_open_context(data->context);
+       nfs_commit_free(data);
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_release);
 
-int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
+int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
                        const struct rpc_call_ops *call_ops,
-                       int how)
+                       int how, int flags)
 {
        struct rpc_task *task;
        int priority = flush_task_priority(how);
@@ -1382,7 +1410,7 @@ int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
                .callback_ops = call_ops,
                .callback_data = data,
                .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | flags,
                .priority = priority,
        };
        /* Set up the initial task struct.  */
@@ -1403,9 +1431,10 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-void nfs_init_commit(struct nfs_write_data *data,
-                           struct list_head *head,
-                           struct pnfs_layout_segment *lseg)
+void nfs_init_commit(struct nfs_commit_data *data,
+                    struct list_head *head,
+                    struct pnfs_layout_segment *lseg,
+                    struct nfs_commit_info *cinfo)
 {
        struct nfs_page *first = nfs_list_entry(head->next);
        struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1419,13 +1448,14 @@ void nfs_init_commit(struct nfs_write_data *data,
        data->cred        = first->wb_context->cred;
        data->lseg        = lseg; /* reference transferred */
        data->mds_ops     = &nfs_commit_ops;
+       data->completion_ops = cinfo->completion_ops;
+       data->dreq        = cinfo->dreq;
 
        data->args.fh     = NFS_FH(data->inode);
        /* Note: we always request a commit of the entire inode */
        data->args.offset = 0;
        data->args.count  = 0;
-       data->args.context = get_nfs_open_context(first->wb_context);
-       data->res.count   = 0;
+       data->context     = get_nfs_open_context(first->wb_context);
        data->res.fattr   = &data->fattr;
        data->res.verf    = &data->verf;
        nfs_fattr_init(&data->fattr);
@@ -1433,18 +1463,21 @@ void nfs_init_commit(struct nfs_write_data *data,
 EXPORT_SYMBOL_GPL(nfs_init_commit);
 
 void nfs_retry_commit(struct list_head *page_list,
-                     struct pnfs_layout_segment *lseg)
+                     struct pnfs_layout_segment *lseg,
+                     struct nfs_commit_info *cinfo)
 {
        struct nfs_page *req;
 
        while (!list_empty(page_list)) {
                req = nfs_list_entry(page_list->next);
                nfs_list_remove_request(req);
-               nfs_mark_request_commit(req, lseg);
-               dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-               dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
-                            BDI_RECLAIMABLE);
-               nfs_unlock_request(req);
+               nfs_mark_request_commit(req, lseg, cinfo);
+               if (!cinfo->dreq) {
+                       dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+                       dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+                                    BDI_RECLAIMABLE);
+               }
+               nfs_unlock_and_release_request(req);
        }
 }
 EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1453,9 +1486,10 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
  * Commit dirty pages
  */
 static int
-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how,
+               struct nfs_commit_info *cinfo)
 {
-       struct nfs_write_data   *data;
+       struct nfs_commit_data  *data;
 
        data = nfs_commitdata_alloc();
 
@@ -1463,11 +1497,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
                goto out_bad;
 
        /* Set up the argument struct */
-       nfs_init_commit(data, head, NULL);
-       return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
+       nfs_init_commit(data, head, NULL, cinfo);
+       atomic_inc(&cinfo->mds->rpcs_out);
+       return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
+                                  how, 0);
  out_bad:
-       nfs_retry_commit(head, NULL);
-       nfs_commit_clear_lock(NFS_I(inode));
+       nfs_retry_commit(head, NULL, cinfo);
+       cinfo->completion_ops->error_cleanup(NFS_I(inode));
        return -ENOMEM;
 }
 
@@ -1476,7 +1512,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
  */
 static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
-       struct nfs_write_data   *data = calldata;
+       struct nfs_commit_data  *data = calldata;
 
         dprintk("NFS: %5u nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
@@ -1485,10 +1521,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
        NFS_PROTO(data->inode)->commit_done(task, data);
 }
 
-void nfs_commit_release_pages(struct nfs_write_data *data)
+static void nfs_commit_release_pages(struct nfs_commit_data *data)
 {
        struct nfs_page *req;
        int status = data->task.tk_status;
+       struct nfs_commit_info cinfo;
 
        while (!list_empty(&data->pages)) {
                req = nfs_list_entry(data->pages.next);
@@ -1519,42 +1556,59 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
                dprintk(" mismatch\n");
                nfs_mark_request_dirty(req);
        next:
-               nfs_unlock_request(req);
+               nfs_unlock_and_release_request(req);
        }
+       nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+       if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+               nfs_commit_clear_lock(NFS_I(data->inode));
 }
-EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
 
 static void nfs_commit_release(void *calldata)
 {
-       struct nfs_write_data *data = calldata;
+       struct nfs_commit_data *data = calldata;
 
-       nfs_commit_release_pages(data);
-       nfs_commit_clear_lock(NFS_I(data->inode));
+       data->completion_ops->completion(data);
        nfs_commitdata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_commit_ops = {
-       .rpc_call_prepare = nfs_write_prepare,
+       .rpc_call_prepare = nfs_commit_prepare,
        .rpc_call_done = nfs_commit_done,
        .rpc_release = nfs_commit_release,
 };
 
+static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
+       .completion = nfs_commit_release_pages,
+       .error_cleanup = nfs_commit_clear_lock,
+};
+
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+                           int how, struct nfs_commit_info *cinfo)
+{
+       int status;
+
+       status = pnfs_commit_list(inode, head, how, cinfo);
+       if (status == PNFS_NOT_ATTEMPTED)
+               status = nfs_commit_list(inode, head, how, cinfo);
+       return status;
+}
+
 int nfs_commit_inode(struct inode *inode, int how)
 {
        LIST_HEAD(head);
+       struct nfs_commit_info cinfo;
        int may_wait = how & FLUSH_SYNC;
        int res;
 
        res = nfs_commit_set_lock(NFS_I(inode), may_wait);
        if (res <= 0)
                goto out_mark_dirty;
-       res = nfs_scan_commit(inode, &head);
+       nfs_init_cinfo_from_inode(&cinfo, inode);
+       res = nfs_scan_commit(inode, &head, &cinfo);
        if (res) {
                int error;
 
-               error = pnfs_commit_list(inode, &head, how);
-               if (error == PNFS_NOT_ATTEMPTED)
-                       error = nfs_commit_list(inode, &head, how);
+               error = nfs_generic_commit_list(inode, &head, how, &cinfo);
                if (error < 0)
                        return error;
                if (!may_wait)
@@ -1585,14 +1639,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
        int ret = 0;
 
        /* no commits means nothing needs to be done */
-       if (!nfsi->ncommit)
+       if (!nfsi->commit_info.ncommit)
                return ret;
 
        if (wbc->sync_mode == WB_SYNC_NONE) {
                /* Don't commit yet if this is a non-blocking flush and there
                 * are a lot of outstanding writes for this mapping.
                 */
-               if (nfsi->ncommit <= (nfsi->npages >> 1))
+               if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
                        goto out_mark_dirty;
 
                /* don't wait for the COMMIT response */
@@ -1665,7 +1719,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
                req = nfs_page_find_request(page);
                if (req == NULL)
                        break;
-               if (nfs_lock_request_dontget(req)) {
+               if (nfs_lock_request(req)) {
                        nfs_clear_request_commit(req);
                        nfs_inode_remove_request(req);
                        /*
@@ -1673,7 +1727,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
                         * page as being dirty
                         */
                        cancel_dirty_page(page, PAGE_CACHE_SIZE);
-                       nfs_unlock_request(req);
+                       nfs_unlock_and_release_request(req);
                        break;
                }
                ret = nfs_wait_on_request(req);
@@ -1742,7 +1796,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 int __init nfs_init_writepagecache(void)
 {
        nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-                                            sizeof(struct nfs_write_data),
+                                            sizeof(struct nfs_write_header),
                                             0, SLAB_HWCACHE_ALIGN,
                                             NULL);
        if (nfs_wdata_cachep == NULL)
@@ -1753,6 +1807,13 @@ int __init nfs_init_writepagecache(void)
        if (nfs_wdata_mempool == NULL)
                return -ENOMEM;
 
+       nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
+                                            sizeof(struct nfs_commit_data),
+                                            0, SLAB_HWCACHE_ALIGN,
+                                            NULL);
+       if (nfs_cdata_cachep == NULL)
+               return -ENOMEM;
+
        nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
                                                      nfs_wdata_cachep);
        if (nfs_commit_mempool == NULL)
index 0987146b0637a1fd1f9a4ea03fb7040b1d4314cd..af2d2fa30eee960a6b0f263f43e6b4a39ad5f245 100644 (file)
 #define NFS4_CDFC4_FORE_OR_BOTH 0x3
 #define NFS4_CDFC4_BACK_OR_BOTH 0x7
 
+#define NFS4_CDFS4_FORE 0x1
+#define NFS4_CDFS4_BACK 0x2
+#define NFS4_CDFS4_BOTH 0x3
+
 #define NFS4_SET_TO_SERVER_TIME        0
 #define NFS4_SET_TO_CLIENT_TIME        1
 
@@ -526,6 +530,13 @@ enum lock_type4 {
 #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
 #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+#define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
+
+/* MDS threshold bitmap bits */
+#define THRESHOLD_RD                    (1UL << 0)
+#define THRESHOLD_WR                    (1UL << 1)
+#define THRESHOLD_RD_IO                 (1UL << 2)
+#define THRESHOLD_WR_IO                 (1UL << 3)
 
 #define NFSPROC4_NULL 0
 #define NFSPROC4_COMPOUND 1
@@ -596,6 +607,8 @@ enum {
        NFSPROC4_CLNT_TEST_STATEID,
        NFSPROC4_CLNT_FREE_STATEID,
        NFSPROC4_CLNT_GETDEVICELIST,
+       NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
+       NFSPROC4_CLNT_DESTROY_CLIENTID,
 };
 
 /* nfs41 types */
index 52a1bdb4ee2bad0a668262c7b67bf8003f738095..b23cfc120edb46c5b285de63edeff61fb03242ec 100644 (file)
@@ -102,6 +102,7 @@ struct nfs_open_context {
        int error;
 
        struct list_head list;
+       struct nfs4_threshold   *mdsthreshold;
 };
 
 struct nfs_open_dir_context {
@@ -179,8 +180,7 @@ struct nfs_inode {
        __be32                  cookieverf[2];
 
        unsigned long           npages;
-       unsigned long           ncommit;
-       struct list_head        commit_list;
+       struct nfs_mds_commit_info commit_info;
 
        /* Open contexts for shared mmap writes */
        struct list_head        open_files;
@@ -201,8 +201,10 @@ struct nfs_inode {
 
        /* pNFS layout information */
        struct pnfs_layout_hdr *layout;
-       atomic_t                commits_outstanding;
 #endif /* CONFIG_NFS_V4*/
+       /* how many bytes have been written/read and how many bytes queued up */
+       __u64 write_io;
+       __u64 read_io;
 #ifdef CONFIG_NFS_FSCACHE
        struct fscache_cookie   *fscache;
 #endif
@@ -230,7 +232,6 @@ struct nfs_inode {
 #define NFS_INO_FSCACHE                (5)             /* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK   (6)             /* FS-Cache cookie management lock */
 #define NFS_INO_COMMIT         (7)             /* inode is committing unstable writes */
-#define NFS_INO_PNFS_COMMIT    (8)             /* use pnfs code for commit */
 #define NFS_INO_LAYOUTCOMMIT   (9)             /* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)          /* layoutcommit inflight */
 
@@ -317,11 +318,6 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
        return NFS_SERVER(inode)->caps & cap;
 }
 
-static inline int NFS_USE_READDIRPLUS(struct inode *inode)
-{
-       return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-}
-
 static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
 {
        dentry->d_time = verf;
@@ -552,8 +548,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
-extern struct nfs_write_data *nfs_commitdata_alloc(void);
-extern void nfs_commit_free(struct nfs_write_data *wdata);
+extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+extern void nfs_commit_free(struct nfs_commit_data *data);
 #else
 static inline int
 nfs_commit_inode(struct inode *inode, int how)
@@ -568,12 +564,6 @@ nfs_have_writebacks(struct inode *inode)
        return NFS_I(inode)->npages != 0;
 }
 
-/*
- * Allocate nfs_write_data structures
- */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
-extern void nfs_writedata_free(struct nfs_write_data *);
-
 /*
  * linux/fs/nfs/read.c
  */
@@ -584,12 +574,6 @@ extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
                               struct page *);
 
-/*
- * Allocate nfs_read_data structures
- */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
-extern void nfs_readdata_free(struct nfs_read_data *);
-
 /*
  * linux/fs/nfs3proc.c
  */
@@ -654,6 +638,7 @@ nfs_fileid_to_ino_t(u64 fileid)
 #define NFSDBG_FSCACHE         0x0800
 #define NFSDBG_PNFS            0x1000
 #define NFSDBG_PNFS_LD         0x2000
+#define NFSDBG_STATE           0x4000
 #define NFSDBG_ALL             0xFFFF
 
 #ifdef __KERNEL__
index 7073fc74481cb6e1d69b0278e26c87c52cbc349e..fbb78fb09bd25c925d65207643bf61da614167d8 100644 (file)
@@ -17,7 +17,7 @@ struct nfs4_sequence_args;
 struct nfs4_sequence_res;
 struct nfs_server;
 struct nfs4_minor_version_ops;
-struct server_scope;
+struct nfs41_server_scope;
 struct nfs41_impl_id;
 
 /*
@@ -35,6 +35,9 @@ struct nfs_client {
 #define NFS_CS_RENEWD          3               /* - renewd started */
 #define NFS_CS_STOP_RENEW      4               /* no more state to renew */
 #define NFS_CS_CHECK_LEASE_TIME        5               /* need to check lease time */
+       unsigned long           cl_flags;       /* behavior switches */
+#define NFS_CS_NORESVPORT      0               /* - use ephemeral src port */
+#define NFS_CS_DISCRTRY                1               /* - disconnect on RPC retry */
        struct sockaddr_storage cl_addr;        /* server identifier */
        size_t                  cl_addrlen;
        char *                  cl_hostname;    /* hostname of server */
@@ -61,9 +64,6 @@ struct nfs_client {
 
        struct rpc_wait_queue   cl_rpcwaitq;
 
-       /* used for the setclientid verifier */
-       struct timespec         cl_boot_time;
-
        /* idmapper */
        struct idmap *          cl_idmap;
 
@@ -79,16 +79,17 @@ struct nfs_client {
        u32                     cl_seqid;
        /* The flags used for obtaining the clientid during EXCHANGE_ID */
        u32                     cl_exchange_flags;
-       struct nfs4_session     *cl_session;    /* sharred session */
+       struct nfs4_session     *cl_session;    /* shared session */
+       struct nfs41_server_owner *cl_serverowner;
+       struct nfs41_server_scope *cl_serverscope;
+       struct nfs41_impl_id    *cl_implid;
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
        struct fscache_cookie   *fscache;       /* client index cache cookie */
 #endif
 
-       struct server_scope     *server_scope;  /* from exchange_id */
-       struct nfs41_impl_id    *impl_id;       /* from exchange_id */
-       struct net              *net;
+       struct net              *cl_net;
 };
 
 /*
index eac30d6bec17c78db77a050e269ae0336e00e372..88d166b555e8539fa8ec5cb896c56ecea7417bdc 100644 (file)
@@ -27,7 +27,6 @@ enum {
        PG_CLEAN,
        PG_NEED_COMMIT,
        PG_NEED_RESCHED,
-       PG_PARTIAL_READ_FAILED,
        PG_COMMIT_TO_DS,
 };
 
@@ -37,7 +36,6 @@ struct nfs_page {
        struct page             *wb_page;       /* page to read in/write out */
        struct nfs_open_context *wb_context;    /* File state context info */
        struct nfs_lock_context *wb_lock_context;       /* lock context info */
-       atomic_t                wb_complete;    /* i/os we're waiting for */
        pgoff_t                 wb_index;       /* Offset >> PAGE_CACHE_SHIFT */
        unsigned int            wb_offset,      /* Offset & ~PAGE_CACHE_MASK */
                                wb_pgbase,      /* Start of page data */
@@ -68,7 +66,9 @@ struct nfs_pageio_descriptor {
        int                     pg_ioflags;
        int                     pg_error;
        const struct rpc_call_ops *pg_rpc_callops;
+       const struct nfs_pgio_completion_ops *pg_completion_ops;
        struct pnfs_layout_segment *pg_lseg;
+       struct nfs_direct_req   *pg_dreq;
 };
 
 #define NFS_WBACK_BUSY(req)    (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -84,6 +84,7 @@ extern        void nfs_release_request(struct nfs_page *req);
 extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                             struct inode *inode,
                             const struct nfs_pageio_ops *pg_ops,
+                            const struct nfs_pgio_completion_ops *compl_ops,
                             size_t bsize,
                             int how);
 extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -95,26 +96,17 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
                                struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern void nfs_unlock_request(struct nfs_page *req);
+extern void nfs_unlock_and_release_request(struct nfs_page *req);
 
 /*
- * Lock the page of an asynchronous request without getting a new reference
+ * Lock the page of an asynchronous request
  */
-static inline int
-nfs_lock_request_dontget(struct nfs_page *req)
-{
-       return !test_and_set_bit(PG_BUSY, &req->wb_flags);
-}
-
 static inline int
 nfs_lock_request(struct nfs_page *req)
 {
-       if (test_and_set_bit(PG_BUSY, &req->wb_flags))
-               return 0;
-       kref_get(&req->wb_kref);
-       return 1;
+       return !test_and_set_bit(PG_BUSY, &req->wb_flags);
 }
 
-
 /**
  * nfs_list_add_request - Insert a request into a list
  * @req: request
index 7ba3551a0414a867cffe38e52cc720004e32592e..d1a7bf51c326dc7f103aae60874a667f3307b373 100644 (file)
@@ -35,6 +35,15 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid
        return a->major == b->major && a->minor == b->minor;
 }
 
+struct nfs4_threshold {
+       __u32   bm;
+       __u32   l_type;
+       __u64   rd_sz;
+       __u64   wr_sz;
+       __u64   rd_io_sz;
+       __u64   wr_io_sz;
+};
+
 struct nfs_fattr {
        unsigned int            valid;          /* which fields are valid */
        umode_t                 mode;
@@ -67,6 +76,7 @@ struct nfs_fattr {
        unsigned long           gencount;
        struct nfs4_string      *owner_name;
        struct nfs4_string      *group_name;
+       struct nfs4_threshold   *mdsthreshold;  /* pNFS threshold hints */
 };
 
 #define NFS_ATTR_FATTR_TYPE            (1U << 0)
@@ -106,14 +116,14 @@ struct nfs_fattr {
                | NFS_ATTR_FATTR_FILEID \
                | NFS_ATTR_FATTR_ATIME \
                | NFS_ATTR_FATTR_MTIME \
-               | NFS_ATTR_FATTR_CTIME)
+               | NFS_ATTR_FATTR_CTIME \
+               | NFS_ATTR_FATTR_CHANGE)
 #define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
                | NFS_ATTR_FATTR_BLOCKS_USED)
 #define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
                | NFS_ATTR_FATTR_SPACE_USED)
 #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
-               | NFS_ATTR_FATTR_SPACE_USED \
-               | NFS_ATTR_FATTR_CHANGE)
+               | NFS_ATTR_FATTR_SPACE_USED)
 
 /*
  * Info on the file system
@@ -338,7 +348,6 @@ struct nfs_openargs {
        const struct qstr *     name;
        const struct nfs_server *server;         /* Needed for ID mapping */
        const u32 *             bitmask;
-       const u32 *             dir_bitmask;
        __u32                   claim;
        struct nfs4_sequence_args       seq_args;
 };
@@ -349,7 +358,6 @@ struct nfs_openres {
        struct nfs4_change_info cinfo;
        __u32                   rflags;
        struct nfs_fattr *      f_attr;
-       struct nfs_fattr *      dir_attr;
        struct nfs_seqid *      seqid;
        const struct nfs_server *server;
        fmode_t                 delegation_type;
@@ -518,13 +526,30 @@ struct nfs_writeres {
        struct nfs4_sequence_res        seq_res;
 };
 
+/*
+ * Arguments to the commit call.
+ */
+struct nfs_commitargs {
+       struct nfs_fh           *fh;
+       __u64                   offset;
+       __u32                   count;
+       const u32               *bitmask;
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs_commitres {
+       struct nfs_fattr        *fattr;
+       struct nfs_writeverf    *verf;
+       const struct nfs_server *server;
+       struct nfs4_sequence_res        seq_res;
+};
+
 /*
  * Common arguments to the unlink call
  */
 struct nfs_removeargs {
        const struct nfs_fh     *fh;
        struct qstr             name;
-       const u32 *             bitmask;
        struct nfs4_sequence_args       seq_args;
 };
 
@@ -543,7 +568,6 @@ struct nfs_renameargs {
        const struct nfs_fh             *new_dir;
        const struct qstr               *old_name;
        const struct qstr               *new_name;
-       const u32                       *bitmask;
        struct nfs4_sequence_args       seq_args;
 };
 
@@ -839,7 +863,6 @@ struct nfs4_create_res {
        struct nfs_fh *                 fh;
        struct nfs_fattr *              fattr;
        struct nfs4_change_info         dir_cinfo;
-       struct nfs_fattr *              dir_fattr;
        struct nfs4_sequence_res        seq_res;
 };
 
@@ -1061,6 +1084,21 @@ struct nfstime4 {
 };
 
 #ifdef CONFIG_NFS_V4_1
+
+struct pnfs_commit_bucket {
+       struct list_head written;
+       struct list_head committing;
+       struct pnfs_layout_segment *wlseg;
+       struct pnfs_layout_segment *clseg;
+};
+
+struct pnfs_ds_commit_info {
+       int nwritten;
+       int ncommitting;
+       int nbuckets;
+       struct pnfs_commit_bucket *buckets;
+};
+
 #define NFS4_EXCHANGE_ID_LEN   (48)
 struct nfs41_exchange_id_args {
        struct nfs_client               *client;
@@ -1070,13 +1108,13 @@ struct nfs41_exchange_id_args {
        u32                             flags;
 };
 
-struct server_owner {
+struct nfs41_server_owner {
        uint64_t                        minor_id;
        uint32_t                        major_id_sz;
        char                            major_id[NFS4_OPAQUE_LIMIT];
 };
 
-struct server_scope {
+struct nfs41_server_scope {
        uint32_t                        server_scope_sz;
        char                            server_scope[NFS4_OPAQUE_LIMIT];
 };
@@ -1087,10 +1125,18 @@ struct nfs41_impl_id {
        struct nfstime4                 date;
 };
 
+struct nfs41_bind_conn_to_session_res {
+       struct nfs4_session             *session;
+       u32                             dir;
+       bool                            use_conn_in_rdma_mode;
+};
+
 struct nfs41_exchange_id_res {
-       struct nfs_client               *client;
+       u64                             clientid;
+       u32                             seqid;
        u32                             flags;
-       struct server_scope             *server_scope;
+       struct nfs41_server_owner       *server_owner;
+       struct nfs41_server_scope       *server_scope;
        struct nfs41_impl_id            *impl_id;
 };
 
@@ -1143,35 +1189,114 @@ struct nfs41_free_stateid_res {
        struct nfs4_sequence_res        seq_res;
 };
 
+#else
+
+struct pnfs_ds_commit_info {
+};
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;
 
 #define NFS_PAGEVEC_SIZE       (8U)
 
+struct nfs_page_array {
+       struct page             **pagevec;
+       unsigned int            npages;         /* Max length of pagevec */
+       struct page             *page_array[NFS_PAGEVEC_SIZE];
+};
+
 struct nfs_read_data {
+       struct nfs_pgio_header  *header;
+       struct list_head        list;
        struct rpc_task         task;
-       struct inode            *inode;
-       struct rpc_cred         *cred;
        struct nfs_fattr        fattr;  /* fattr storage */
-       struct list_head        pages;  /* Coalesced read requests */
-       struct list_head        list;   /* lists of struct nfs_read_data */
-       struct nfs_page         *req;   /* multi ops per nfs_page */
-       struct page             **pagevec;
-       unsigned int            npages; /* Max length of pagevec */
        struct nfs_readargs args;
        struct nfs_readres  res;
        unsigned long           timestamp;      /* For lease renewal */
-       struct pnfs_layout_segment *lseg;
-       struct nfs_client       *ds_clp;        /* pNFS data server */
-       const struct rpc_call_ops *mds_ops;
        int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
        __u64                   mds_offset;
+       struct nfs_page_array   pages;
+       struct nfs_client       *ds_clp;        /* pNFS data server */
+};
+
+/* used as flag bits in nfs_pgio_header */
+enum {
+       NFS_IOHDR_ERROR = 0,
+       NFS_IOHDR_EOF,
+       NFS_IOHDR_REDO,
+       NFS_IOHDR_NEED_COMMIT,
+       NFS_IOHDR_NEED_RESCHED,
+};
+
+struct nfs_pgio_header {
+       struct inode            *inode;
+       struct rpc_cred         *cred;
+       struct list_head        pages;
+       struct list_head        rpc_list;
+       atomic_t                refcnt;
+       struct nfs_page         *req;
+       struct pnfs_layout_segment *lseg;
+       loff_t                  io_start;
+       const struct rpc_call_ops *mds_ops;
+       void (*release) (struct nfs_pgio_header *hdr);
+       const struct nfs_pgio_completion_ops *completion_ops;
+       struct nfs_direct_req   *dreq;
+       spinlock_t              lock;
+       /* fields protected by lock */
        int                     pnfs_error;
-       struct page             *page_array[NFS_PAGEVEC_SIZE];
+       int                     error;          /* merge with pnfs_error */
+       unsigned long           good_bytes;     /* boundary of good data */
+       unsigned long           flags;
+};
+
+struct nfs_read_header {
+       struct nfs_pgio_header  header;
+       struct nfs_read_data    rpc_data;
 };
 
 struct nfs_write_data {
+       struct nfs_pgio_header  *header;
+       struct list_head        list;
+       struct rpc_task         task;
+       struct nfs_fattr        fattr;
+       struct nfs_writeverf    verf;
+       struct nfs_writeargs    args;           /* argument struct */
+       struct nfs_writeres     res;            /* result struct */
+       unsigned long           timestamp;      /* For lease renewal */
+       int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
+       __u64                   mds_offset;     /* Filelayout dense stripe */
+       struct nfs_page_array   pages;
+       struct nfs_client       *ds_clp;        /* pNFS data server */
+};
+
+struct nfs_write_header {
+       struct nfs_pgio_header  header;
+       struct nfs_write_data   rpc_data;
+};
+
+struct nfs_mds_commit_info {
+       atomic_t rpcs_out;
+       unsigned long           ncommit;
+       struct list_head        list;
+};
+
+struct nfs_commit_data;
+struct nfs_inode;
+struct nfs_commit_completion_ops {
+       void (*error_cleanup) (struct nfs_inode *nfsi);
+       void (*completion) (struct nfs_commit_data *data);
+};
+
+struct nfs_commit_info {
+       spinlock_t                      *lock;
+       struct nfs_mds_commit_info      *mds;
+       struct pnfs_ds_commit_info      *ds;
+       struct nfs_direct_req           *dreq;  /* O_DIRECT request */
+       const struct nfs_commit_completion_ops *completion_ops;
+};
+
+struct nfs_commit_data {
        struct rpc_task         task;
        struct inode            *inode;
        struct rpc_cred         *cred;
@@ -1179,22 +1304,22 @@ struct nfs_write_data {
        struct nfs_writeverf    verf;
        struct list_head        pages;          /* Coalesced requests we wish to flush */
        struct list_head        list;           /* lists of struct nfs_write_data */
-       struct nfs_page         *req;           /* multi ops per nfs_page */
-       struct page             **pagevec;
-       unsigned int            npages;         /* Max length of pagevec */
-       struct nfs_writeargs    args;           /* argument struct */
-       struct nfs_writeres     res;            /* result struct */
+       struct nfs_direct_req   *dreq;          /* O_DIRECT request */
+       struct nfs_commitargs   args;           /* argument struct */
+       struct nfs_commitres    res;            /* result struct */
+       struct nfs_open_context *context;
        struct pnfs_layout_segment *lseg;
        struct nfs_client       *ds_clp;        /* pNFS data server */
        int                     ds_commit_index;
        const struct rpc_call_ops *mds_ops;
-       int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
-#ifdef CONFIG_NFS_V4
-       unsigned long           timestamp;      /* For lease renewal */
-#endif
-       __u64                   mds_offset;     /* Filelayout dense stripe */
-       int                     pnfs_error;
-       struct page             *page_array[NFS_PAGEVEC_SIZE];
+       const struct nfs_commit_completion_ops *completion_ops;
+       int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
+};
+
+struct nfs_pgio_completion_ops {
+       void    (*error_cleanup)(struct list_head *head);
+       void    (*init_hdr)(struct nfs_pgio_header *hdr);
+       void    (*completion)(struct nfs_pgio_header *hdr);
 };
 
 struct nfs_unlinkdata {
@@ -1234,11 +1359,13 @@ struct nfs_rpc_ops {
 
        int     (*getroot) (struct nfs_server *, struct nfs_fh *,
                            struct nfs_fsinfo *);
+       struct vfsmount *(*submount) (struct nfs_server *, struct dentry *,
+                                     struct nfs_fh *, struct nfs_fattr *);
        int     (*getattr) (struct nfs_server *, struct nfs_fh *,
                            struct nfs_fattr *);
        int     (*setattr) (struct dentry *, struct nfs_fattr *,
                            struct iattr *);
-       int     (*lookup)  (struct rpc_clnt *clnt, struct inode *, struct qstr *,
+       int     (*lookup)  (struct inode *, struct qstr *,
                            struct nfs_fh *, struct nfs_fattr *);
        int     (*access)  (struct inode *, struct nfs_access_entry *);
        int     (*readlink)(struct inode *, struct page *, unsigned int,
@@ -1277,8 +1404,9 @@ struct nfs_rpc_ops {
        void    (*write_setup)  (struct nfs_write_data *, struct rpc_message *);
        void    (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
        int     (*write_done)  (struct rpc_task *, struct nfs_write_data *);
-       void    (*commit_setup) (struct nfs_write_data *, struct rpc_message *);
-       int     (*commit_done) (struct rpc_task *, struct nfs_write_data *);
+       void    (*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
+       void    (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
+       int     (*commit_done) (struct rpc_task *, struct nfs_commit_data *);
        int     (*lock)(struct file *, int, struct file_lock *);
        int     (*lock_check_bounds)(const struct file_lock *);
        void    (*clear_acl_cache)(struct inode *);
@@ -1287,9 +1415,9 @@ struct nfs_rpc_ops {
                                struct nfs_open_context *ctx,
                                int open_flags,
                                struct iattr *iattr);
-       int     (*init_client) (struct nfs_client *, const struct rpc_timeout *,
-                               const char *, rpc_authflavor_t, int);
-       int     (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
+       struct nfs_client *
+               (*init_client) (struct nfs_client *, const struct rpc_timeout *,
+                               const char *, rpc_authflavor_t);
 };
 
 /*
index 7fee13b331d193e1a46831c257ffd6fcf6a4fde8..f56f045778aedf4a0da1fcf2566eacf69c7c6c8a 100644 (file)
@@ -1286,6 +1286,8 @@ call_reserveresult(struct rpc_task *task)
        }
 
        switch (status) {
+       case -ENOMEM:
+               rpc_delay(task, HZ >> 2);
        case -EAGAIN:   /* woken up; retry */
                task->tk_action = call_reserve;
                return;
index fd2423991c2d4dc473223b128d9a761b3da5beea..04040476082e6efd5ef08f9c7e6444c0fec77929 100644 (file)
@@ -120,7 +120,7 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall);
 
 /**
  * rpc_queue_upcall - queue an upcall message to userspace
- * @inode: inode of upcall pipe on which to queue given message
+ * @pipe: upcall pipe on which to queue given message
  * @msg: message to queue
  *
  * Call with an @inode created by rpc_mkpipe() to queue an upcall.
@@ -819,9 +819,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
  * @parent: dentry of directory to create new "pipe" in
  * @name: name of pipe
  * @private: private data to associate with the pipe, for the caller's use
- * @ops: operations defining the behavior of the pipe: upcall, downcall,
- *     release_pipe, open_pipe, and destroy_msg.
- * @flags: rpc_pipe flags
+ * @pipe: &rpc_pipe containing input parameters
  *
  * Data is made available for userspace to read by calls to
  * rpc_queue_upcall().  The actual reads will result in calls to
@@ -943,7 +941,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
 
 /**
  * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
- * @clnt: rpc client
+ * @dentry: dentry for the pipe
  */
 int rpc_remove_client_dir(struct dentry *dentry)
 {
@@ -1115,7 +1113,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_op = &s_ops;
        sb->s_time_gran = 1;
 
-       inode = rpc_get_inode(sb, S_IFDIR | 0755);
+       inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
        sb->s_root = root = d_make_root(inode);
        if (!root)
                return -ENOMEM;
index 78ac39fd9fe7556a71dc361406904cf8bb32d7b2..3c0653439f3dc398031301d53819c3b6e78bc4ef 100644 (file)
@@ -394,6 +394,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
 
 /**
  * rpcb_register - set or unset a port registration with the local rpcbind svc
+ * @net: target network namespace
  * @prog: RPC program number to bind
  * @vers: RPC version number to bind
  * @prot: transport protocol to register
@@ -521,6 +522,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
 
 /**
  * rpcb_v4_register - set or unset a port registration with the local rpcbind
+ * @net: target network namespace
  * @program: RPC program number of service to (un)register
  * @version: RPC version number of service to (un)register
  * @address: address family, IP address, and port to (un)register
index 6fe2dcead15027e6a5d4b7086121b319d8e05363..3c83035cdaa9940849fbfb1c729a6a2720b32702 100644 (file)
@@ -979,20 +979,21 @@ static void xprt_alloc_slot(struct rpc_task *task)
                list_del(&req->rq_list);
                goto out_init_req;
        }
-       req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
+       req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
        if (!IS_ERR(req))
                goto out_init_req;
        switch (PTR_ERR(req)) {
        case -ENOMEM:
-               rpc_delay(task, HZ >> 2);
                dprintk("RPC:       dynamic allocation of request slot "
                                "failed! Retrying\n");
+               task->tk_status = -ENOMEM;
                break;
        case -EAGAIN:
                rpc_sleep_on(&xprt->backlog, task, NULL);
                dprintk("RPC:       waiting for request slot\n");
+       default:
+               task->tk_status = -EAGAIN;
        }
-       task->tk_status = -EAGAIN;
        return;
 out_init_req:
        task->tk_status = 0;