git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'pnfs_generic'
author: Trond Myklebust <trond.myklebust@primarydata.com>
Mon, 4 Jan 2016 18:19:55 +0000 (13:19 -0500)
committer: Trond Myklebust <trond.myklebust@primarydata.com>
Mon, 4 Jan 2016 18:19:55 +0000 (13:19 -0500)
* pnfs_generic:
  NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments
  NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures
  NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()
  NFSv4.1/pNFS: Fix a race in initiate_file_draining()
  NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout
  NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode
  NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids
  NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()
  NFS: Relax requirements in nfs_flush_incompatible
  NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid
  NFS: Allow multiple commit requests in flight per file
  NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs
  NFSv4: List stateid information in the callback tracepoints
  NFSv4.1/pNFS: Don't return NFS4ERR_DELAY unnecessarily in CB_LAYOUTRECALL
  NFSv4.1/pNFS: Ensure we enforce RFC5661 Section 12.5.5.2.1
  pNFS: If we have to delay the layout callback, mark the layout for return
  NFSv4.1/pNFS: Add a helper to mark the layout as returned
  pNFS: Ensure nfs4_layoutget_prepare returns the correct error

1  2 
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/internal.h
fs/nfs/nfs4proc.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/write.c
include/linux/nfs_fs.h
include/linux/nfs_xdr.h

diff --combined fs/nfs/direct.c
index 2e7142bcb4c83922748385c2d87771223e7cdc53,a9a93927fe3e31af8228bc9bd2dca8e6c41be047..7ab7ec9f4eed8a0212d2079d7c2c79cb331d7a20
@@@ -117,12 -117,6 +117,6 @@@ static inline int put_dreq(struct nfs_d
        return atomic_dec_and_test(&dreq->io_count);
  }
  
- void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
- {
-       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- }
- EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);
  static void
  nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
  {
@@@ -670,10 -664,6 +664,10 @@@ static void nfs_direct_write_reschedule
  
        req = nfs_list_entry(reqs.next);
        nfs_direct_setup_mirroring(dreq, &desc, req);
 +      if (desc.pg_error < 0) {
 +              list_splice_init(&reqs, &failed);
 +              goto out_failed;
 +      }
  
        list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
                if (!nfs_pageio_add_request(&desc, req)) {
                        nfs_list_add_request(req, &failed);
                        spin_lock(cinfo.lock);
                        dreq->flags = 0;
 -                      dreq->error = -EIO;
 +                      if (desc.pg_error < 0)
 +                              dreq->error = desc.pg_error;
 +                      else
 +                              dreq->error = -EIO;
                        spin_unlock(cinfo.lock);
                }
                nfs_release_request(req);
        }
        nfs_pageio_complete(&desc);
  
 +out_failed:
        while (!list_empty(&failed)) {
                req = nfs_list_entry(failed.next);
                nfs_list_remove_request(req);
@@@ -735,14 -721,20 +729,20 @@@ static void nfs_direct_commit_complete(
                nfs_direct_write_complete(dreq, data->inode);
  }
  
- static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
+               struct nfs_page *req)
  {
-       /* There is no lock to clear */
+       struct nfs_direct_req *dreq = cinfo->dreq;
+       spin_lock(&dreq->lock);
+       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+       spin_unlock(&dreq->lock);
+       nfs_mark_request_commit(req, NULL, cinfo, 0);
  }
  
  static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
        .completion = nfs_direct_commit_complete,
-       .error_cleanup = nfs_direct_error_cleanup,
+       .resched_write = nfs_direct_resched_write,
  };
  
  static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
@@@ -847,10 -839,25 +847,25 @@@ static void nfs_write_sync_pgio_error(s
        }
  }
  
+ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
+ {
+       struct nfs_direct_req *dreq = hdr->dreq;
+       spin_lock(&dreq->lock);
+       if (dreq->error == 0) {
+               dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+               /* fake unstable write to let common nfs resend pages */
+               hdr->verf.committed = NFS_UNSTABLE;
+               hdr->good_bytes = hdr->args.count;
+       }
+       spin_unlock(&dreq->lock);
+ }
  static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
        .error_cleanup = nfs_write_sync_pgio_error,
        .init_hdr = nfs_direct_pgio_init,
        .completion = nfs_direct_write_completion,
+       .reschedule_io = nfs_direct_write_reschedule_io,
  };
  
  
@@@ -908,11 -915,6 +923,11 @@@ static ssize_t nfs_direct_write_schedul
                        }
  
                        nfs_direct_setup_mirroring(dreq, &desc, req);
 +                      if (desc.pg_error < 0) {
 +                              nfs_free_request(req);
 +                              result = desc.pg_error;
 +                              break;
 +                      }
  
                        nfs_lock_request(req);
                        req->wb_index = pos >> PAGE_SHIFT;
diff --combined fs/nfs/file.c
index f188dd071dfcfb9fc3b3e05053aaed172b69e217,e6ef80ec699c72d9d1f95e048bd612d2c90bcefa..178ec8da028f3cada7ad1a08302c00ac10d08d6d
@@@ -514,7 -514,7 +514,7 @@@ static void nfs_check_dirty_writeback(s
         * so it will not block due to pages that will shortly be freeable.
         */
        nfsi = NFS_I(mapping->host);
-       if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+       if (atomic_read(&nfsi->commit_info.rpcs_out)) {
                *writeback = true;
                return;
        }
@@@ -545,7 -545,7 +545,7 @@@ static int nfs_launder_page(struct pag
                inode->i_ino, (long long)page_offset(page));
  
        nfs_fscache_wait_on_page_write(nfsi, page);
 -      return nfs_wb_page(inode, page);
 +      return nfs_wb_launder_page(inode, page);
  }
  
  static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
index 2981cd190bfd4f6cdf9c58371c9f4275d276b1f1,df475d42df7765852b245378c400b5de5db710f8..18c329b84ffbb2323111f5123d504e1fb22e00d5
@@@ -505,17 -505,9 +505,17 @@@ ff_layout_alloc_lseg(struct pnfs_layout
        }
  
        p = xdr_inline_decode(&stream, 4);
 -      if (p)
 -              fls->flags = be32_to_cpup(p);
 +      if (!p)
 +              goto out_sort_mirrors;
 +      fls->flags = be32_to_cpup(p);
 +
 +      p = xdr_inline_decode(&stream, 4);
 +      if (!p)
 +              goto out_sort_mirrors;
 +      for (i=0; i < fls->mirror_array_cnt; i++)
 +              fls->mirror_array[i]->report_interval = be32_to_cpup(p);
  
 +out_sort_mirrors:
        ff_layout_sort_mirrors(fls);
        rc = ff_layout_check_layout(lgr);
        if (rc)
@@@ -611,9 -603,7 +611,9 @@@ nfs4_ff_layoutstat_start_io(struct nfs4
                mirror->start_time = now;
        if (ktime_equal(mirror->last_report_time, notime))
                mirror->last_report_time = now;
 -      if (layoutstats_timer != 0)
 +      if (mirror->report_interval != 0)
 +              report_interval = (s64)mirror->report_interval * 1000LL;
 +      else if (layoutstats_timer != 0)
                report_interval = (s64)layoutstats_timer * 1000LL;
        if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
                        report_interval) {
@@@ -795,19 -785,13 +795,19 @@@ ff_layout_pg_init_read(struct nfs_pagei
        int ds_idx;
  
        /* Use full layout for now */
 -      if (!pgio->pg_lseg)
 +      if (!pgio->pg_lseg) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                   req->wb_context,
                                                   0,
                                                   NFS4_MAX_UINT64,
                                                   IOMODE_READ,
                                                   GFP_KERNEL);
 +              if (IS_ERR(pgio->pg_lseg)) {
 +                      pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 +                      pgio->pg_lseg = NULL;
 +                      return;
 +              }
 +      }
        /* If no lseg, fall back to read through mds */
        if (pgio->pg_lseg == NULL)
                goto out_mds;
@@@ -841,19 -825,13 +841,19 @@@ ff_layout_pg_init_write(struct nfs_page
        int i;
        int status;
  
 -      if (!pgio->pg_lseg)
 +      if (!pgio->pg_lseg) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                   req->wb_context,
                                                   0,
                                                   NFS4_MAX_UINT64,
                                                   IOMODE_RW,
                                                   GFP_NOFS);
 +              if (IS_ERR(pgio->pg_lseg)) {
 +                      pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 +                      pgio->pg_lseg = NULL;
 +                      return;
 +              }
 +      }
        /* If no lseg, fall back to write through mds */
        if (pgio->pg_lseg == NULL)
                goto out_mds;
@@@ -889,25 -867,18 +889,25 @@@ static unsigned in
  ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
                                    struct nfs_page *req)
  {
 -      if (!pgio->pg_lseg)
 +      if (!pgio->pg_lseg) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                   req->wb_context,
                                                   0,
                                                   NFS4_MAX_UINT64,
                                                   IOMODE_RW,
                                                   GFP_NOFS);
 +              if (IS_ERR(pgio->pg_lseg)) {
 +                      pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 +                      pgio->pg_lseg = NULL;
 +                      goto out;
 +              }
 +      }
        if (pgio->pg_lseg)
                return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
  
        /* no lseg means that pnfs is not in use, so no mirroring here */
        nfs_pageio_reset_write_mds(pgio);
 +out:
        return 1;
  }
  
@@@ -941,18 -912,7 +941,7 @@@ static void ff_layout_reset_write(struc
                        hdr->args.count,
                        (unsigned long long)hdr->args.offset);
  
-               if (!hdr->dreq) {
-                       struct nfs_open_context *ctx;
-                       ctx = nfs_list_entry(hdr->pages.next)->wb_context;
-                       set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
-                       hdr->completion_ops->error_cleanup(&hdr->pages);
-               } else {
-                       nfs_direct_set_resched_writes(hdr->dreq);
-                       /* fake unstable write to let common nfs resend pages */
-                       hdr->verf.committed = NFS_UNSTABLE;
-                       hdr->good_bytes = hdr->args.count;
-               }
+               hdr->completion_ops->reschedule_io(hdr);
                return;
        }
  
@@@ -1130,7 -1090,7 +1119,7 @@@ static int ff_layout_async_handle_error
        return -NFS4ERR_RESET_TO_PNFS;
  out_retry:
        task->tk_status = 0;
 -      rpc_restart_call(task);
 +      rpc_restart_call_prepare(task);
        rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
        return -EAGAIN;
  }
@@@ -1188,14 -1148,6 +1177,14 @@@ static void ff_layout_io_track_ds_error
                }
        }
  
 +      switch (status) {
 +      case NFS4ERR_DELAY:
 +      case NFS4ERR_GRACE:
 +              return;
 +      default:
 +              break;
 +      }
 +
        mirror = FF_LAYOUT_COMP(lseg, idx);
        err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
                                       mirror, offset, length, status, opnum,
@@@ -1279,31 -1231,14 +1268,31 @@@ ff_layout_reset_to_mds(struct pnfs_layo
        return ff_layout_test_devid_unavailable(node);
  }
  
 -static int ff_layout_read_prepare_common(struct rpc_task *task,
 -                                       struct nfs_pgio_header *hdr)
 +static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
 +              struct nfs_pgio_header *hdr)
  {
 +      if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
 +              return;
        nfs4_ff_layout_stat_io_start_read(hdr->inode,
                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
                        hdr->args.count,
                        task->tk_start);
 +}
  
 +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
 +              struct nfs_pgio_header *hdr)
 +{
 +      if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
 +              return;
 +      nfs4_ff_layout_stat_io_end_read(task,
 +                      FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
 +                      hdr->args.count,
 +                      hdr->res.count);
 +}
 +
 +static int ff_layout_read_prepare_common(struct rpc_task *task,
 +                                       struct nfs_pgio_header *hdr)
 +{
        if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
                rpc_exit(task, -EIO);
                return -EIO;
        }
        hdr->pgio_done_cb = ff_layout_read_done_cb;
  
 +      ff_layout_read_record_layoutstats_start(task, hdr);
        return 0;
  }
  
@@@ -1378,6 -1312,10 +1367,6 @@@ static void ff_layout_read_call_done(st
  
        dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
  
 -      nfs4_ff_layout_stat_io_end_read(task,
 -                      FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
 -                      hdr->args.count, hdr->res.count);
 -
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
            task->tk_status == 0) {
                nfs4_sequence_done(task, &hdr->res.seq_res);
@@@ -1392,20 -1330,10 +1381,20 @@@ static void ff_layout_read_count_stats(
  {
        struct nfs_pgio_header *hdr = data;
  
 +      ff_layout_read_record_layoutstats_done(task, hdr);
        rpc_count_iostats_metrics(task,
            &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
  }
  
 +static void ff_layout_read_release(void *data)
 +{
 +      struct nfs_pgio_header *hdr = data;
 +
 +      ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
 +      pnfs_generic_rw_release(data);
 +}
 +
 +
  static int ff_layout_write_done_cb(struct rpc_task *task,
                                struct nfs_pgio_header *hdr)
  {
  
        switch (err) {
        case -NFS4ERR_RESET_TO_PNFS:
 -              pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
                ff_layout_reset_write(hdr, true);
                return task->tk_status;
        case -NFS4ERR_RESET_TO_MDS:
 -              pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
                ff_layout_reset_write(hdr, false);
                return task->tk_status;
        case -EAGAIN:
 -              rpc_restart_call_prepare(task);
                return -EAGAIN;
        }
  
@@@ -1460,9 -1391,11 +1449,9 @@@ static int ff_layout_commit_done_cb(str
  
        switch (err) {
        case -NFS4ERR_RESET_TO_PNFS:
 -              pnfs_set_retry_layoutget(data->lseg->pls_layout);
                pnfs_generic_prepare_to_resend_writes(data);
                return -EAGAIN;
        case -NFS4ERR_RESET_TO_MDS:
 -              pnfs_clear_retry_layoutget(data->lseg->pls_layout);
                pnfs_generic_prepare_to_resend_writes(data);
                return -EAGAIN;
        case -EAGAIN:
        return 0;
  }
  
 -static int ff_layout_write_prepare_common(struct rpc_task *task,
 -                                        struct nfs_pgio_header *hdr)
 +static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
 +              struct nfs_pgio_header *hdr)
  {
 +      if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
 +              return;
        nfs4_ff_layout_stat_io_start_write(hdr->inode,
                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
                        hdr->args.count,
                        task->tk_start);
 +}
 +
 +static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
 +              struct nfs_pgio_header *hdr)
 +{
 +      if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
 +              return;
 +      nfs4_ff_layout_stat_io_end_write(task,
 +                      FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
 +                      hdr->args.count, hdr->res.count,
 +                      hdr->res.verf->committed);
 +}
  
 +static int ff_layout_write_prepare_common(struct rpc_task *task,
 +                                        struct nfs_pgio_header *hdr)
 +{
        if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
                rpc_exit(task, -EIO);
                return -EIO;
                return -EAGAIN;
        }
  
 +      ff_layout_write_record_layoutstats_start(task, hdr);
        return 0;
  }
  
@@@ -1554,6 -1469,11 +1543,6 @@@ static void ff_layout_write_call_done(s
  {
        struct nfs_pgio_header *hdr = data;
  
 -      nfs4_ff_layout_stat_io_end_write(task,
 -                      FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
 -                      hdr->args.count, hdr->res.count,
 -                      hdr->res.verf->committed);
 -
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
            task->tk_status == 0) {
                nfs4_sequence_done(task, &hdr->res.seq_res);
@@@ -1568,53 -1488,18 +1557,53 @@@ static void ff_layout_write_count_stats
  {
        struct nfs_pgio_header *hdr = data;
  
 +      ff_layout_write_record_layoutstats_done(task, hdr);
        rpc_count_iostats_metrics(task,
            &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
  }
  
 -static void ff_layout_commit_prepare_common(struct rpc_task *task,
 +static void ff_layout_write_release(void *data)
 +{
 +      struct nfs_pgio_header *hdr = data;
 +
 +      ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
 +      pnfs_generic_rw_release(data);
 +}
 +
 +static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
                struct nfs_commit_data *cdata)
  {
 +      if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
 +              return;
        nfs4_ff_layout_stat_io_start_write(cdata->inode,
                        FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
                        0, task->tk_start);
  }
  
 +static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
 +              struct nfs_commit_data *cdata)
 +{
 +      struct nfs_page *req;
 +      __u64 count = 0;
 +
 +      if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
 +              return;
 +
 +      if (task->tk_status == 0) {
 +              list_for_each_entry(req, &cdata->pages, wb_list)
 +                      count += req->wb_bytes;
 +      }
 +      nfs4_ff_layout_stat_io_end_write(task,
 +                      FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
 +                      count, count, NFS_FILE_SYNC);
 +}
 +
 +static void ff_layout_commit_prepare_common(struct rpc_task *task,
 +              struct nfs_commit_data *cdata)
 +{
 +      ff_layout_commit_record_layoutstats_start(task, cdata);
 +}
 +
  static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
  {
        ff_layout_commit_prepare_common(task, data);
@@@ -1635,6 -1520,19 +1624,6 @@@ static void ff_layout_commit_prepare_v4
  
  static void ff_layout_commit_done(struct rpc_task *task, void *data)
  {
 -      struct nfs_commit_data *cdata = data;
 -      struct nfs_page *req;
 -      __u64 count = 0;
 -
 -      if (task->tk_status == 0) {
 -              list_for_each_entry(req, &cdata->pages, wb_list)
 -                      count += req->wb_bytes;
 -      }
 -
 -      nfs4_ff_layout_stat_io_end_write(task,
 -                      FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
 -                      count, count, NFS_FILE_SYNC);
 -
        pnfs_generic_write_commit_done(task, data);
  }
  
@@@ -1642,59 -1540,50 +1631,59 @@@ static void ff_layout_commit_count_stat
  {
        struct nfs_commit_data *cdata = data;
  
 +      ff_layout_commit_record_layoutstats_done(task, cdata);
        rpc_count_iostats_metrics(task,
            &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
  }
  
 +static void ff_layout_commit_release(void *data)
 +{
 +      struct nfs_commit_data *cdata = data;
 +
 +      ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
 +      pnfs_generic_commit_release(data);
 +}
 +
  static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_read_prepare_v3,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
 -      .rpc_release = pnfs_generic_rw_release,
 +      .rpc_release = ff_layout_read_release,
  };
  
  static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_read_prepare_v4,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
 -      .rpc_release = pnfs_generic_rw_release,
 +      .rpc_release = ff_layout_read_release,
  };
  
  static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_write_prepare_v3,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
 -      .rpc_release = pnfs_generic_rw_release,
 +      .rpc_release = ff_layout_write_release,
  };
  
  static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_write_prepare_v4,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
 -      .rpc_release = pnfs_generic_rw_release,
 +      .rpc_release = ff_layout_write_release,
  };
  
  static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_commit_prepare_v3,
        .rpc_call_done = ff_layout_commit_done,
        .rpc_count_stats = ff_layout_commit_count_stats,
 -      .rpc_release = pnfs_generic_commit_release,
 +      .rpc_release = ff_layout_commit_release,
  };
  
  static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_commit_prepare_v4,
        .rpc_call_done = ff_layout_commit_done,
        .rpc_count_stats = ff_layout_commit_count_stats,
 -      .rpc_release = pnfs_generic_commit_release,
 +      .rpc_release = ff_layout_commit_release,
  };
  
  static enum pnfs_try_status
diff --combined fs/nfs/internal.h
index 68f773dc226ece4e5cf1657628c6cf2e2646016b,870e2ba7ba49e0169dc5306cdea365c760b674ba..ee81792d288620eee4c95f2764333ccca272e497
@@@ -264,6 -264,12 +264,12 @@@ static inline bool nfs_pgio_has_mirrori
        return desc->pg_mirror_count > 1;
  }
  
+ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
+               const struct nfs_open_context *ctx2)
+ {
+       return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+ }
  /* nfs2xdr.c */
  extern struct rpc_procinfo nfs_procedures[];
  extern int nfs2_decode_dirent(struct xdr_stream *,
@@@ -519,7 -525,6 +525,6 @@@ static inline void nfs_inode_dio_wait(s
        inode_dio_wait(inode);
  }
  extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
- extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq);
  
  /* nfs4proc.c */
  extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
@@@ -711,17 -716,3 +716,17 @@@ static inline u32 nfs_stateid_hash(nfs4
        return 0;
  }
  #endif
 +
 +static inline bool nfs_error_is_fatal(int err)
 +{
 +      switch (err) {
 +      case -ERESTARTSYS:
 +      case -EIO:
 +      case -ENOSPC:
 +      case -EROFS:
 +      case -E2BIG:
 +              return true;
 +      default:
 +              return false;
 +      }
 +}
diff --combined fs/nfs/nfs4proc.c
index adae525edec4e932b1298676a672648016a19ef5,883da29b9acec6b4852b3ea3b5cc11dc5ddb0bb7..5e5062c9b92ba7474218a377b4439aac8454c405
@@@ -5383,11 -5383,6 +5383,11 @@@ static int _nfs4_proc_delegreturn(struc
        if (data == NULL)
                return -ENOMEM;
        nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 +
 +      nfs4_state_protect(server->nfs_client,
 +                      NFS_SP4_MACH_CRED_CLEANUP,
 +                      &task_setup_data.rpc_client, &msg);
 +
        data->args.fhandle = &data->fh;
        data->args.stateid = &data->stateid;
        data->args.bitmask = server->cache_consistency_bitmask;
@@@ -6864,13 -6859,10 +6864,13 @@@ static const struct nfs41_state_protect
        },
        .allow.u.words = {
                [0] = 1 << (OP_CLOSE) |
 +                    1 << (OP_OPEN_DOWNGRADE) |
                      1 << (OP_LOCKU) |
 +                    1 << (OP_DELEGRETURN) |
                      1 << (OP_COMMIT),
                [1] = 1 << (OP_SECINFO - 32) |
                      1 << (OP_SECINFO_NO_NAME - 32) |
 +                    1 << (OP_LAYOUTRETURN - 32) |
                      1 << (OP_TEST_STATEID - 32) |
                      1 << (OP_FREE_STATEID - 32) |
                      1 << (OP_WRITE - 32)
@@@ -6935,19 -6927,11 +6935,19 @@@ static int nfs4_sp4_select_mode(struct 
                }
  
                if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
 +                  test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) &&
 +                  test_bit(OP_DELEGRETURN, sp->allow.u.longs) &&
                    test_bit(OP_LOCKU, sp->allow.u.longs)) {
                        dfprintk(MOUNT, "  cleanup mode enabled\n");
                        set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
                }
  
 +              if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) {
 +                      dfprintk(MOUNT, "  pnfs cleanup mode enabled\n");
 +                      set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP,
 +                              &clp->cl_sp4_flags);
 +              }
 +
                if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
                    test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
                        dfprintk(MOUNT, "  secinfo mode enabled\n");
@@@ -7776,6 -7760,7 +7776,7 @@@ nfs4_layoutget_prepare(struct rpc_task 
        struct nfs4_layoutget *lgp = calldata;
        struct nfs_server *server = NFS_SERVER(lgp->args.inode);
        struct nfs4_session *session = nfs4_get_session(server);
+       int ret;
  
        dprintk("--> %s\n", __func__);
        /* Note the is a race here, where a CB_LAYOUTRECALL can come in
        if (nfs41_setup_sequence(session, &lgp->args.seq_args,
                                &lgp->res.seq_res, task))
                return;
-       if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+       ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid,
                                          NFS_I(lgp->args.inode)->layout,
                                          &lgp->args.range,
-                                         lgp->args.ctx->state)) {
-               rpc_exit(task, NFS4_OK);
-       }
+                                         lgp->args.ctx->state);
+       if (ret < 0)
+               rpc_exit(task, ret);
  }
  
  static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
        switch (task->tk_status) {
        case 0:
                goto out;
 +
 +      /*
 +       * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs
 +       * on the file. set tk_status to -ENODATA to tell upper layer to
 +       * retry go inband.
 +       */
 +      case -NFS4ERR_LAYOUTUNAVAILABLE:
 +              task->tk_status = -ENODATA;
 +              goto out;
        /*
         * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of
         * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3).
@@@ -8073,9 -8049,10 +8074,10 @@@ static void nfs4_layoutreturn_release(v
  
        dprintk("--> %s\n", __func__);
        spin_lock(&lo->plh_inode->i_lock);
+       pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
+       pnfs_mark_layout_returned_if_empty(lo);
        if (lrp->res.lrs_present)
                pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
-       pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
        pnfs_clear_layoutreturn_waitbit(lo);
        lo->plh_block_lgets--;
        spin_unlock(&lo->plh_inode->i_lock);
@@@ -8109,10 -8086,6 +8111,10 @@@ int nfs4_proc_layoutreturn(struct nfs4_
        };
        int status = 0;
  
 +      nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client,
 +                      NFS_SP4_MACH_CRED_PNFS_CLEANUP,
 +                      &task_setup_data.rpc_client, &msg);
 +
        dprintk("--> %s\n", __func__);
        if (!sync) {
                lrp->inode = nfs_igrab_and_active(lrp->args.inode);
diff --combined fs/nfs/pagelist.c
index 7c71b71016b5215bae068be9b661e61f56f40852,c3a78450a2395fa6423450cfd73651527ea45753..eeddbf0bf4c498ef4ed50fcd98320fae7725b87b
@@@ -664,11 -664,22 +664,11 @@@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio)
   * @desc: IO descriptor
   * @hdr: pageio header
   */
 -static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
 -                        struct nfs_pgio_header *hdr)
 +static void nfs_pgio_error(struct nfs_pgio_header *hdr)
  {
 -      struct nfs_pgio_mirror *mirror;
 -      u32 midx;
 -
        set_bit(NFS_IOHDR_REDO, &hdr->flags);
        nfs_pgio_data_destroy(hdr);
        hdr->completion_ops->completion(hdr);
 -      /* TODO: Make sure it's right to clean up all mirrors here
 -       *       and not just hdr->pgio_mirror_idx */
 -      for (midx = 0; midx < desc->pg_mirror_count; midx++) {
 -              mirror = &desc->pg_mirrors[midx];
 -              desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
 -      }
 -      return -ENOMEM;
  }
  
  /**
@@@ -789,11 -800,8 +789,11 @@@ int nfs_generic_pgio(struct nfs_pageio_
        unsigned int pagecount, pageused;
  
        pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
 -      if (!nfs_pgarray_set(&hdr->page_array, pagecount))
 -              return nfs_pgio_error(desc, hdr);
 +      if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
 +              nfs_pgio_error(hdr);
 +              desc->pg_error = -ENOMEM;
 +              return desc->pg_error;
 +      }
  
        nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
        pages = hdr->page_array.pagevec;
                        *pages++ = last_page = req->wb_page;
                }
        }
 -      if (WARN_ON_ONCE(pageused != pagecount))
 -              return nfs_pgio_error(desc, hdr);
 +      if (WARN_ON_ONCE(pageused != pagecount)) {
 +              nfs_pgio_error(hdr);
 +              desc->pg_error = -EINVAL;
 +              return desc->pg_error;
 +      }
  
        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
            (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
@@@ -838,8 -843,10 +838,8 @@@ static int nfs_generic_pg_pgios(struct 
  
        hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
        if (!hdr) {
 -              /* TODO: make sure this is right with mirroring - or
 -               *       should it back out all mirrors? */
 -              desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
 -              return -ENOMEM;
 +              desc->pg_error = -ENOMEM;
 +              return desc->pg_error;
        }
        nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
        ret = nfs_generic_pgio(desc, hdr);
@@@ -867,9 -874,6 +867,9 @@@ static int nfs_pageio_setup_mirroring(s
  
        mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
  
 +      if (pgio->pg_error < 0)
 +              return pgio->pg_error;
 +
        if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
                return -EINVAL;
  
@@@ -899,12 -903,6 +899,6 @@@ static void nfs_pageio_cleanup_mirrorin
        pgio->pg_mirrors_dynamic = NULL;
  }
  
- static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
-               const struct nfs_open_context *ctx2)
- {
-       return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
- }
  static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
                const struct nfs_lock_context *l2)
  {
@@@ -978,8 -976,6 +972,8 @@@ static int nfs_pageio_do_add_request(st
        } else {
                if (desc->pg_ops->pg_init)
                        desc->pg_ops->pg_init(desc, req);
 +              if (desc->pg_error < 0)
 +                      return 0;
                mirror->pg_base = req->wb_pgbase;
        }
        if (!nfs_can_coalesce_requests(prev, req, desc))
@@@ -1145,8 -1141,6 +1139,8 @@@ int nfs_pageio_add_request(struct nfs_p
        bytes = req->wb_bytes;
  
        nfs_pageio_setup_mirroring(desc, req);
 +      if (desc->pg_error < 0)
 +              goto out_failed;
  
        for (midx = 0; midx < desc->pg_mirror_count; midx++) {
                if (midx) {
  
                        if (IS_ERR(dupreq)) {
                                nfs_page_group_unlock(req);
 -                              return 0;
 +                              desc->pg_error = PTR_ERR(dupreq);
 +                              goto out_failed;
                        }
  
                        nfs_lock_request(dupreq);
                if (nfs_pgio_has_mirroring(desc))
                        desc->pg_mirror_idx = midx;
                if (!nfs_pageio_add_request_mirror(desc, dupreq))
 -                      return 0;
 +                      goto out_failed;
        }
  
        return 1;
 +
 +out_failed:
 +      /*
 +       * We might have failed before sending any reqs over wire.
 +       * Clean up rest of the reqs in mirror pg_list.
 +       */
 +      if (desc->pg_error) {
 +              struct nfs_pgio_mirror *mirror;
 +              void (*func)(struct list_head *);
 +
 +              /* remember fatal errors */
 +              if (nfs_error_is_fatal(desc->pg_error))
 +                      mapping_set_error(desc->pg_inode->i_mapping,
 +                                        desc->pg_error);
 +
 +              func = desc->pg_completion_ops->error_cleanup;
 +              for (midx = 0; midx < desc->pg_mirror_count; midx++) {
 +                      mirror = &desc->pg_mirrors[midx];
 +                      func(&mirror->pg_list);
 +              }
 +      }
 +      return 0;
  }
  
  /*
@@@ -1255,7 -1226,7 +1249,7 @@@ int nfs_pageio_resend(struct nfs_pageio
        nfs_pageio_complete(desc);
        if (!list_empty(&failed)) {
                list_move(&failed, &hdr->pages);
 -              return -EIO;
 +              return desc->pg_error < 0 ? desc->pg_error : -EIO;
        }
        return 0;
  }
diff --combined fs/nfs/pnfs.c
index 113c3b327e24ef7dc65322137d55fc9c0f790961,04db6d951b997bca6fdef210258ee47bfc2c80c2..a3592cc34a20b76341c9c9b9af3378bdddebd477
@@@ -53,7 -53,7 +53,7 @@@ static DEFINE_SPINLOCK(pnfs_spinlock)
  static LIST_HEAD(pnfs_modules_tbl);
  
  static int
- pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
+ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
                       enum pnfs_iomode iomode, bool sync);
  
  /* Return the registered pnfs layout driver module matching given id */
@@@ -385,13 -385,13 +385,13 @@@ static void pnfs_layoutreturn_before_pu
                enum pnfs_iomode iomode;
                bool send;
  
-               stateid = lo->plh_stateid;
+               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
                iomode = lo->plh_return_iomode;
                send = pnfs_prepare_layoutreturn(lo);
                spin_unlock(&inode->i_lock);
                if (send) {
                        /* Send an async layoutreturn so we dont deadlock */
-                       pnfs_send_layoutreturn(lo, stateid, iomode, false);
+                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
                }
        } else
                spin_unlock(&inode->i_lock);
@@@ -566,10 -566,10 +566,10 @@@ static int mark_lseg_invalid(struct pnf
  int
  pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
                            struct list_head *tmp_list,
-                           struct pnfs_layout_range *recall_range)
+                           const struct pnfs_layout_range *recall_range)
  {
        struct pnfs_layout_segment *lseg, *next;
-       int invalid = 0, removed = 0;
+       int remaining = 0;
  
        dprintk("%s:Begin lo %p\n", __func__, lo);
  
                                "offset %llu length %llu\n", __func__,
                                lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
                                lseg->pls_range.length);
-                       invalid++;
-                       removed += mark_lseg_invalid(lseg, tmp_list);
+                       if (!mark_lseg_invalid(lseg, tmp_list))
+                               remaining++;
                }
-       dprintk("%s:Return %i\n", __func__, invalid - removed);
-       return invalid - removed;
+       dprintk("%s:Return %i\n", __func__, remaining);
+       return remaining;
  }
  
  /* note free_me must contain lsegs from a single layout_hdr */
@@@ -618,6 -618,7 +618,6 @@@ pnfs_destroy_layout(struct nfs_inode *n
                pnfs_get_layout_hdr(lo);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
 -              pnfs_clear_retry_layoutget(lo);
                spin_unlock(&nfsi->vfs_inode.i_lock);
                pnfs_free_lseg_list(&tmp_list);
                pnfs_put_layout_hdr(lo);
@@@ -702,6 -703,8 +702,8 @@@ pnfs_layout_free_bulk_destroy_list(stru
                        ret = -EAGAIN;
                spin_unlock(&inode->i_lock);
                pnfs_free_lseg_list(&lseg_list);
+               /* Free all lsegs that are attached to commit buckets */
+               nfs_commit_inode(inode, 0);
                pnfs_put_layout_hdr(lo);
                iput(inode);
        }
@@@ -825,7 -828,7 +827,7 @@@ pnfs_layoutgets_blocked(const struct pn
  
  int
  pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
-                             struct pnfs_layout_range *range,
+                             const struct pnfs_layout_range *range,
                              struct nfs4_state *open_state)
  {
        int status = 0;
  static struct pnfs_layout_segment *
  send_layoutget(struct pnfs_layout_hdr *lo,
           struct nfs_open_context *ctx,
-          struct pnfs_layout_range *range,
+          const struct pnfs_layout_range *range,
           gfp_t gfp_flags)
  {
        struct inode *ino = lo->plh_inode;
                                lgp->args.minlength = i_size - range->offset;
                }
                lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
-               lgp->args.range = *range;
+               pnfs_copy_range(&lgp->args.range, range);
                lgp->args.type = server->pnfs_curr_ld->id;
                lgp->args.inode = ino;
                lgp->args.ctx = get_nfs_open_context(ctx);
                lseg = nfs4_proc_layoutget(lgp, gfp_flags);
        } while (lseg == ERR_PTR(-EAGAIN));
  
 -      if (IS_ERR(lseg)) {
 -              switch (PTR_ERR(lseg)) {
 -              case -ENOMEM:
 -              case -ERESTARTSYS:
 -                      break;
 -              default:
 -                      /* remember that LAYOUTGET failed and suspend trying */
 -                      pnfs_layout_io_set_failed(lo, range->iomode);
 -              }
 -              return NULL;
 -      } else
 +      if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg)))
 +              lseg = NULL;
 +      else
                pnfs_layout_clear_fail_bit(lo,
                                pnfs_iomode_to_fail_bit(range->iomode));
  
@@@ -936,7 -947,7 +938,7 @@@ void pnfs_clear_layoutreturn_waitbit(st
  }
  
  static int
- pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
+ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
                       enum pnfs_iomode iomode, bool sync)
  {
        struct inode *ino = lo->plh_inode;
                goto out;
        }
  
-       lrp->args.stateid = stateid;
+       nfs4_stateid_copy(&lrp->args.stateid, stateid);
        lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
        lrp->args.inode = ino;
        lrp->args.range.iomode = iomode;
@@@ -996,7 -1007,7 +998,7 @@@ _pnfs_return_layout(struct inode *ino
                dprintk("NFS: %s no layout to return\n", __func__);
                goto out;
        }
-       stateid = nfsi->layout->plh_stateid;
+       nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);
        /* Reference matched in nfs4_layoutreturn_release */
        pnfs_get_layout_hdr(lo);
        empty = list_empty(&lo->plh_segs);
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
        if (send)
-               status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+               status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
  out_put_layout_hdr:
        pnfs_put_layout_hdr(lo);
  out:
@@@ -1087,12 -1098,13 +1089,12 @@@ bool pnfs_roc(struct inode *ino
                        goto out_noroc;
        }
  
-       stateid = lo->plh_stateid;
+       nfs4_stateid_copy(&stateid, &lo->plh_stateid);
        /* always send layoutreturn if being marked so */
        if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
                                   &lo->plh_flags))
                layoutreturn = pnfs_prepare_layoutreturn(lo);
  
 -      pnfs_clear_retry_layoutget(lo);
        list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
                /* If we are sending layoutreturn, invalidate all valid lsegs */
                if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
@@@ -1114,7 -1126,7 +1116,7 @@@ out_noroc
        pnfs_free_lseg_list(&tmp_list);
        pnfs_layoutcommit_inode(ino, true);
        if (layoutreturn)
-               pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+               pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
        return roc;
  }
  
@@@ -1139,6 -1151,7 +1141,7 @@@ void pnfs_roc_set_barrier(struct inode 
  
        spin_lock(&ino->i_lock);
        lo = NFS_I(ino)->layout;
+       pnfs_mark_layout_returned_if_empty(lo);
        if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
                lo->plh_barrier = barrier;
        spin_unlock(&ino->i_lock);
@@@ -1455,15 -1468,25 +1458,15 @@@ static bool pnfs_within_mdsthreshold(st
        return ret;
  }
  
 -/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
 -static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
 -{
 -      if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
 -              return 1;
 -      return nfs_wait_bit_killable(key, mode);
 -}
 -
  static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
  {
 -      if (!pnfs_should_retry_layoutget(lo))
 -              return false;
        /*
         * send layoutcommit as it can hold up layoutreturn due to lseg
         * reference
         */
        pnfs_layoutcommit_inode(lo->plh_inode, false);
        return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
 -                                 pnfs_layoutget_retry_bit_wait,
 +                                 nfs_wait_bit_killable,
                                   TASK_UNINTERRUPTIBLE);
  }
  
@@@ -1538,7 -1561,8 +1541,7 @@@ lookup_again
        }
  
        /* if LAYOUTGET already failed once we don't try again */
 -      if (pnfs_layout_io_test_failed(lo, iomode) &&
 -          !pnfs_should_retry_layoutget(lo)) {
 +      if (pnfs_layout_io_test_failed(lo, iomode)) {
                trace_pnfs_update_layout(ino, pos, count, iomode, lo,
                                 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
                goto out_unlock;
                arg.length = PAGE_CACHE_ALIGN(arg.length);
  
        lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
 -      pnfs_clear_retry_layoutget(lo);
        atomic_dec(&lo->plh_outstanding);
        trace_pnfs_update_layout(ino, pos, count, iomode, lo,
                                 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
@@@ -1627,7 -1652,7 +1630,7 @@@ out
                        "(%s, offset: %llu, length: %llu)\n",
                        __func__, ino->i_sb->s_id,
                        (unsigned long long)NFS_FILEID(ino),
 -                      lseg == NULL ? "not found" : "found",
 +                      IS_ERR_OR_NULL(lseg) ? "not found" : "found",
                        iomode==IOMODE_RW ?  "read/write" : "read-only",
                        (unsigned long long)pos,
                        (unsigned long long)count);
@@@ -1734,16 -1759,29 +1737,29 @@@ out_forget_reply
  }
  
  static void
+ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
+ {
+       if (lo->plh_return_iomode == iomode)
+               return;
+       if (lo->plh_return_iomode != 0)
+               iomode = IOMODE_ANY;
+       lo->plh_return_iomode = iomode;
+ }
+ int
  pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                                struct list_head *tmp_list,
-                               struct pnfs_layout_range *return_range)
+                               const struct pnfs_layout_range *return_range)
  {
        struct pnfs_layout_segment *lseg, *next;
+       int remaining = 0;
  
        dprintk("%s:Begin lo %p\n", __func__, lo);
  
        if (list_empty(&lo->plh_segs))
-               return;
+               return 0;
+       assert_spin_locked(&lo->plh_inode->i_lock);
  
        list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
                if (should_free_lseg(&lseg->pls_range, return_range)) {
                                lseg->pls_range.offset,
                                lseg->pls_range.length);
                        set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
-                       mark_lseg_invalid(lseg, tmp_list);
+                       pnfs_set_plh_return_iomode(lo, return_range->iomode);
+                       if (!mark_lseg_invalid(lseg, tmp_list))
+                               remaining++;
                        set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
                                        &lo->plh_flags);
                }
+       return remaining;
  }
  
  void pnfs_error_mark_layout_for_return(struct inode *inode,
                                       struct pnfs_layout_segment *lseg)
  {
        struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
 -      int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode);
        struct pnfs_layout_range range = {
                .iomode = lseg->pls_range.iomode,
                .offset = 0,
                .length = NFS4_MAX_UINT64,
        };
        LIST_HEAD(free_me);
+       bool return_now = false;
  
        spin_lock(&inode->i_lock);
-       if (lo->plh_return_iomode == 0)
-               lo->plh_return_iomode = range.iomode;
-       else if (lo->plh_return_iomode != range.iomode)
-               lo->plh_return_iomode = IOMODE_ANY;
 -      /* set failure bit so that pnfs path will be retried later */
 -      pnfs_layout_set_fail_bit(lo, iomode);
+       pnfs_set_plh_return_iomode(lo, range.iomode);
        /*
         * mark all matching lsegs so that we are sure to have no live
         * segments at hand when sending layoutreturn. See pnfs_put_lseg()
         * for how it works.
         */
-       pnfs_mark_matching_lsegs_return(lo, &free_me, &range);
-       spin_unlock(&inode->i_lock);
+       if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) {
+               nfs4_stateid stateid;
+               enum pnfs_iomode iomode = lo->plh_return_iomode;
+               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+               return_now = pnfs_prepare_layoutreturn(lo);
+               spin_unlock(&inode->i_lock);
+               if (return_now)
+                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+       } else {
+               spin_unlock(&inode->i_lock);
+               nfs_commit_inode(inode, 0);
+       }
        pnfs_free_lseg_list(&free_me);
  }
  EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
@@@ -1803,11 -1856,6 +1831,11 @@@ pnfs_generic_pg_init_read(struct nfs_pa
                                                   rd_size,
                                                   IOMODE_READ,
                                                   GFP_KERNEL);
 +              if (IS_ERR(pgio->pg_lseg)) {
 +                      pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 +                      pgio->pg_lseg = NULL;
 +                      return;
 +              }
        }
        /* If no lseg, fall back to read through mds */
        if (pgio->pg_lseg == NULL)
  pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
                           struct nfs_page *req, u64 wb_size)
  {
 -      if (pgio->pg_lseg == NULL)
 +      if (pgio->pg_lseg == NULL) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                   req->wb_context,
                                                   req_offset(req),
                                                   wb_size,
                                                   IOMODE_RW,
                                                   GFP_NOFS);
 +              if (IS_ERR(pgio->pg_lseg)) {
 +                      pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 +                      pgio->pg_lseg = NULL;
 +                      return;
 +              }
 +      }
        /* If no lseg, fall back to write through mds */
        if (pgio->pg_lseg == NULL)
                nfs_pageio_reset_write_mds(pgio);
@@@ -2000,13 -2042,15 +2028,13 @@@ static void pnfs_writehdr_free(struct n
  int
  pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
  {
 -      struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
 -
        struct nfs_pgio_header *hdr;
        int ret;
  
        hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
        if (!hdr) {
 -              desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
 -              return -ENOMEM;
 +              desc->pg_error = -ENOMEM;
 +              return desc->pg_error;
        }
        nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
  
@@@ -2129,13 -2173,15 +2157,13 @@@ static void pnfs_readhdr_free(struct nf
  int
  pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
  {
 -      struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
 -
        struct nfs_pgio_header *hdr;
        int ret;
  
        hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
        if (!hdr) {
 -              desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
 -              return -ENOMEM;
 +              desc->pg_error = -ENOMEM;
 +              return desc->pg_error;
        }
        nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
        hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
diff --combined fs/nfs/pnfs.h
index 6916ff4e86f9ba425dd8616c248973cb1c998dd9,78df618a1596a9501a4d314cd8cd26232c018909..9f4e2a47f4aa4ff41a590178dde2ddf160ae244f
@@@ -98,6 -98,7 +98,6 @@@ enum 
        NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
        NFS_LAYOUT_INVALID_STID,        /* layout stateid id is invalid */
        NFS_LAYOUT_FIRST_LAYOUTGET,     /* Serialize first layoutget */
 -      NFS_LAYOUT_RETRY_LAYOUTGET,     /* Retry layoutget */
  };
  
  enum layoutdriver_policy_flags {
@@@ -260,11 -261,14 +260,14 @@@ void pnfs_set_layout_stateid(struct pnf
                             bool update_barrier);
  int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
                                  struct pnfs_layout_hdr *lo,
-                                 struct pnfs_layout_range *range,
+                                 const struct pnfs_layout_range *range,
                                  struct nfs4_state *open_state);
  int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
                                struct list_head *tmp_list,
-                               struct pnfs_layout_range *recall_range);
+                               const struct pnfs_layout_range *recall_range);
+ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
+                               struct list_head *tmp_list,
+                               const struct pnfs_layout_range *recall_range);
  bool pnfs_roc(struct inode *ino);
  void pnfs_roc_release(struct inode *ino);
  void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
@@@ -378,6 -382,26 +381,6 @@@ nfs4_get_deviceid(struct nfs4_deviceid_
        return d;
  }
  
 -static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo)
 -{
 -      if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
 -              atomic_inc(&lo->plh_refcount);
 -}
 -
 -static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo)
 -{
 -      if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) {
 -              atomic_dec(&lo->plh_refcount);
 -              /* wake up waiters for LAYOUTRETURN as that is not needed */
 -              wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
 -      }
 -}
 -
 -static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo)
 -{
 -      return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags);
 -}
 -
  static inline struct pnfs_layout_segment *
  pnfs_get_lseg(struct pnfs_layout_segment *lseg)
  {
        return lseg;
  }
  
+ static inline bool
+ pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg)
+ {
+       return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0;
+ }
  /* Return true if a layout driver is being used for this mountpoint */
  static inline int pnfs_enabled_sb(struct nfs_server *nfss)
  {
@@@ -535,6 -565,26 +544,26 @@@ pnfs_calc_offset_length(u64 offset, u6
        return 1 + end - offset;
  }
  
+ /**
+  * pnfs_mark_layout_returned_if_empty - marks the layout as returned
+  * @lo: layout header
+  *
+  * Note: Caller must hold inode->i_lock
+  */
+ static inline void
+ pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
+ {
+       if (list_empty(&lo->plh_segs))
+               set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ }
+ static inline void
+ pnfs_copy_range(struct pnfs_layout_range *dst,
+               const struct pnfs_layout_range *src)
+ {
+       memcpy(dst, src, sizeof(*dst));
+ }
  extern unsigned int layoutstats_timer;
  
  #ifdef NFS_DEBUG
diff --combined fs/nfs/write.c
index 4d254232d7283d8dfde0d793f2c33103c0c4c182,2c26e04d93968cf38e815af292a8b2ef5254f5ee..94828b3f8c9558ea5686fe604473af7e39e11e27
@@@ -21,6 -21,8 +21,8 @@@
  #include <linux/nfs_page.h>
  #include <linux/backing-dev.h>
  #include <linux/export.h>
+ #include <linux/freezer.h>
+ #include <linux/wait.h>
  
  #include <asm/uaccess.h>
  
@@@ -545,22 -547,12 +547,22 @@@ try_again
        return head;
  }
  
 +static void nfs_write_error_remove_page(struct nfs_page *req)
 +{
 +      nfs_unlock_request(req);
 +      nfs_end_page_writeback(req);
 +      nfs_release_request(req);
 +      generic_error_remove_page(page_file_mapping(req->wb_page),
 +                                req->wb_page);
 +}
 +
  /*
   * Find an associated nfs write request, and prepare to flush it out
   * May return an error if the user signalled nfs_wait_on_request().
   */
  static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 -                              struct page *page, bool nonblock)
 +                              struct page *page, bool nonblock,
 +                              bool launder)
  {
        struct nfs_page *req;
        int ret = 0;
  
        ret = 0;
        if (!nfs_pageio_add_request(pgio, req)) {
 -              nfs_redirty_request(req);
                ret = pgio->pg_error;
 +              /*
 +               * Remove the problematic req upon fatal errors
 +               * in launder case, while other dirty pages can
 +               * still be around until they get flushed.
 +               */
 +              if (nfs_error_is_fatal(ret)) {
 +                      nfs_context_set_write_error(req->wb_context, ret);
 +                      if (launder) {
 +                              nfs_write_error_remove_page(req);
 +                              goto out;
 +                      }
 +              }
 +              nfs_redirty_request(req);
 +              ret = -EAGAIN;
        } else
                nfs_add_stats(page_file_mapping(page)->host,
                                NFSIOS_WRITEPAGES, 1);
@@@ -599,14 -578,12 +601,14 @@@ out
        return ret;
  }
  
 -static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
 +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
 +                          struct nfs_pageio_descriptor *pgio, bool launder)
  {
        int ret;
  
        nfs_pageio_cond_complete(pgio, page_file_index(page));
 -      ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
 +      ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
 +                                 launder);
        if (ret == -EAGAIN) {
                redirty_page_for_writepage(wbc, page);
                ret = 0;
  /*
   * Write an mmapped page to the server.
   */
 -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 +static int nfs_writepage_locked(struct page *page,
 +                              struct writeback_control *wbc,
 +                              bool launder)
  {
        struct nfs_pageio_descriptor pgio;
        struct inode *inode = page_file_mapping(page)->host;
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
        nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
                                false, &nfs_async_write_completion_ops);
 -      err = nfs_do_writepage(page, wbc, &pgio);
 +      err = nfs_do_writepage(page, wbc, &pgio, launder);
        nfs_pageio_complete(&pgio);
        if (err < 0)
                return err;
@@@ -641,7 -616,7 +643,7 @@@ int nfs_writepage(struct page *page, st
  {
        int ret;
  
 -      ret = nfs_writepage_locked(page, wbc);
 +      ret = nfs_writepage_locked(page, wbc, false);
        unlock_page(page);
        return ret;
  }
@@@ -650,7 -625,7 +652,7 @@@ static int nfs_writepages_callback(stru
  {
        int ret;
  
 -      ret = nfs_do_writepage(page, wbc, data);
 +      ret = nfs_do_writepage(page, wbc, data, false);
        unlock_page(page);
        return ret;
  }
@@@ -1155,7 -1130,8 +1157,8 @@@ int nfs_flush_incompatible(struct file 
                if (req == NULL)
                        return 0;
                l_ctx = req->wb_lock_context;
-               do_flush = req->wb_page != page || req->wb_context != ctx;
+               do_flush = req->wb_page != page ||
+                       !nfs_match_open_context(req->wb_context, ctx);
                /* for now, flush if more than 1 request in page_group */
                do_flush |= req->wb_this_page != req;
                if (l_ctx && flctx &&
@@@ -1353,9 -1329,15 +1356,15 @@@ static void nfs_async_write_error(struc
        }
  }
  
+ static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
+ {
+       nfs_async_write_error(&hdr->pages);
+ }
  static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
        .error_cleanup = nfs_async_write_error,
        .completion = nfs_write_completion,
+       .reschedule_io = nfs_async_write_reschedule_io,
  };
  
  void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@@ -1556,27 -1538,29 +1565,29 @@@ static void nfs_writeback_result(struc
        }
  }
  
+ static int nfs_wait_atomic_killable(atomic_t *key)
+ {
+       if (fatal_signal_pending(current))
+               return -ERESTARTSYS;
+       freezable_schedule_unsafe();
+       return 0;
+ }
  
- static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+ static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
  {
-       int ret;
+       return wait_on_atomic_t(&cinfo->rpcs_out,
+                       nfs_wait_atomic_killable, TASK_KILLABLE);
+ }
  
-       if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
-               return 1;
-       if (!may_wait)
-               return 0;
-       ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
-                               NFS_INO_COMMIT,
-                               nfs_wait_bit_killable,
-                               TASK_KILLABLE);
-       return (ret < 0) ? ret : 1;
+ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+ {
+       atomic_inc(&cinfo->rpcs_out);
  }
  
- static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+ static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
  {
-       clear_bit(NFS_INO_COMMIT, &nfsi->flags);
-       smp_mb__after_atomic();
-       wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+       if (atomic_dec_and_test(&cinfo->rpcs_out))
+               wake_up_atomic_t(&cinfo->rpcs_out);
  }
  
  void nfs_commitdata_release(struct nfs_commit_data *data)
@@@ -1693,6 -1677,13 +1704,13 @@@ void nfs_retry_commit(struct list_head 
  }
  EXPORT_SYMBOL_GPL(nfs_retry_commit);
  
+ static void
+ nfs_commit_resched_write(struct nfs_commit_info *cinfo,
+               struct nfs_page *req)
+ {
+       __set_page_dirty_nobuffers(req->wb_page);
+ }
  /*
   * Commit dirty pages
   */
@@@ -1714,7 -1705,6 +1732,6 @@@ nfs_commit_list(struct inode *inode, st
                                   data->mds_ops, how, 0);
   out_bad:
        nfs_retry_commit(head, NULL, cinfo, 0);
-       cinfo->completion_ops->error_cleanup(NFS_I(inode));
        return -ENOMEM;
  }
  
@@@ -1776,8 -1766,7 +1793,7 @@@ static void nfs_commit_release_pages(st
                clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
  
        nfs_init_cinfo(&cinfo, data->inode, data->dreq);
-       if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
-               nfs_commit_clear_lock(NFS_I(data->inode));
+       nfs_commit_end(cinfo.mds);
  }
  
  static void nfs_commit_release(void *calldata)
@@@ -1796,7 -1785,7 +1812,7 @@@ static const struct rpc_call_ops nfs_co
  
  static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
        .completion = nfs_commit_release_pages,
-       .error_cleanup = nfs_commit_clear_lock,
+       .resched_write = nfs_commit_resched_write,
  };
  
  int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
@@@ -1815,30 -1804,25 +1831,25 @@@ int nfs_commit_inode(struct inode *inod
        LIST_HEAD(head);
        struct nfs_commit_info cinfo;
        int may_wait = how & FLUSH_SYNC;
+       int error = 0;
        int res;
  
-       res = nfs_commit_set_lock(NFS_I(inode), may_wait);
-       if (res <= 0)
-               goto out_mark_dirty;
        nfs_init_cinfo_from_inode(&cinfo, inode);
+       nfs_commit_begin(cinfo.mds);
        res = nfs_scan_commit(inode, &head, &cinfo);
-       if (res) {
-               int error;
+       if (res)
                error = nfs_generic_commit_list(inode, &head, how, &cinfo);
-               if (error < 0)
-                       return error;
-               if (!may_wait)
-                       goto out_mark_dirty;
-               error = wait_on_bit_action(&NFS_I(inode)->flags,
-                               NFS_INO_COMMIT,
-                               nfs_wait_bit_killable,
-                               TASK_KILLABLE);
-               if (error < 0)
-                       return error;
-       } else
-               nfs_commit_clear_lock(NFS_I(inode));
+       nfs_commit_end(cinfo.mds);
+       if (error < 0)
+               goto out_error;
+       if (!may_wait)
+               goto out_mark_dirty;
+       error = wait_on_commit(cinfo.mds);
+       if (error < 0)
+               return error;
        return res;
+ out_error:
+       res = error;
        /* Note: If we exit without ensuring that the commit is complete,
         * we must mark the inode as dirty. Otherwise, future calls to
         * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
@@@ -1848,6 -1832,7 +1859,7 @@@ out_mark_dirty
        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
        return res;
  }
+ EXPORT_SYMBOL_GPL(nfs_commit_inode);
  
  int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
  {
@@@ -1938,7 -1923,7 +1950,7 @@@ int nfs_wb_page_cancel(struct inode *in
  /*
   * Write back all requests on one page - we do this before reading it.
   */
 -int nfs_wb_page(struct inode *inode, struct page *page)
 +int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
  {
        loff_t range_start = page_file_offset(page);
        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
        for (;;) {
                wait_on_page_writeback(page);
                if (clear_page_dirty_for_io(page)) {
 -                      ret = nfs_writepage_locked(page, &wbc);
 +                      ret = nfs_writepage_locked(page, &wbc, launder);
                        if (ret < 0)
                                goto out_error;
                        continue;
diff --combined include/linux/nfs_fs.h
index b88fc46cfbb8186698b1baca6fb5090634515c73,ebf0bd72a42bde532394209a4ecfa45d69a8dadc..9eee972863a7462af07b1ea3894b40062cb78cda
@@@ -216,7 -216,6 +216,6 @@@ struct nfs_inode 
  #define NFS_INO_FLUSHING      (4)             /* inode is flushing out data */
  #define NFS_INO_FSCACHE               (5)             /* inode can be cached by FS-Cache */
  #define NFS_INO_FSCACHE_LOCK  (6)             /* FS-Cache cookie management lock */
- #define NFS_INO_COMMIT                (7)             /* inode is committing unstable writes */
  #define NFS_INO_LAYOUTCOMMIT  (9)             /* layoutcommit required */
  #define NFS_INO_LAYOUTCOMMITTING (10)         /* layoutcommit inflight */
  #define NFS_INO_LAYOUTSTATS   (11)            /* layoutstats inflight */
@@@ -517,24 -516,12 +516,24 @@@ extern int  nfs_updatepage(struct file 
   */
  extern int nfs_sync_inode(struct inode *inode);
  extern int nfs_wb_all(struct inode *inode);
 -extern int nfs_wb_page(struct inode *inode, struct page* page);
 +extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
  extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
  extern int  nfs_commit_inode(struct inode *, int);
  extern struct nfs_commit_data *nfs_commitdata_alloc(void);
  extern void nfs_commit_free(struct nfs_commit_data *data);
  
 +static inline int
 +nfs_wb_launder_page(struct inode *inode, struct page *page)
 +{
 +      return nfs_wb_single_page(inode, page, true);
 +}
 +
 +static inline int
 +nfs_wb_page(struct inode *inode, struct page *page)
 +{
 +      return nfs_wb_single_page(inode, page, false);
 +}
 +
  static inline int
  nfs_have_writebacks(struct inode *inode)
  {
diff --combined include/linux/nfs_xdr.h
index 7b30ac0c7def8708a4c593a533c3d00d71c7c46b,bee3e60a7006d35869f853ed5d3cb8f357ff890d..791098a08a8765bea64b4137baaf12ef4af62ac8
@@@ -1375,7 -1375,6 +1375,7 @@@ enum 
        NFS_IOHDR_ERROR = 0,
        NFS_IOHDR_EOF,
        NFS_IOHDR_REDO,
 +      NFS_IOHDR_STAT,
  };
  
  struct nfs_pgio_header {
@@@ -1421,11 -1420,12 +1421,12 @@@ struct nfs_mds_commit_info 
        struct list_head        list;
  };
  
+ struct nfs_commit_info;
  struct nfs_commit_data;
  struct nfs_inode;
  struct nfs_commit_completion_ops {
-       void (*error_cleanup) (struct nfs_inode *nfsi);
        void (*completion) (struct nfs_commit_data *data);
+       void (*resched_write) (struct nfs_commit_info *, struct nfs_page *);
  };
  
  struct nfs_commit_info {
@@@ -1455,13 -1455,13 +1456,14 @@@ struct nfs_commit_data 
        const struct rpc_call_ops *mds_ops;
        const struct nfs_commit_completion_ops *completion_ops;
        int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
 +      unsigned long           flags;
  };
  
  struct nfs_pgio_completion_ops {
        void    (*error_cleanup)(struct list_head *head);
        void    (*init_hdr)(struct nfs_pgio_header *hdr);
        void    (*completion)(struct nfs_pgio_header *hdr);
+       void    (*reschedule_io)(struct nfs_pgio_header *hdr);
  };
  
  struct nfs_unlinkdata {