]> git.karo-electronics.de Git - mv-sheeva.git/commitdiff
pnfs: add CB_LAYOUTRECALL handling
author Fred Isaman <iisaman@netapp.com>
Thu, 6 Jan 2011 11:36:30 +0000 (11:36 +0000)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Thu, 6 Jan 2011 19:46:32 +0000 (14:46 -0500)
This is the heart of the wave 2 submission.  Add the code to trigger
drain and forget of any affected layouts.  In addition, we set a
"barrier": any LAYOUTGET reply whose seqid is at or below it is
ignored.  This is to compensate for the fact that we do not wait for
outstanding LAYOUTGETs to complete as per section 12.5.5.2.1 of RFC 5661.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/callback_proc.c
fs/nfs/nfs4_fs.h
fs/nfs/pnfs.c
fs/nfs/pnfs.h

index c1bb157e94bd8c6c7dad2bba650f9f17d618211c..6619c05b55a0a74560b1d41e13574577efdf2382 100644 (file)
@@ -12,6 +12,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "pnfs.h"
 
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -107,10 +108,126 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 
 #if defined(CONFIG_NFS_V4_1)
 
+/*
+ * Handle a CB_LAYOUTRECALL that targets a single file.
+ *
+ * Searches clp->cl_layouts for the layout header whose inode filehandle
+ * matches args->cbl_fh, invalidates the segments matching the recalled
+ * iomode and records the recall stateid with the barrier update enabled.
+ *
+ * Returns NFS4ERR_NOMATCHING_LAYOUT when no such layout is found or no
+ * matching segments remain, and NFS4ERR_DELAY when a bulk recall is
+ * flagged on the layout or matching segments are still left in the list.
+ */
+static u32 initiate_file_draining(struct nfs_client *clp,
+                                 struct cb_layoutrecallargs *args)
+{
+       struct pnfs_layout_hdr *lo;
+       struct inode *ino = NULL;
+       u32 status = NFS4ERR_NOMATCHING_LAYOUT;
+       LIST_HEAD(free_me_list);
+
+       spin_lock(&clp->cl_lock);
+       list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+               if (nfs_compare_fh(&args->cbl_fh,
+                                  &NFS_I(lo->plh_inode)->fh) != 0)
+                       continue;
+               ino = igrab(lo->plh_inode);
+               if (ino != NULL) {
+                       /* Take our own reference: otherwise the layout
+                        * can be freed as soon as cl_lock is released.
+                        */
+                       get_layout_hdr(lo);
+                       break;
+               }
+       }
+       spin_unlock(&clp->cl_lock);
+       if (ino == NULL)
+               return status;
+
+       spin_lock(&ino->i_lock);
+       if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+               status = NFS4ERR_DELAY;
+       else if (mark_matching_lsegs_invalid(lo, &free_me_list,
+                                            args->cbl_range.iomode))
+               status = NFS4ERR_DELAY;
+       pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+       spin_unlock(&ino->i_lock);
+
+       /* Drop the collected segments and both references taken above. */
+       pnfs_free_lseg_list(&free_me_list);
+       put_layout_hdr(lo);
+       iput(ino);
+       return status;
+}
+
+/*
+ * Handle a CB_LAYOUTRECALL that is not for a single file.
+ *
+ * For RETURN_FSID only layouts whose server fsid equals args->cbl_fsid
+ * are affected; for any other recall type every layout on
+ * clp->cl_layouts is.  Each affected layout is flagged with
+ * NFS_LAYOUT_BULK_RECALL and all of its segments (IOMODE_ANY) are
+ * marked invalid.
+ *
+ * Returns NFS4ERR_DELAY if any affected layout still has matching
+ * segments left after invalidation, otherwise NFS4ERR_NOMATCHING_LAYOUT.
+ */
+static u32 initiate_bulk_draining(struct nfs_client *clp,
+                                 struct cb_layoutrecallargs *args)
+{
+       const bool fsid_only = (args->cbl_recall_type == RETURN_FSID);
+       struct pnfs_layout_hdr *lo, *next;
+       u32 status = NFS4ERR_NOMATCHING_LAYOUT;
+       LIST_HEAD(recall_list);
+       LIST_HEAD(free_me_list);
+
+       /* Pass 1: under cl_lock, pin and collect every affected layout. */
+       spin_lock(&clp->cl_lock);
+       list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+               if (fsid_only &&
+                   memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
+                          &args->cbl_fsid, sizeof(struct nfs_fsid)) != 0)
+                       continue;
+               if (igrab(lo->plh_inode) == NULL)
+                       continue;
+               get_layout_hdr(lo);
+               BUG_ON(!list_empty(&lo->plh_bulk_recall));
+               list_add(&lo->plh_bulk_recall, &recall_list);
+       }
+       spin_unlock(&clp->cl_lock);
+
+       /* Pass 2: flag and drain each collected layout under its i_lock,
+        * then drop the inode and header references taken in pass 1.
+        */
+       list_for_each_entry_safe(lo, next, &recall_list, plh_bulk_recall) {
+               struct inode *ino = lo->plh_inode;
+
+               spin_lock(&ino->i_lock);
+               set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+               if (mark_matching_lsegs_invalid(lo, &free_me_list, IOMODE_ANY))
+                       status = NFS4ERR_DELAY;
+               list_del_init(&lo->plh_bulk_recall);
+               spin_unlock(&ino->i_lock);
+               put_layout_hdr(lo);
+               iput(ino);
+       }
+       pnfs_free_lseg_list(&free_me_list);
+       return status;
+}
+
+/*
+ * Dispatch a layout recall to the single-file or bulk drain helper.
+ *
+ * The NFS4CLNT_LAYOUTRECALL bit in clp->cl_state is set for the duration
+ * of the drain.  If it is already set on entry, no work is done and
+ * NFS4ERR_DELAY is returned; otherwise the helper's status is returned.
+ */
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+                                   struct cb_layoutrecallargs *args)
+{
+       u32 res = NFS4ERR_DELAY;
+
+       dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+       if (!test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state)) {
+               if (args->cbl_recall_type == RETURN_FILE)
+                       res = initiate_file_draining(clp, args);
+               else
+                       res = initiate_bulk_draining(clp, args);
+               clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
+       }
+       dprintk("%s returning %i\n", __func__, res);
+       return res;
+}
+
 __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
                                  void *dummy, struct cb_process_state *cps)
 {
-       return cpu_to_be32(NFS4ERR_NOTSUPP); /* STUB */
+       u32 res;
+
+       dprintk("%s: -->\n", __func__);
+
+       if (cps->clp)
+               res = do_callback_layoutrecall(cps->clp, args);
+       else
+               res = NFS4ERR_OP_NOT_IN_SESSION;
+
+       dprintk("%s: exit with status = %d\n", __func__, res);
+       return cpu_to_be32(res);
 }
 
 int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
index 3b3829c3098fa828eceb9c593ef01b1a1630f904..8f169dc789dbb589f030a456e30092a3c39f61f7 100644 (file)
@@ -44,6 +44,7 @@ enum nfs4_client_state {
        NFS4CLNT_RECLAIM_REBOOT,
        NFS4CLNT_RECLAIM_NOGRACE,
        NFS4CLNT_DELEGRETURN,
+       NFS4CLNT_LAYOUTRECALL,
        NFS4CLNT_SESSION_RESET,
        NFS4CLNT_RECALL_SLOT,
 };
index 32b66468e5dbcdb3297298646ec4289ebeaf41b3..bf4186b8f2fcea6a0ca0deb884f63ad16ecff6f4 100644 (file)
@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  */
 
 /* Need to hold i_lock if caller does not already hold reference */
-static void
+void
 get_layout_hdr(struct pnfs_layout_hdr *lo)
 {
        atomic_inc(&lo->plh_refcount);
@@ -254,6 +254,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
                        /* List does not take a reference, so no need for put here */
                        list_del_init(&lseg->pls_layout->plh_layouts);
                        spin_unlock(&clp->cl_lock);
+                       clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
                }
                list_add(&lseg->pls_list, tmp_list);
                return 1;
@@ -287,7 +288,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 /* Returns count of number of matching invalid lsegs remaining in list
  * after call.
  */
-static int
+int
 mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
                            struct list_head *tmp_list,
                            u32 iomode)
@@ -310,7 +311,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
        return invalid - removed;
 }
 
-static void
+void
 pnfs_free_lseg_list(struct list_head *free_me)
 {
        struct pnfs_layout_segment *lseg, *tmp;
@@ -363,23 +364,45 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 }
 
 /* update lo->plh_stateid with new if is more recent */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
-                       const nfs4_stateid *new)
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+                       bool update_barrier)
 {
        u32 oldseq, newseq;
 
        oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
        newseq = be32_to_cpu(new->stateid.seqid);
-       if ((int)(newseq - oldseq) > 0)
+       if ((int)(newseq - oldseq) > 0) {
                memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+               if (update_barrier) {
+                       u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+                       if ((int)(new_barrier - lo->plh_barrier))
+                               lo->plh_barrier = new_barrier;
+               } else {
+                       /* Because of wraparound, we want to keep the barrier
+                        * "close" to the current seqids.  It needs to be
+                        * within 2**31 to count as "behind", so if it
+                        * gets too near that limit, give us a litle leeway
+                        * and bring it to within 2**30.
+                        * NOTE - and yes, this is all unsigned arithmetic.
+                        */
+                       if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+                               lo->plh_barrier = newseq - (1 << 30);
+               }
+       }
 }
 
 /* lget is set to 1 if called from inside send_layoutget call chain */
 static bool
-pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget)
-{
-       return (list_empty(&lo->plh_segs) &&
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+                       int lget)
+{      /* Returns true if any of the blocking conditions below holds. */
+       if ((stateid) &&        /* a NULL stateid skips the barrier test */
+           (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+               return true;    /* seqid is at or behind plh_barrier */
+       return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || /* bulk recall flagged */
+               (list_empty(&lo->plh_segs) &&   /* no segments, and more than lget RPCs out */
                 (atomic_read(&lo->plh_outstanding) > lget));
 }
 
@@ -391,7 +414,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
        dprintk("--> %s\n", __func__);
        spin_lock(&lo->plh_inode->i_lock);
-       if (pnfs_layoutgets_blocked(lo, 1)) {
+       if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
                status = -EAGAIN;
        } else if (list_empty(&lo->plh_segs)) {
                int seq;
@@ -510,6 +533,7 @@ alloc_init_layout_hdr(struct inode *ino)
        atomic_set(&lo->plh_refcount, 1);
        INIT_LIST_HEAD(&lo->plh_layouts);
        INIT_LIST_HEAD(&lo->plh_segs);
+       INIT_LIST_HEAD(&lo->plh_bulk_recall);
        lo->plh_inode = ino;
        return lo;
 }
@@ -561,7 +585,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
  * lookup range in layout
  */
 static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 {
        struct pnfs_layout_segment *lseg, *ret = NULL;
 
@@ -606,19 +630,22 @@ pnfs_update_layout(struct inode *ino,
                goto out_unlock;
        }
 
-       /* Check to see if the layout for the given range already exists */
-       lseg = pnfs_has_layout(lo, iomode);
-       if (lseg) {
-               dprintk("%s: Using cached lseg %p for iomode %d)\n",
-                       __func__, lseg, iomode);
+       /* Do we even need to bother with this? */
+       if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+           test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+               dprintk("%s matches recall, use MDS\n", __func__);
                goto out_unlock;
        }
+       /* Check to see if the layout for the given range already exists */
+       lseg = pnfs_find_lseg(lo, iomode);
+       if (lseg)
+               goto out_unlock;
 
        /* if LAYOUTGET already failed once we don't try again */
        if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
                goto out_unlock;
 
-       if (pnfs_layoutgets_blocked(lo, 0))
+       if (pnfs_layoutgets_blocked(lo, NULL, 0))
                goto out_unlock;
        atomic_inc(&lo->plh_outstanding);
 
@@ -641,6 +668,7 @@ pnfs_update_layout(struct inode *ino,
                        spin_lock(&clp->cl_lock);
                        list_del_init(&lo->plh_layouts);
                        spin_unlock(&clp->cl_lock);
+                       clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
                }
                spin_unlock(&ino->i_lock);
        }
@@ -662,6 +690,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
        struct nfs4_layoutget_res *res = &lgp->res;
        struct pnfs_layout_segment *lseg;
        struct inode *ino = lo->plh_inode;
+       struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
        int status = 0;
 
        /* Verify we got what we asked for.
@@ -688,16 +717,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
        }
 
        spin_lock(&ino->i_lock);
+       if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+           test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+               dprintk("%s forget reply due to recall\n", __func__);
+               goto out_forget_reply;
+       }
+
+       if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+               dprintk("%s forget reply due to state\n", __func__);
+               goto out_forget_reply;
+       }
        init_lseg(lo, lseg);
        lseg->pls_range = res->range;
        *lgp->lsegpp = lseg;
        pnfs_insert_layout(lo, lseg);
 
        /* Done processing layoutget. Set the layout stateid */
-       pnfs_set_layout_stateid(lo, &res->stateid);
+       pnfs_set_layout_stateid(lo, &res->stateid, false);
        spin_unlock(&ino->i_lock);
 out:
        return status;
+
+out_forget_reply:
+       spin_unlock(&ino->i_lock);
+       lseg->pls_layout = lo;
+       NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+       goto out;
 }
 
 /*
index 8aaab56b794f604124ee2a6415c835ad230b4068..f91d0d45551cc67cd11a3509bf6b5e9192069ba4 100644 (file)
@@ -49,6 +49,7 @@ struct pnfs_layout_segment {
 enum {  /* flag bit numbers; NFS_LAYOUT_BULK_RECALL is used with set_bit/test_bit on plh_flags (presumably all are) */
        NFS_LAYOUT_RO_FAILED = 0,       /* get ro layout failed stop trying */
        NFS_LAYOUT_RW_FAILED,           /* get rw layout failed stop trying */
+       NFS_LAYOUT_BULK_RECALL,         /* bulk recall affecting layout */
        NFS_LAYOUT_DESTROYED,           /* no new use of layout allowed */
 };
 
@@ -67,9 +68,11 @@ struct pnfs_layoutdriver_type {
 struct pnfs_layout_hdr {
        atomic_t                plh_refcount;  /* incremented by get_layout_hdr() */
        struct list_head        plh_layouts;   /* other client layouts */
+       struct list_head        plh_bulk_recall; /* entry on a bulk recall work list */
        struct list_head        plh_segs;      /* layout segments list */
        nfs4_stateid            plh_stateid;   /* most recent layout stateid recorded */
        atomic_t                plh_outstanding; /* number of RPCs out */
+       u32                     plh_barrier; /* ignore seqids at or below this */
        unsigned long           plh_flags;     /* NFS_LAYOUT_* flag bits */
        struct inode            *plh_inode;    /* inode given to alloc_init_layout_hdr() */
 };
@@ -139,18 +142,26 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
 
 /* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
                   enum pnfs_iomode access_type);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
 void put_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+                            const nfs4_stateid *new,
+                            bool update_barrier);
 int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
                                  struct pnfs_layout_hdr *lo,
                                  struct nfs4_state *open_state);
+int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+                               struct list_head *tmp_list,
+                               u32 iomode);
 
 
 static inline int lo_fail_bit(u32 iomode)