]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - fs/nfs/nfs4filelayoutdev.c
NFSv4.1 move deviceid cache to filelayout driver
[mv-sheeva.git] / fs / nfs / nfs4filelayoutdev.c
index 51fe64ace55a34523f9f3d6fd461962e364c1b4e..f594ca35a99657c2c29d4ed03dd975349a27a367 100644 (file)
 
 #define NFSDBG_FACILITY                NFSDBG_PNFS_LD
 
+/*
+ * Device ID RCU cache. A device ID is unique per client ID and layout type.
+ */
+#define NFS4_FL_DEVICE_ID_HASH_BITS    5
+#define NFS4_FL_DEVICE_ID_HASH_SIZE    (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
+#define NFS4_FL_DEVICE_ID_HASH_MASK    (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
+
+static inline u32
+nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
+{
+       unsigned char *cptr = (unsigned char *)id->data;
+       unsigned int nbytes = NFS4_DEVICEID4_SIZE;
+       u32 x = 0;
+
+       while (nbytes--) {
+               x *= 37;
+               x += *cptr++;
+       }
+       return x & NFS4_FL_DEVICE_ID_HASH_MASK;
+}
+
+static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
+static DEFINE_SPINLOCK(filelayout_deviceid_lock);
+
 /*
  * Data server cache
  *
@@ -104,6 +128,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port)
        return NULL;
 }
 
+/*
+ * Create an RPC connection to the nfs4_pnfs_ds data server.
+ * Currently only IPv4 is supported.
+ */
+static int
+nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
+{
+       struct nfs_client *clp;
+       struct sockaddr_in sin;
+       int status = 0;
+
+       dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
+               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+               mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
+
+       sin.sin_family = AF_INET;
+       sin.sin_addr.s_addr = ds->ds_ip_addr;
+       sin.sin_port = ds->ds_port;
+
+       clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
+                                sizeof(sin), IPPROTO_TCP);
+       if (IS_ERR(clp)) {
+               status = PTR_ERR(clp);
+               goto out;
+       }
+
+       if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
+               if (!is_ds_client(clp)) {
+                       status = -ENODEV;
+                       goto out_put;
+               }
+               ds->ds_clp = clp;
+               dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
+                       ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+               goto out;
+       }
+
+       /*
+        * Do not set NFS_CS_CHECK_LEASE_TIME; instead, set the DS lease to
+        * be equal to the MDS lease. Renewal is scheduled in create_session.
+        */
+       spin_lock(&mds_srv->nfs_client->cl_lock);
+       clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
+       spin_unlock(&mds_srv->nfs_client->cl_lock);
+       clp->cl_last_renewal = jiffies;
+
+       /* New nfs_client */
+       status = nfs4_init_ds_session(clp);
+       if (status)
+               goto out_put;
+
+       ds->ds_clp = clp;
+       dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
+               ntohs(ds->ds_port));
+out:
+       return status;
+out_put:
+       nfs_put_client(clp);
+       goto out;
+}
+
 static void
 destroy_ds(struct nfs4_pnfs_ds *ds)
 {
@@ -122,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
        struct nfs4_pnfs_ds *ds;
        int i;
 
-       print_deviceid(&dsaddr->deviceid.de_id);
+       print_deviceid(&dsaddr->deviceid);
 
        for (i = 0; i < dsaddr->ds_num; i++) {
                ds = dsaddr->ds_list[i];
@@ -139,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
        kfree(dsaddr);
 }
 
-void
-nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
-{
-       struct nfs4_file_layout_dsaddr *dsaddr =
-               container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
-
-       nfs4_fl_free_deviceid(dsaddr);
-}
-
 static struct nfs4_pnfs_ds *
 nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
 {
@@ -214,17 +290,26 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
 
        /* ipv6 length plus port is legal */
        if (rlen > INET6_ADDRSTRLEN + 8) {
-               dprintk("%s Invalid address, length %d\n", __func__,
+               dprintk("%s: Invalid address, length %d\n", __func__,
                        rlen);
                goto out_err;
        }
        buf = kmalloc(rlen + 1, GFP_KERNEL);
+       if (!buf) {
+               dprintk("%s: Not enough memory\n", __func__);
+               goto out_err;
+       }
        buf[rlen] = '\0';
        memcpy(buf, r_addr, rlen);
 
        /* replace the port dots with dashes for the in4_pton() delimiter*/
        for (i = 0; i < 2; i++) {
                char *res = strrchr(buf, '.');
+               if (!res) {
+                       dprintk("%s: Failed finding expected dots in port\n",
+                               __func__);
+                       goto out_free;
+               }
                *res = '-';
        }
 
@@ -240,7 +325,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
        port = htons((tmp[0] << 8) | (tmp[1]));
 
        ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
-       dprintk("%s Decoded address and port %s\n", __func__, buf);
+       dprintk("%s: Decoded address and port %s\n", __func__, buf);
 out_free:
        kfree(buf);
 out_err:
@@ -291,7 +376,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
        dsaddr->stripe_count = cnt;
        dsaddr->ds_num = num;
 
-       memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
+       memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
 
        /* Go back an read stripe indices */
        p = indicesp;
@@ -341,28 +426,37 @@ out_err:
 }
 
 /*
- * Decode the opaque device specified in 'dev'
- * and add it to the list of available devices.
- * If the deviceid is already cached, nfs4_add_deviceid will return
- * a pointer to the cached struct and throw away the new.
+ * Decode the opaque device specified in 'dev' and add it to the cache of
+ * available devices.
  */
-static struct nfs4_file_layout_dsaddr*
+static struct nfs4_file_layout_dsaddr *
 decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
 {
-       struct nfs4_file_layout_dsaddr *dsaddr;
-       struct pnfs_deviceid_node *d;
+       struct nfs4_file_layout_dsaddr *d, *new;
+       long hash;
 
-       dsaddr = decode_device(inode, dev);
-       if (!dsaddr) {
+       new = decode_device(inode, dev);
+       if (!new) {
                printk(KERN_WARNING "%s: Could not decode or add device\n",
                        __func__);
                return NULL;
        }
 
-       d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
-                             &dsaddr->deviceid);
+       spin_lock(&filelayout_deviceid_lock);
+       d = nfs4_fl_find_get_deviceid(&new->deviceid);
+       if (d) {
+               spin_unlock(&filelayout_deviceid_lock);
+               nfs4_fl_free_deviceid(new);
+               return d;
+       }
+
+       INIT_HLIST_NODE(&new->node);
+       atomic_set(&new->ref, 1);
+       hash = nfs4_fl_deviceid_hash(&new->deviceid);
+       hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
+       spin_unlock(&filelayout_deviceid_lock);
 
-       return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
+       return new;
 }
 
 /*
@@ -437,12 +531,103 @@ out_free:
        return dsaddr;
 }
 
+void
+nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
+{
+       if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
+               hlist_del_rcu(&dsaddr->node);
+               spin_unlock(&filelayout_deviceid_lock);
+
+               synchronize_rcu();
+               nfs4_fl_free_deviceid(dsaddr);
+       }
+}
+
 struct nfs4_file_layout_dsaddr *
-nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
+nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
+{
+       struct nfs4_file_layout_dsaddr *d;
+       struct hlist_node *n;
+       long hash = nfs4_fl_deviceid_hash(id);
+
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
+               if (!memcmp(&d->deviceid, id, sizeof(*id))) {
+                       if (!atomic_inc_not_zero(&d->ref))
+                               goto fail;
+                       rcu_read_unlock();
+                       return d;
+               }
+       }
+fail:
+       rcu_read_unlock();
+       return NULL;
+}
+
+/*
+ * Want res = (offset - flseg->pattern_offset) / flseg->stripe_unit
+ * Then: ((res + flseg->first_stripe_index) % flseg->dsaddr->stripe_count)
+ */
+u32
+nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
 {
-       struct pnfs_deviceid_node *d;
+       struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
+       u64 tmp;
 
-       d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
-       return (d == NULL) ? NULL :
-               container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
+       tmp = offset - flseg->pattern_offset;
+       do_div(tmp, flseg->stripe_unit);
+       tmp += flseg->first_stripe_index;
+       return do_div(tmp, flseg->dsaddr->stripe_count);
+}
+
+u32
+nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
+{
+       return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
+}
+
+struct nfs_fh *
+nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
+{
+       struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
+       u32 i;
+
+       if (flseg->stripe_type == STRIPE_SPARSE) {
+               if (flseg->num_fh == 1)
+                       i = 0;
+               else if (flseg->num_fh == 0)
+                       /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
+                       return NULL;
+               else
+                       i = nfs4_fl_calc_ds_index(lseg, j);
+       } else
+               i = j;
+       return flseg->fh_array[i];
+}
+
+struct nfs4_pnfs_ds *
+nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
+{
+       struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
+       struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
+
+       if (ds == NULL) {
+               printk(KERN_ERR "%s: No data server for offset index %d\n",
+                       __func__, ds_idx);
+               return NULL;
+       }
+
+       if (!ds->ds_clp) {
+               int err;
+
+               err = nfs4_ds_connect(NFS_SERVER(lseg->pls_layout->plh_inode),
+                                         dsaddr->ds_list[ds_idx]);
+               if (err) {
+                       printk(KERN_ERR "%s nfs4_ds_connect error %d\n",
+                              __func__, err);
+                       return NULL;
+               }
+       }
+       return ds;
 }