}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
- {
- switch (sa->sa_family) {
- default:
- return NULL;
- case AF_INET6:
- return &((const struct sockaddr_in6 *)sa)->sin6_addr;
- break;
- case AF_INET:
- ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
- addr_mapped);
- return addr_mapped;
- }
- }
-
- static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
- const struct sockaddr *sa2)
- {
- const struct in6_addr *addr1;
- const struct in6_addr *addr2;
- struct in6_addr addr1_mapped;
- struct in6_addr addr2_mapped;
-
- addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
- if (likely(addr1 != NULL)) {
- addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
- if (likely(addr2 != NULL))
- return ipv6_addr_equal(addr1, addr2);
- }
- return 0;
- }
-
/*
* Test if two ip6 socket addresses refer to the same socket by
* comparing relevant fields. The padding bytes specifically, are not
*
* The caller should ensure both socket addresses are AF_INET6.
*/
- static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
- const struct sockaddr *sa2)
+ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
{
- const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1;
- const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2;
+ const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
+ const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
- if (!ipv6_addr_equal(&saddr1->sin6_addr,
- &saddr1->sin6_addr))
- return 0;
- if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
- saddr1->sin6_scope_id != saddr2->sin6_scope_id)
+ if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
+ sin1->sin6_scope_id != sin2->sin6_scope_id)
return 0;
- return saddr1->sin6_port == saddr2->sin6_port;
- }
- #else
- static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
- const struct sockaddr_in *sa2)
- {
- return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
- }
- static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
- const struct sockaddr *sa2)
- {
- if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET))
- return 0;
- return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
- (const struct sockaddr_in *)sa2);
+ return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
}
-
- static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
- const struct sockaddr * sa2)
+ #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
+ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
{
return 0;
}
*
* The caller should ensure both socket addresses are AF_INET.
*/
+ static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+ {
+ const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
+ const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
+
+ return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
+ }
+
+ static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+ {
+ const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
+ const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
+
+ return nfs_sockaddr_match_ipaddr6(sa1, sa2) &&
+ (sin1->sin6_port == sin2->sin6_port);
+ }
+
static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
- const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1;
- const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2;
+ const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
+ const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
- if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr)
+ return nfs_sockaddr_match_ipaddr4(sa1, sa2) &&
+ (sin1->sin_port == sin2->sin_port);
+ }
+
+ /*
+ * Test if two socket addresses represent the same actual socket,
+ * by comparing (only) relevant fields, excluding the port number.
+ */
+ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+ {
+ if (sa1->sa_family != sa2->sa_family)
return 0;
- return saddr1->sin_port == saddr2->sin_port;
+
+ switch (sa1->sa_family) {
+ case AF_INET:
+ return nfs_sockaddr_match_ipaddr4(sa1, sa2);
+ case AF_INET6:
+ return nfs_sockaddr_match_ipaddr6(sa1, sa2);
+ }
+ return 0;
}
/*
* Test if two socket addresses represent the same actual socket,
- * by comparing (only) relevant fields.
+ * by comparing (only) relevant fields, including the port number.
*/
static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
const struct sockaddr *sa2)
if (!proc_fs_nfs)
goto error_0;
- proc_fs_nfs->owner = THIS_MODULE;
-
/* a file of servers with which we're dealing */
p = proc_create("servers", S_IFREG|S_IRUGO,
proc_fs_nfs, &nfs_server_list_fops);
iput(inode);
}
-struct dentry_operations nfs_dentry_operations = {
+const struct dentry_operations nfs_dentry_operations = {
.d_revalidate = nfs_lookup_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
#ifdef CONFIG_NFS_V4
static int nfs_open_revalidate(struct dentry *, struct nameidata *);
-struct dentry_operations nfs4_dentry_operations = {
+const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs_open_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
} else if (atomic_read(&new_dentry->d_count) > 1)
/* dentry still busy? */
goto out;
- } else
- nfs_drop_nlink(new_inode);
+ }
go_ahead:
/*
}
nfs_inode_return_delegation(old_inode);
- if (new_inode != NULL) {
+ if (new_inode != NULL)
nfs_inode_return_delegation(new_inode);
- d_delete(new_dentry);
- }
error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
new_dir, &new_dentry->d_name);
if (rehash)
d_rehash(rehash);
if (!error) {
+ if (new_inode != NULL)
+ nfs_drop_nlink(new_inode);
d_move(old_dentry, new_dentry);
nfs_set_verifier(new_dentry,
nfs_save_change_attribute(new_dir));
.write = do_sync_write,
.aio_read = nfs_file_read,
.aio_write = nfs_file_write,
- #ifdef CONFIG_MMU
.mmap = nfs_file_mmap,
- #else
- .mmap = generic_file_mmap,
- #endif
.open = nfs_file_open,
.flush = nfs_file_flush,
.release = nfs_file_release,
dentry->d_parent->d_name.name,
dentry->d_name.name);
- /* Ensure that dirty pages are flushed out with the right creds */
- if (filp->f_mode & FMODE_WRITE)
- nfs_wb_all(dentry->d_inode);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return nfs_release(inode, filp);
}
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
- int status;
dprintk("NFS: flush(%s/%s)\n",
dentry->d_parent->d_name.name,
return 0;
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
- /* Ensure that data+attribute caches are up to date after close() */
- status = nfs_do_fsync(ctx, inode);
- if (!status)
- nfs_revalidate_inode(NFS_SERVER(inode), inode);
- return status;
+ /* Flush writes to the server and return any errors */
+ return nfs_do_fsync(ctx, inode);
}
static ssize_t
dprintk("NFS: mmap(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
- status = nfs_revalidate_mapping(inode, file->f_mapping);
+ /* Note: generic_file_mmap() returns ENOSYS on nommu systems
+ * so we call that before revalidating the mapping
+ */
+ status = generic_file_mmap(file, vma);
if (!status) {
vma->vm_ops = &nfs_file_vm_ops;
- vma->vm_flags |= VM_CAN_NONLINEAR;
- file_accessed(file);
+ status = nfs_revalidate_mapping(inode, file->f_mapping);
}
return status;
}
file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos);
+ /*
+ * Prevent starvation issues if someone is doing a consistency
+ * sync-to-disk
+ */
+ ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+ if (ret)
+ return ret;
+
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
.launder_page = nfs_launder_page,
};
-static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
+ struct page *page = vmf->page;
struct file *filp = vma->vm_file;
struct dentry *dentry = filp->f_path.dentry;
unsigned pagelen;
ret = pagelen;
out_unlock:
unlock_page(page);
+ if (ret)
+ ret = VM_FAULT_SIGBUS;
return ret;
}
*/
struct radix_tree_root nfs_page_tree;
- unsigned long ncommit,
- npages;
+ unsigned long npages;
/* Open contexts for shared mmap writes */
struct list_head open_files;
#define NFS_INO_STALE (1) /* possible stale inode */
#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */
#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */
+ #define NFS_INO_FLUSHING (4) /* inode is flushing out data */
static inline struct nfs_inode *NFS_I(const struct inode *inode)
{
extern const struct inode_operations nfs3_dir_inode_operations;
#endif /* CONFIG_NFS_V3 */
extern const struct file_operations nfs_dir_operations;
-extern struct dentry_operations nfs_dentry_operations;
+extern const struct dentry_operations nfs_dentry_operations;
extern void nfs_force_lookup_revalidate(struct inode *dir);
extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
}
struct nfs_fattr {
- unsigned short valid; /* which fields are valid */
- __u64 pre_size; /* pre_op_attr.size */
- struct timespec pre_mtime; /* pre_op_attr.mtime */
- struct timespec pre_ctime; /* pre_op_attr.ctime */
- enum nfs_ftype type; /* always use NFSv2 types */
- __u32 mode;
+ unsigned int valid; /* which fields are valid */
+ umode_t mode;
__u32 nlink;
__u32 uid;
__u32 gid;
struct timespec atime;
struct timespec mtime;
struct timespec ctime;
- __u32 bitmap[2]; /* NFSv4 returned attribute bitmap */
__u64 change_attr; /* NFSv4 change attribute */
__u64 pre_change_attr;/* pre-op NFSv4 change attribute */
+ __u64 pre_size; /* pre_op_attr.size */
+ struct timespec pre_mtime; /* pre_op_attr.mtime */
+ struct timespec pre_ctime; /* pre_op_attr.ctime */
unsigned long time_start;
unsigned long gencount;
};
- #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */
- #define NFS_ATTR_FATTR 0x0002 /* post-op attributes */
- #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */
- #define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */
- #define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */
- #define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */
+ #define NFS_ATTR_FATTR_TYPE (1U << 0)
+ #define NFS_ATTR_FATTR_MODE (1U << 1)
+ #define NFS_ATTR_FATTR_NLINK (1U << 2)
+ #define NFS_ATTR_FATTR_OWNER (1U << 3)
+ #define NFS_ATTR_FATTR_GROUP (1U << 4)
+ #define NFS_ATTR_FATTR_RDEV (1U << 5)
+ #define NFS_ATTR_FATTR_SIZE (1U << 6)
+ #define NFS_ATTR_FATTR_PRESIZE (1U << 7)
+ #define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8)
+ #define NFS_ATTR_FATTR_SPACE_USED (1U << 9)
+ #define NFS_ATTR_FATTR_FSID (1U << 10)
+ #define NFS_ATTR_FATTR_FILEID (1U << 11)
+ #define NFS_ATTR_FATTR_ATIME (1U << 12)
+ #define NFS_ATTR_FATTR_MTIME (1U << 13)
+ #define NFS_ATTR_FATTR_CTIME (1U << 14)
+ #define NFS_ATTR_FATTR_PREMTIME (1U << 15)
+ #define NFS_ATTR_FATTR_PRECTIME (1U << 16)
+ #define NFS_ATTR_FATTR_CHANGE (1U << 17)
+ #define NFS_ATTR_FATTR_PRECHANGE (1U << 18)
+ #define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */
+
+ #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
+ | NFS_ATTR_FATTR_MODE \
+ | NFS_ATTR_FATTR_NLINK \
+ | NFS_ATTR_FATTR_OWNER \
+ | NFS_ATTR_FATTR_GROUP \
+ | NFS_ATTR_FATTR_RDEV \
+ | NFS_ATTR_FATTR_SIZE \
+ | NFS_ATTR_FATTR_FSID \
+ | NFS_ATTR_FATTR_FILEID \
+ | NFS_ATTR_FATTR_ATIME \
+ | NFS_ATTR_FATTR_MTIME \
+ | NFS_ATTR_FATTR_CTIME)
+ #define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
+ | NFS_ATTR_FATTR_BLOCKS_USED)
+ #define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
+ | NFS_ATTR_FATTR_SPACE_USED)
+ #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
+ | NFS_ATTR_FATTR_SPACE_USED \
+ | NFS_ATTR_FATTR_CHANGE)
/*
* Info on the file system
*/
struct nfs_rpc_ops {
u32 version; /* Protocol version */
- struct dentry_operations *dentry_ops;
+ const struct dentry_operations *dentry_ops;
const struct inode_operations *dir_inode_ops;
const struct inode_operations *file_inode_ops;
int (*lock)(struct file *, int, struct file_lock *);
int (*lock_check_bounds)(const struct file_lock *);
void (*clear_acl_cache)(struct inode *);
+ void (*close_context)(struct nfs_open_context *ctx, int);
};
/*
switch (m->mode) {
case SVC_POOL_PERCPU:
{
- set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
+ set_cpus_allowed_ptr(task, cpumask_of(node));
break;
}
case SVC_POOL_PERNODE:
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
unsigned int vers;
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
- serv->sv_family = family;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_nrthreads = 1;
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
- return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
+ return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv),
+ void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, family, shutdown);
+ serv = __svc_create(prog, bufsize, npools, shutdown);
if (serv != NULL) {
serv->sv_function = func;
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
- #ifdef CONFIG_SUNRPC_REGISTER_V4
-
/*
* Register an "inet" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in sin = {
+ const struct sockaddr_in sin = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
netid = RPCBIND_NETID_TCP;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin, netid);
+
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * registration request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, protocol, port);
+
+ return error;
}
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
* Register an "inet6" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in6 sin6 = {
+ const struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
netid = RPCBIND_NETID_TCP6;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin6, netid);
+
+ /*
+ * User space didn't support rpcbind version 4, so we won't
+ * use a PF_INET6 listener.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = -EAFNOSUPPORT;
+
+ return error;
}
+ #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
/*
* Register a kernel RPC service via rpcbind version 4.
* Returns zero on success; a negative errno value is returned
* if any error occurs.
*/
- static int __svc_register(const u32 program, const u32 version,
- const sa_family_t family,
+ static int __svc_register(const char *progname,
+ const u32 program, const u32 version,
+ const int family,
const unsigned short protocol,
const unsigned short port)
{
- int error;
+ int error = -EAFNOSUPPORT;
switch (family) {
- case AF_INET:
- return __svc_rpcb_register4(program, version,
+ case PF_INET:
+ error = __svc_rpcb_register4(program, version,
protocol, port);
- case AF_INET6:
+ break;
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case PF_INET6:
error = __svc_rpcb_register6(program, version,
protocol, port);
- if (error < 0)
- return error;
-
- /*
- * Work around bug in some versions of Linux rpcbind
- * which don't allow registration of both inet and
- * inet6 netids.
- *
- * Error return ignored for now.
- */
- __svc_rpcb_register4(program, version,
- protocol, port);
- return 0;
+ #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
}
- return -EAFNOSUPPORT;
- }
-
- #else /* CONFIG_SUNRPC_REGISTER_V4 */
-
- /*
- * Register a kernel RPC service via rpcbind version 2.
- *
- * Returns zero on success; a negative errno value is returned
- * if any error occurs.
- */
- static int __svc_register(const u32 program, const u32 version,
- sa_family_t family,
- const unsigned short protocol,
- const unsigned short port)
- {
- if (family != AF_INET)
- return -EAFNOSUPPORT;
-
- return rpcb_register(program, version, protocol, port);
+ if (error < 0)
+ printk(KERN_WARNING "svc: failed to register %sv%u RPC "
+ "service (errno %d).\n", progname, version, -error);
+ return error;
}
- #endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/**
* svc_register - register an RPC service with the local portmapper
* @serv: svc_serv struct for the service to register
+ * @family: protocol family of service's listener socket
* @proto: transport protocol number to advertise
* @port: port to advertise
*
- * Service is registered for any address in serv's address family
+ * Service is registered for any address in the passed-in protocol family
*/
- int svc_register(const struct svc_serv *serv, const unsigned short proto,
- const unsigned short port)
+ int svc_register(const struct svc_serv *serv, const int family,
+ const unsigned short proto, const unsigned short port)
{
struct svc_program *progp;
unsigned int i;
i,
proto == IPPROTO_UDP? "udp" : "tcp",
port,
- serv->sv_family,
+ family,
progp->pg_vers[i]->vs_hidden?
" (but not telling portmap)" : "");
if (progp->pg_vers[i]->vs_hidden)
continue;
- error = __svc_register(progp->pg_prog, i,
- serv->sv_family, proto, port);
+ error = __svc_register(progp->pg_name, progp->pg_prog,
+ i, family, proto, port);
if (error < 0)
break;
}
return error;
}
- #ifdef CONFIG_SUNRPC_REGISTER_V4
-
+ /*
+ * If user space is running rpcbind, it should take the v4 UNSET
+ * and clear everything for this [program, version]. If user space
+ * is running portmap, it will reject the v4 UNSET, but won't have
+ * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
+ * in this case to clear all existing entries for [program, version].
+ */
static void __svc_unregister(const u32 program, const u32 version,
const char *progname)
{
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = 0,
- };
int error;
- error = rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, "");
- dprintk("svc: %s(%sv%u), error %d\n",
- __func__, progname, version, error);
- }
-
- #else /* CONFIG_SUNRPC_REGISTER_V4 */
+ error = rpcb_v4_register(program, version, NULL, "");
- static void __svc_unregister(const u32 program, const u32 version,
- const char *progname)
- {
- int error;
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, 0, 0);
- error = rpcb_register(program, version, 0, 0);
dprintk("svc: %s(%sv%u), error %d\n",
__func__, progname, version, error);
}
- #endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/*
* All netids, bind addresses and ports registered for [program, version]
* are removed from the local rpcbind database (if the service is not
unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
+ #define XS_TCP_LINGER_TO (15U * HZ)
+ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
+
/*
* We can register our own files under /proc/sys/sunrpc by
* calling register_sysctl_table() again. The files in that
.extra1 = &xprt_min_resvport_limit,
.extra2 = &xprt_max_resvport_limit
},
+ {
+ .procname = "tcp_fin_timeout",
+ .data = &xs_tcp_fin_timeout,
+ .maxlen = sizeof(xs_tcp_fin_timeout),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies
+ },
{
.ctl_name = 0,
},
* @task: task to put to sleep
*
*/
- static void xs_nospace(struct rpc_task *task)
+ static int xs_nospace(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ int ret = 0;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+ ret = -EAGAIN;
/*
* Notify TCP that we're limited by the application
* window size
}
} else {
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- task->tk_status = -ENOTCONN;
+ ret = -ENOTCONN;
}
spin_unlock_bh(&xprt->transport_lock);
+ return ret;
}
/**
/* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
}
+ if (!transport->sock)
+ goto out;
switch (status) {
case -ENOTSOCK:
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- xs_nospace(task);
+ status = xs_nospace(task);
break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
case -ENETUNREACH:
case -EPIPE:
case -ECONNREFUSED:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- break;
- default:
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
}
-
+ out:
return status;
}
status = -EAGAIN;
break;
}
+ if (!transport->sock)
+ goto out;
switch (status) {
case -ENOTSOCK:
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- xs_nospace(task);
+ status = xs_nospace(task);
break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
case -ECONNRESET:
+ case -EPIPE:
xs_tcp_shutdown(xprt);
case -ECONNREFUSED:
case -ENOTCONN:
- case -EPIPE:
- status = -ENOTCONN;
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- xs_tcp_shutdown(xprt);
}
-
+ out:
return status;
}
sk->sk_error_report = transport->old_error_report;
}
- /**
- * xs_close - close a socket
- * @xprt: transport
- *
- * This is used when all requests are complete; ie, no DRC state remains
- * on the server we want to save.
- */
- static void xs_close(struct rpc_xprt *xprt)
+ static void xs_reset_transport(struct sock_xprt *transport)
{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
struct sock *sk = transport->inet;
- if (!sk)
- goto clear_close_wait;
-
- dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (sk == NULL)
+ return;
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
sk->sk_no_check = 0;
sock_release(sock);
- clear_close_wait:
+ }
+
+ /**
+ * xs_close - close a socket
+ * @xprt: transport
+ *
+ * This is used when all requests are complete; ie, no DRC state remains
+ * on the server we want to save.
+ */
+ static void xs_close(struct rpc_xprt *xprt)
+ {
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+ dprintk("RPC: xs_close xprt %p\n", xprt);
+
+ xs_reset_transport(transport);
+
smp_mb__before_clear_bit();
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
read_unlock(&sk->sk_callback_lock);
}
+ /*
+ * Do the equivalent of linger/linger2 handling for dealing with
+ * broken servers that don't close the socket in a timely
+ * fashion
+ */
+ static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
+ unsigned long timeout)
+ {
+ struct sock_xprt *transport;
+
+ if (xprt_test_and_set_connecting(xprt))
+ return;
+ set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ transport = container_of(xprt, struct sock_xprt, xprt);
+ queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
+ timeout);
+ }
+
+ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
+ {
+ struct sock_xprt *transport;
+
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+ if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
+ !cancel_delayed_work(&transport->connect_worker))
+ return;
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ xprt_clear_connecting(xprt);
+ }
+
+ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+ {
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ clear_bit(XPRT_CLOSING, &xprt->state);
+ smp_mb__after_clear_bit();
+ /* Mark transport as closed and wake up all pending tasks */
+ xprt_disconnect_done(xprt);
+ }
+
/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
transport->tcp_flags =
TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
- xprt_wake_pending_tasks(xprt, 0);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
}
spin_unlock_bh(&xprt->transport_lock);
break;
clear_bit(XPRT_CONNECTED, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
smp_mb__after_clear_bit();
+ xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
break;
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
- set_bit(XPRT_CLOSING, &xprt->state);
xprt_force_disconnect(xprt);
case TCP_SYN_SENT:
xprt->connect_cookie++;
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
break;
case TCP_LAST_ACK:
+ set_bit(XPRT_CLOSING, &xprt->state);
+ xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTED, &xprt->state);
smp_mb__after_clear_bit();
break;
case TCP_CLOSE:
- smp_mb__before_clear_bit();
- clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_clear_bit();
- /* Mark transport as closed and wake up all pending tasks */
- xprt_disconnect_done(xprt);
+ xs_tcp_cancel_linger_timeout(xprt);
+ xs_sock_mark_closed(xprt);
}
out:
read_unlock(&sk->sk_callback_lock);
}
/**
- * xs_tcp_error_report - callback mainly for catching RST events
+ * xs_error_report - callback mainly for catching socket errors
* @sk: socket
*/
- static void xs_tcp_error_report(struct sock *sk)
+ static void xs_error_report(struct sock *sk)
{
struct rpc_xprt *xprt;
read_lock(&sk->sk_callback_lock);
- if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
- goto out;
if (!(xprt = xprt_from_sock(sk)))
goto out;
dprintk("RPC: %s client %p...\n"
"RPC: error %d\n",
__func__, xprt, sk->sk_err);
-
- xprt_force_disconnect(xprt);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
read_unlock(&sk->sk_callback_lock);
}
+static void xs_write_space(struct sock *sk)
+{
+ struct socket *sock;
+ struct rpc_xprt *xprt;
+
+ if (unlikely(!(sock = sk->sk_socket)))
+ return;
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+
+ if (unlikely(!(xprt = xprt_from_sock(sk))))
+ return;
+ if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
+ return;
+
+ xprt_write_space(xprt);
+}
+
/**
* xs_udp_write_space - callback invoked when socket buffer space
* becomes available
read_lock(&sk->sk_callback_lock);
/* from net/core/sock.c:sock_def_write_space */
- if (sock_writeable(sk)) {
- struct socket *sock;
- struct rpc_xprt *xprt;
-
- if (unlikely(!(sock = sk->sk_socket)))
- goto out;
- clear_bit(SOCK_NOSPACE, &sock->flags);
-
- if (unlikely(!(xprt = xprt_from_sock(sk))))
- goto out;
- if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
- goto out;
-
- xprt_write_space(xprt);
- }
+ if (sock_writeable(sk))
+ xs_write_space(sk);
- out:
read_unlock(&sk->sk_callback_lock);
}
read_lock(&sk->sk_callback_lock);
/* from net/core/stream.c:sk_stream_write_space */
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
- struct socket *sock;
- struct rpc_xprt *xprt;
-
- if (unlikely(!(sock = sk->sk_socket)))
- goto out;
- clear_bit(SOCK_NOSPACE, &sock->flags);
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ xs_write_space(sk);
- if (unlikely(!(xprt = xprt_from_sock(sk))))
- goto out;
- if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
- goto out;
-
- xprt_write_space(xprt);
- }
-
- out:
read_unlock(&sk->sk_callback_lock);
}
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
+ sk->sk_error_report = xs_error_report;
sk->sk_no_check = UDP_CSUM_NORCV;
sk->sk_allocation = GFP_ATOMIC;
goto out;
/* Start by resetting any existing state */
- xs_close(xprt);
+ xs_reset_transport(transport);
- if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
- xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
}
/**
goto out;
/* Start by resetting any existing state */
- xs_close(xprt);
+ xs_reset_transport(transport);
- if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
- xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
}
/*
* We need to preserve the port number so the reply cache on the server can
* find our cached RPC replies when we get around to reconnecting.
*/
- static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
+ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
{
int result;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct sockaddr any;
dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
- if (result)
+ if (!result)
+ xs_sock_mark_closed(xprt);
+ else
dprintk("RPC: AF_UNSPEC connect return code %d\n",
result);
}
+ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+ {
+ unsigned int state = transport->inet->sk_state;
+
+ if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
+ return;
+ if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
+ return;
+ xs_abort_connection(xprt, transport);
+ }
+
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
- sk->sk_error_report = xs_tcp_error_report;
+ sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
}
/**
- * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
+ * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
+ * @xprt: RPC transport to connect
+ * @transport: socket transport to connect
+ * @create_sock: function to create a socket of the correct type
*
* Invoked by a work queue tasklet.
*/
- static void xs_tcp_connect_worker4(struct work_struct *work)
+ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
+ struct sock_xprt *transport,
+ struct socket *(*create_sock)(struct rpc_xprt *,
+ struct sock_xprt *))
{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
- int err, status = -EIO;
+ int status = -EIO;
if (xprt->shutdown)
goto out;
if (!sock) {
- /* start from scratch */
- if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ sock = create_sock(xprt, transport);
+ if (IS_ERR(sock)) {
+ status = PTR_ERR(sock);
goto out;
}
- xs_reclassify_socket4(sock);
+ } else {
+ int abort_and_exit;
- if (xs_bind4(transport, sock) < 0) {
- sock_release(sock);
- goto out;
- }
- } else
+ abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
+ &xprt->state);
/* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt);
+ xs_tcp_reuse_connection(xprt, transport);
+
+ if (abort_and_exit)
+ goto out_eagain;
+ }
dprintk("RPC: worker connecting xprt %p to address: %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
- if (status < 0) {
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- goto out_clear;
- case -ECONNREFUSED:
- case -ECONNRESET:
- /* retry with existing socket, after a delay */
- break;
- default:
- /* get rid of existing socket, and retry */
- xs_tcp_shutdown(xprt);
- }
+ switch (status) {
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ENETUNREACH:
+ /* retry with existing socket, after a delay */
+ case 0:
+ case -EINPROGRESS:
+ case -EALREADY:
+ xprt_clear_connecting(xprt);
+ return;
}
+ /* get rid of existing socket, and retry */
+ xs_tcp_shutdown(xprt);
+ printk("%s: connect returned unhandled error %d\n",
+ __func__, status);
+ out_eagain:
+ status = -EAGAIN;
out:
- xprt_wake_pending_tasks(xprt, status);
- out_clear:
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
+ }
+
+ static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
+ struct sock_xprt *transport)
+ {
+ struct socket *sock;
+ int err;
+
+ /* start from scratch */
+ err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n",
+ -err);
+ goto out_err;
+ }
+ xs_reclassify_socket4(sock);
+
+ if (xs_bind4(transport, sock) < 0) {
+ sock_release(sock);
+ goto out_err;
+ }
+ return sock;
+ out_err:
+ return ERR_PTR(-EIO);
}
/**
- * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
* @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
- static void xs_tcp_connect_worker6(struct work_struct *work)
+ static void xs_tcp_connect_worker4(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
- int err, status = -EIO;
- if (xprt->shutdown)
- goto out;
+ xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
+ }
- if (!sock) {
- /* start from scratch */
- if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
- goto out;
- }
- xs_reclassify_socket6(sock);
+ static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
+ struct sock_xprt *transport)
+ {
+ struct socket *sock;
+ int err;
+
+ /* start from scratch */
+ err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n",
+ -err);
+ goto out_err;
+ }
+ xs_reclassify_socket6(sock);
- if (xs_bind6(transport, sock) < 0) {
- sock_release(sock);
- goto out;
- }
- } else
- /* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt);
+ if (xs_bind6(transport, sock) < 0) {
+ sock_release(sock);
+ goto out_err;
+ }
+ return sock;
+ out_err:
+ return ERR_PTR(-EIO);
+ }
- dprintk("RPC: worker connecting xprt %p to address: %s\n",
- xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+ /**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+ static void xs_tcp_connect_worker6(struct work_struct *work)
+ {
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
- status = xs_tcp_finish_connecting(xprt, sock);
- dprintk("RPC: %p connect status %d connected %d sock state %d\n",
- xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
- if (status < 0) {
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- goto out_clear;
- case -ECONNREFUSED:
- case -ECONNRESET:
- /* retry with existing socket, after a delay */
- break;
- default:
- /* get rid of existing socket, and retry */
- xs_tcp_shutdown(xprt);
- }
- }
- out:
- xprt_wake_pending_tasks(xprt, status);
- out_clear:
- xprt_clear_connecting(xprt);
+ xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
}
/**
{
struct rpc_xprt *xprt = task->tk_xprt;
- /* Initiate graceful shutdown of the socket if not already done */
- if (test_bit(XPRT_CONNECTED, &xprt->state))
- xs_tcp_shutdown(xprt);
/* Exit if we need to wait for socket shutdown to complete */
if (test_bit(XPRT_CLOSING, &xprt->state))
return;