X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=net%2Fnetfilter%2Fipvs%2Fip_vs_ctl.c;fp=net%2Fnetfilter%2Fipvs%2Fip_vs_ctl.c;h=ba98e1308f3ced1a19414259285d2bdba2f37fd3;hb=f2ec334db8d14ae3ec2e4bf8d974f75b8f772e26;hp=0f0c079c422a03f87616a1c1df76c0e387245a9e;hpb=81e20d4d8d0317ecf1c7d193a52ab26cf74e1737;p=mv-sheeva.git diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0f0c079c422..ba98e1308f3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -61,7 +61,7 @@ static DEFINE_RWLOCK(__ip_vs_svc_lock); static DEFINE_RWLOCK(__ip_vs_rs_lock); /* lock for state and timeout tables */ -static DEFINE_RWLOCK(__ip_vs_securetcp_lock); +static DEFINE_SPINLOCK(ip_vs_securetcp_lock); /* lock for drop entry handling */ static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); @@ -88,6 +88,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0; int sysctl_ip_vs_expire_quiescent_template = 0; int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; int sysctl_ip_vs_nat_icmp_send = 0; +#ifdef CONFIG_IP_VS_NFCT +int sysctl_ip_vs_conntrack; +#endif +int sysctl_ip_vs_snat_reroute = 1; #ifdef CONFIG_IP_VS_DEBUG @@ -106,10 +110,8 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) struct rt6_info *rt; struct flowi fl = { .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = *addr, - .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + .fl6_dst = *addr, + .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, }; rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); @@ -204,7 +206,7 @@ static void update_defense_level(void) spin_unlock(&__ip_vs_droppacket_lock); /* secure_tcp */ - write_lock(&__ip_vs_securetcp_lock); + spin_lock(&ip_vs_securetcp_lock); switch (sysctl_ip_vs_secure_tcp) { case 0: if (old_secure_tcp >= 2) @@ -238,7 +240,7 @@ static void update_defense_level(void) old_secure_tcp = sysctl_ip_vs_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); - write_unlock(&__ip_vs_securetcp_lock); + spin_unlock(&ip_vs_securetcp_lock); local_bh_enable(); } @@ -401,7 +403,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) * Get service by {proto,addr,port} in the service table. */ static inline struct ip_vs_service * -__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, +__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { unsigned hash; @@ -416,7 +418,6 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, && (svc->port == vport) && (svc->protocol == protocol)) { /* HIT */ - atomic_inc(&svc->usecnt); return svc; } } @@ -429,7 +430,7 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, * Get service by {fwmark} in the service table. */ static inline struct ip_vs_service * -__ip_vs_svc_fwm_get(int af, __u32 fwmark) +__ip_vs_svc_fwm_find(int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; @@ -440,7 +441,6 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark) list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af) { /* HIT */ - atomic_inc(&svc->usecnt); return svc; } } @@ -459,14 +459,14 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) + if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) goto out; /* * Check the table hashed by * for "full" addressed entries */ - svc = __ip_vs_service_get(af, protocol, vaddr, vport); + svc = __ip_vs_service_find(af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -476,7 +476,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -484,10 +484,12 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_get(af, protocol, vaddr, 0); + svc = __ip_vs_service_find(af, protocol, vaddr, 0); } out: + if (svc) + atomic_inc(&svc->usecnt); read_unlock(&__ip_vs_svc_lock); IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", @@ -506,14 +508,19 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) dest->svc = svc; } -static inline void +static void __ip_vs_unbind_svc(struct ip_vs_dest *dest) { struct ip_vs_service *svc = dest->svc; dest->svc = NULL; - if (atomic_dec_and_test(&svc->refcnt)) + if (atomic_dec_and_test(&svc->refcnt)) { + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + svc->fwmark, + IP_VS_DBG_ADDR(svc->af, &svc->addr), + ntohs(svc->port), atomic_read(&svc->usecnt)); kfree(svc); + } } @@ -758,31 +765,18 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) * Update a destination in the given service */ static void -__ip_vs_update_dest(struct ip_vs_service *svc, - struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) +__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, + struct ip_vs_dest_user_kern *udest, int add) { int conn_flags; /* set the weight and the flags */ atomic_set(&dest->weight, udest->weight); - conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; - - /* check if local node and update the flags */ -#ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) { - if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { - conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) - | IP_VS_CONN_F_LOCALNODE; - } - } else -#endif - if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { - conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) - | IP_VS_CONN_F_LOCALNODE; - } + conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; + conn_flags |= IP_VS_CONN_F_INACTIVE; /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ - if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { + if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { conn_flags |= IP_VS_CONN_F_NOOUTPUT; } else { /* @@ -813,6 +807,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc, dest->flags &= ~IP_VS_DEST_F_OVERLOAD; dest->u_threshold = udest->u_threshold; dest->l_threshold = udest->l_threshold; + + spin_lock_bh(&dest->dst_lock); + ip_vs_dst_reset(dest); + spin_unlock_bh(&dest->dst_lock); + + if (add) + ip_vs_new_estimator(&dest->stats); + + write_lock_bh(&__ip_vs_svc_lock); + + /* Wait until all other svc users go away */ + IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); + + if (add) { + list_add(&dest->n_list, &svc->destinations); + svc->num_dests++; + } + + /* call the update_service, because server weight may be changed */ + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); + + write_unlock_bh(&__ip_vs_svc_lock); } @@ -843,7 +860,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, return -EINVAL; } - dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); + dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); if (dest == NULL) { pr_err("%s(): no memory.\n", __func__); return -ENOMEM; @@ -860,13 +877,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->activeconns, 0); atomic_set(&dest->inactconns, 0); atomic_set(&dest->persistconns, 0); - atomic_set(&dest->refcnt, 0); + atomic_set(&dest->refcnt, 1); INIT_LIST_HEAD(&dest->d_list); spin_lock_init(&dest->dst_lock); spin_lock_init(&dest->stats.lock); - __ip_vs_update_dest(svc, dest, udest); - ip_vs_new_estimator(&dest->stats); + __ip_vs_update_dest(svc, dest, udest, 1); *dest_p = dest; @@ -926,65 +942,22 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); - __ip_vs_update_dest(svc, dest, udest); - /* * Get the destination from the trash */ list_del(&dest->n_list); - ip_vs_new_estimator(&dest->stats); - - write_lock_bh(&__ip_vs_svc_lock); - + __ip_vs_update_dest(svc, dest, udest, 1); + ret = 0; + } else { /* - * Wait until all other svc users go away. + * Allocate and initialize the dest structure */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); - - list_add(&dest->n_list, &svc->destinations); - svc->num_dests++; - - /* call the update_service function of its scheduler */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); - return 0; - } - - /* - * Allocate and initialize the dest structure - */ - ret = ip_vs_new_dest(svc, udest, &dest); - if (ret) { - return ret; + ret = ip_vs_new_dest(svc, udest, &dest); } - - /* - * Add the dest entry into the list - */ - atomic_inc(&dest->refcnt); - - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); - - list_add(&dest->n_list, &svc->destinations); - svc->num_dests++; - - /* call the update_service function of its scheduler */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); - LeaveFunction(2); - return 0; + return ret; } @@ -1023,19 +996,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ENOENT; } - __ip_vs_update_dest(svc, dest, udest); - - write_lock_bh(&__ip_vs_svc_lock); - - /* Wait until all other svc users go away */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); - - /* call the update_service, because server weight may be changed */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); - + __ip_vs_update_dest(svc, dest, udest, 0); LeaveFunction(2); return 0; @@ -1062,6 +1023,10 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) * the destination into the trash. */ if (atomic_dec_and_test(&dest->refcnt)) { + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n", + dest->vfwmark, + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port)); ip_vs_dst_reset(dest); /* simply decrease svc->refcnt here, let the caller check and release the service if nobody refers to it. @@ -1128,7 +1093,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Wait until all other svc users go away. */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); + IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); /* * Unlink dest from the service @@ -1157,6 +1122,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, { int ret = 0; struct ip_vs_scheduler *sched = NULL; + struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; /* increase the module use count */ @@ -1167,7 +1133,17 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, if (sched == NULL) { pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); ret = -ENOENT; - goto out_mod_dec; + goto out_err; + } + + if (u->pe_name && *u->pe_name) { + pe = ip_vs_pe_get(u->pe_name); + if (pe == NULL) { + pr_info("persistence engine module ip_vs_pe_%s " + "not found\n", u->pe_name); + ret = -ENOENT; + goto out_err; + } } #ifdef CONFIG_IP_VS_IPV6 @@ -1177,7 +1153,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, } #endif - svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); + svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL); if (svc == NULL) { IP_VS_DBG(1, "%s(): no memory\n", __func__); ret = -ENOMEM; @@ -1185,7 +1161,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, } /* I'm the first user of the service */ - atomic_set(&svc->usecnt, 1); + atomic_set(&svc->usecnt, 0); atomic_set(&svc->refcnt, 0); svc->af = u->af; @@ -1207,6 +1183,10 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, goto out_err; sched = NULL; + /* Bind the ct retriever */ + ip_vs_bind_pe(svc, pe); + pe = NULL; + /* Update the virtual service counters */ if (svc->port == FTPPORT) atomic_inc(&ip_vs_ftpsvc_counter); @@ -1227,10 +1207,9 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, *svc_p = svc; return 0; - out_err: + out_err: if (svc != NULL) { - if (svc->scheduler) - ip_vs_unbind_scheduler(svc); + ip_vs_unbind_scheduler(svc); if (svc->inc) { local_bh_disable(); ip_vs_app_inc_put(svc->inc); @@ -1239,8 +1218,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, kfree(svc); } ip_vs_scheduler_put(sched); + ip_vs_pe_put(pe); - out_mod_dec: /* decrease the module use count */ ip_vs_use_count_dec(); @@ -1255,6 +1234,7 @@ static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* @@ -1267,6 +1247,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } old_sched = sched; + if (u->pe_name && *u->pe_name) { + pe = ip_vs_pe_get(u->pe_name); + if (pe == NULL) { + pr_info("persistence engine module ip_vs_pe_%s " + "not found\n", u->pe_name); + ret = -ENOENT; + goto out; + } + old_pe = pe; + } + #ifdef CONFIG_IP_VS_IPV6 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { ret = -EINVAL; @@ -1279,7 +1270,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) /* * Wait until all other svc users go away. */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); + IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); /* * Set the flags and timeout value @@ -1318,15 +1309,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } } + old_pe = svc->pe; + if (pe != old_pe) { + ip_vs_unbind_pe(svc); + ip_vs_bind_pe(svc, pe); + } + out_unlock: write_unlock_bh(&__ip_vs_svc_lock); -#ifdef CONFIG_IP_VS_IPV6 out: -#endif - - if (old_sched) - ip_vs_scheduler_put(old_sched); - + ip_vs_scheduler_put(old_sched); + ip_vs_pe_put(old_pe); return ret; } @@ -1340,6 +1333,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) { struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; + struct ip_vs_pe *old_pe; + + pr_info("%s: enter\n", __func__); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1350,8 +1346,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* Unbind scheduler */ old_sched = svc->scheduler; ip_vs_unbind_scheduler(svc); - if (old_sched) - ip_vs_scheduler_put(old_sched); + ip_vs_scheduler_put(old_sched); + + /* Unbind persistence engine */ + old_pe = svc->pe; + ip_vs_unbind_pe(svc); + ip_vs_pe_put(old_pe); /* Unbind app inc */ if (svc->inc) { @@ -1378,21 +1378,23 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Free the service if nobody refers to it */ - if (atomic_read(&svc->refcnt) == 0) + if (atomic_read(&svc->refcnt) == 0) { + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + svc->fwmark, + IP_VS_DBG_ADDR(svc->af, &svc->addr), + ntohs(svc->port), atomic_read(&svc->usecnt)); kfree(svc); + } /* decrease the module use count */ ip_vs_use_count_dec(); } /* - * Delete a service from the service list + * Unlink a service from list and try to delete it if its refcnt reached 0 */ -static int ip_vs_del_service(struct ip_vs_service *svc) +static void ip_vs_unlink_service(struct ip_vs_service *svc) { - if (svc == NULL) - return -EEXIST; - /* * Unhash it from the service table */ @@ -1403,11 +1405,21 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Wait until all the svc users go away. */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); + IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); __ip_vs_del_service(svc); write_unlock_bh(&__ip_vs_svc_lock); +} + +/* + * Delete a service from the service list + */ +static int ip_vs_del_service(struct ip_vs_service *svc) +{ + if (svc == NULL) + return -EEXIST; + ip_vs_unlink_service(svc); return 0; } @@ -1426,14 +1438,7 @@ static int ip_vs_flush(void) */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { - write_lock_bh(&__ip_vs_svc_lock); - ip_vs_svc_unhash(svc); - /* - * Wait until all the svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - __ip_vs_del_service(svc); - write_unlock_bh(&__ip_vs_svc_lock); + ip_vs_unlink_service(svc); } } @@ -1443,14 +1448,7 @@ static int ip_vs_flush(void) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry_safe(svc, nxt, &ip_vs_svc_fwm_table[idx], f_list) { - write_lock_bh(&__ip_vs_svc_lock); - ip_vs_svc_unhash(svc); - /* - * Wait until all the svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - __ip_vs_del_service(svc); - write_unlock_bh(&__ip_vs_svc_lock); + ip_vs_unlink_service(svc); } } @@ -1579,6 +1577,15 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_do_defense_mode, }, +#ifdef CONFIG_IP_VS_NFCT + { + .procname = "conntrack", + .data = &sysctl_ip_vs_conntrack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .procname = "secure_tcp", .data = &sysctl_ip_vs_secure_tcp, @@ -1586,6 +1593,13 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_do_defense_mode, }, + { + .procname = "snat_reroute", + .data = &sysctl_ip_vs_snat_reroute, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #if 0 { .procname = "timeout_established", @@ -2041,6 +2055,8 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, struct ip_vs_service_user *usvc_compat) { + memset(usvc, 0, sizeof(*usvc)); + usvc->af = AF_INET; usvc->protocol = usvc_compat->protocol; usvc->addr.ip = usvc_compat->addr; @@ -2058,6 +2074,8 @@ static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, struct ip_vs_dest_user *udest_compat) { + memset(udest, 0, sizeof(*udest)); + udest->addr.ip = udest_compat->addr; udest->port = udest_compat->port; udest->conn_flags = udest_compat->conn_flags; @@ -2147,10 +2165,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by or fwmark */ if (usvc.fwmark == 0) - svc = __ip_vs_service_get(usvc.af, usvc.protocol, - &usvc.addr, usvc.port); + svc = __ip_vs_service_find(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2189,9 +2207,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = -EINVAL; } - if (svc) - ip_vs_service_put(svc); - out_unlock: mutex_unlock(&__ip_vs_mutex); out_dec: @@ -2284,10 +2299,10 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); else - svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, - get->port); + svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, + get->port); if (svc) { int count = 0; @@ -2315,7 +2330,6 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, } count++; } - ip_vs_service_put(svc); } else ret = -ESRCH; return ret; @@ -2436,15 +2450,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); else - svc = __ip_vs_service_get(AF_INET, entry->protocol, - &addr, entry->port); + svc = __ip_vs_service_find(AF_INET, entry->protocol, + &addr, entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) ret = -EFAULT; - ip_vs_service_put(svc); } else ret = -ESRCH; } @@ -2559,6 +2572,8 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, .len = IP_VS_SCHEDNAME_MAXLEN }, + [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, + .len = IP_VS_PENAME_MAXLEN }, [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, .len = sizeof(struct ip_vs_flags) }, [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, @@ -2635,6 +2650,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); + if (svc->pe) + NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name); NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); @@ -2711,10 +2728,12 @@ nla_put_failure: } static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, - struct nlattr *nla, int full_entry) + struct nlattr *nla, int full_entry, + struct ip_vs_service **ret_svc) { struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; + struct ip_vs_service *svc; /* Parse mandatory identifying service fields first */ if (nla == NULL || @@ -2750,14 +2769,21 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, usvc->fwmark = 0; } + if (usvc->fwmark) + svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); + else + svc = __ip_vs_service_find(usvc->af, usvc->protocol, + &usvc->addr, usvc->port); + *ret_svc = svc; + /* If a full entry was requested, check for the additional fields */ if (full_entry) { - struct nlattr *nla_sched, *nla_flags, *nla_timeout, + struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout, *nla_netmask; struct ip_vs_flags flags; - struct ip_vs_service *svc; nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; + nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME]; nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; @@ -2768,21 +2794,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, nla_memcpy(&flags, nla_flags, sizeof(flags)); /* prefill flags from service if it already exists */ - if (usvc->fwmark) - svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark); - else - svc = __ip_vs_service_get(usvc->af, usvc->protocol, - &usvc->addr, usvc->port); - if (svc) { + if (svc) usvc->flags = svc->flags; - ip_vs_service_put(svc); - } else - usvc->flags = 0; /* set new flags from userland */ usvc->flags = (usvc->flags & ~flags.mask) | (flags.flags & flags.mask); usvc->sched_name = nla_data(nla_sched); + usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; usvc->timeout = nla_get_u32(nla_timeout); usvc->netmask = nla_get_u32(nla_netmask); } @@ -2793,17 +2812,11 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) { struct ip_vs_service_user_kern usvc; + struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(&usvc, nla, 0); - if (ret) - return ERR_PTR(ret); - - if (usvc.fwmark) - return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); - else - return __ip_vs_service_get(usvc.af, usvc.protocol, - &usvc.addr, usvc.port); + ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); + return ret ? ERR_PTR(ret) : svc; } static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) @@ -2894,7 +2907,6 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, nla_put_failure: cb->args[0] = idx; - ip_vs_service_put(svc); out_err: mutex_unlock(&__ip_vs_mutex); @@ -3107,17 +3119,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = ip_vs_genl_parse_service(&usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], - need_full_svc); + need_full_svc, &svc); if (ret) goto out; - /* Lookup the exact service by or fwmark */ - if (usvc.fwmark == 0) - svc = __ip_vs_service_get(usvc.af, usvc.protocol, - &usvc.addr, usvc.port); - else - svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); - /* Unless we're adding a new service, the service must already exist */ if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { ret = -ESRCH; @@ -3151,6 +3156,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) break; case IPVS_CMD_DEL_SERVICE: ret = ip_vs_del_service(svc); + /* do not use svc, it can be freed */ break; case IPVS_CMD_NEW_DEST: ret = ip_vs_add_dest(svc, &udest); @@ -3169,8 +3175,6 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) } out: - if (svc) - ip_vs_service_put(svc); mutex_unlock(&__ip_vs_mutex); return ret; @@ -3216,7 +3220,6 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) goto out_err; } else if (svc) { ret = ip_vs_genl_fill_service(msg, svc); - ip_vs_service_put(svc); if (ret) goto nla_put_failure; } else { @@ -3385,6 +3388,16 @@ int __init ip_vs_control_init(void) EnterFunction(2); + /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ + for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + INIT_LIST_HEAD(&ip_vs_svc_table[idx]); + INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + } + for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { + INIT_LIST_HEAD(&ip_vs_rtable[idx]); + } + smp_wmb(); + ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); @@ -3403,15 +3416,6 @@ int __init ip_vs_control_init(void) sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); - /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); - } - for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_rtable[idx]); - } - ip_vs_new_estimator(&ip_vs_stats); /* Hook the defense timer */ @@ -3426,7 +3430,7 @@ void ip_vs_control_cleanup(void) { EnterFunction(2); ip_vs_trash_cleanup(); - cancel_rearming_delayed_work(&defense_work); + cancel_delayed_work_sync(&defense_work); cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header);