2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
43 #ifdef CONFIG_IP_VS_IPV6
45 #include <net/ip6_route.h>
47 #include <net/route.h>
49 #include <net/genetlink.h>
51 #include <asm/uaccess.h>
53 #include <net/ip_vs.h>
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
/* Guards ip_vs_svc_table / ip_vs_svc_fwm_table: writers take it _bh
 * (write_lock_bh below), fast-path lookups use read_lock(). */
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
61 /* sysctl variables */
63 #ifdef CONFIG_IP_VS_DEBUG
64 static int sysctl_ip_vs_debug_level = 0;
/* Return the current debug verbosity set via the debug_level sysctl. */
66 int ip_vs_get_debug_level(void)
68 return sysctl_ip_vs_debug_level;
72 #ifdef CONFIG_IP_VS_IPV6
73 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/* Test whether @addr is a local address in @net by routing it and
 * checking that the output device is the loopback interface.
 * NOTE(review): lines elided in this excerpt (fl6 setup, return paths);
 * verify against the full file that the rt6_info from ip6_route_output()
 * is released on all paths. */
74 static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
82 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
83 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
92 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
/* Re-evaluate the three DoS-defense strategies (drop_entry, drop_packet,
 * secure_tcp) for one netns based on available memory.  Each sysctl knob
 * uses values 0..3: 0=off, 1=auto-off, 2=auto-on, 3=always-on; the "auto"
 * states flip between 1 and 2 depending on memory pressure (nomem). */
95 static void update_defense_level(struct netns_ipvs *ipvs)
/* NOTE(review): old_secure_tcp is function-static, i.e. shared by ALL
 * network namespaces, while every other piece of state here is per-ipvs.
 * With multiple netns this mixes their secure_tcp histories — looks like
 * it belongs in struct netns_ipvs; confirm against upstream. */
98 static int old_secure_tcp = 0;
103 /* we only count free and buffered memory (in pages) */
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
111 nomem = (availmem < ipvs->sysctl_amemthresh);
/* drop_entry: randomly drop connection entries when memory is low */
116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
119 atomic_set(&ipvs->dropentry, 0);
123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
126 atomic_set(&ipvs->dropentry, 0);
131 atomic_set(&ipvs->dropentry, 1);
133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
138 atomic_set(&ipvs->dropentry, 1);
141 spin_unlock(&ipvs->dropentry_lock);
/* drop_packet: rate-limit incoming packets; the rate scales with how far
 * below amemthresh the available memory has fallen */
144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
166 ipvs->sysctl_drop_packet = 1;
170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
173 spin_unlock(&ipvs->droppacket_lock);
/* secure_tcp: switch TCP state-machine timeouts to the defensive table
 * when the mode reaches >1 (see timeout_change call below) */
176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
179 if (old_secure_tcp >= 2)
184 if (old_secure_tcp < 2)
186 ipvs->sysctl_secure_tcp = 2;
188 if (old_secure_tcp >= 2)
194 if (old_secure_tcp < 2)
197 if (old_secure_tcp >= 2)
199 ipvs->sysctl_secure_tcp = 1;
203 if (old_secure_tcp < 2)
207 old_secure_tcp = ipvs->sysctl_secure_tcp;
209 ip_vs_protocol_timeout_change(ipvs,
210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
218 * Timer for checking the defense
/* Period of the self-rearming defense work: once per second. */
220 #define DEFENSE_TIMER_PERIOD 1*HZ
/* Delayed-work handler: refresh the defense level for this netns, drop a
 * random connection entry if drop_entry is active, then re-arm itself. */
222 static void defense_work_handler(struct work_struct *work)
224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
227 update_defense_level(ipvs);
228 if (atomic_read(&ipvs->dropentry))
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
/* Pin the ip_vs module while user-space holds a reference; returns the
 * try_module_get() result (0 on failure). */
235 ip_vs_use_count_inc(void)
237 return try_module_get(THIS_MODULE);
/* Drop the module reference taken by ip_vs_use_count_inc(). */
241 ip_vs_use_count_dec(void)
243 module_put(THIS_MODULE);
248 * Hash table: for virtual service lookups
/* 256-bucket tables; both are protected by __ip_vs_svc_lock. */
250 #define IP_VS_SVC_TAB_BITS 8
251 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
252 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254 /* the service table hashed by <protocol, addr, port> */
255 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
256 /* the service table hashed by fwmark */
257 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
261 * Returns hash value for virtual service
/* Fold protocol, address (all four IPv6 words under CONFIG_IP_VS_IPV6),
 * port and a netns cookie into an index in [0, IP_VS_SVC_TAB_MASK].
 * NOTE(review): (size_t)net>>8 mixes the netns pointer into the hash so
 * different namespaces land in different buckets. */
263 static inline unsigned
264 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
265 const union nf_inet_addr *addr, __be16 port)
267 register unsigned porth = ntohs(port);
268 __be32 addr_fold = addr->ip;
270 #ifdef CONFIG_IP_VS_IPV6
272 addr_fold = addr->ip6[0]^addr->ip6[1]^
273 addr->ip6[2]^addr->ip6[3];
275 addr_fold ^= ((size_t)net>>8);
277 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
278 & IP_VS_SVC_TAB_MASK;
282 * Returns hash value of fwmark for virtual service lookup
/* Bucket index for the fwmark table: fwmark xor netns cookie, masked. */
284 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
286 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
290 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
291 * or in the ip_vs_svc_fwm_table by fwmark.
292 * Should be called with locked tables.
/* A service lives in exactly one table: fwmark table when fwmark != 0,
 * otherwise the <netns,proto,addr,port> table.  Sets F_HASHED and takes
 * one refcnt on behalf of the table. */
294 static int ip_vs_svc_hash(struct ip_vs_service *svc)
298 if (svc->flags & IP_VS_SVC_F_HASHED) {
299 pr_err("%s(): request for already hashed, called from %pF\n",
300 __func__, __builtin_return_address(0));
304 if (svc->fwmark == 0) {
306 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
308 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
309 &svc->addr, svc->port);
310 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
313 * Hash it by fwmark in svc_fwm_table
315 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
316 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
319 svc->flags |= IP_VS_SVC_F_HASHED;
320 /* increase its refcnt because it is referenced by the svc table */
321 atomic_inc(&svc->refcnt);
327 * Unhashes a service from svc_table / svc_fwm_table.
328 * Should be called with locked tables.
/* Inverse of ip_vs_svc_hash(): removes from whichever table holds the
 * service, clears F_HASHED and drops the table's refcnt. */
330 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
332 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
333 pr_err("%s(): request for unhash flagged, called from %pF\n",
334 __func__, __builtin_return_address(0));
338 if (svc->fwmark == 0) {
339 /* Remove it from the svc_table table */
340 list_del(&svc->s_list);
342 /* Remove it from the svc_fwm_table table */
343 list_del(&svc->f_list);
346 svc->flags &= ~IP_VS_SVC_F_HASHED;
347 atomic_dec(&svc->refcnt);
353 * Get service by {netns, proto,addr,port} in the service table.
/* Walk one hash bucket and return the matching service, or NULL.
 * Caller must hold __ip_vs_svc_lock (no refcnt is taken here). */
355 static inline struct ip_vs_service *
356 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
357 const union nf_inet_addr *vaddr, __be16 vport)
360 struct ip_vs_service *svc;
362 /* Check for "full" addressed entries */
363 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
365 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
367 && ip_vs_addr_equal(af, &svc->addr, vaddr)
368 && (svc->port == vport)
369 && (svc->protocol == protocol)
370 && net_eq(svc->net, net)) {
381 * Get service by {fwmark} in the service table.
/* Lookup in the fwmark-keyed table; same locking contract as
 * __ip_vs_service_find() — caller holds __ip_vs_svc_lock. */
383 static inline struct ip_vs_service *
384 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
387 struct ip_vs_service *svc;
389 /* Check for fwmark addressed entries */
390 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
392 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
393 if (svc->fwmark == fwmark && svc->af == af
394 && net_eq(svc->net, net)) {
/* Public lookup used by the packet path: try fwmark first, then the
 * fully-addressed table, then the FTP heuristics, then the port-zero
 * catch-all.  On a hit, svc->usecnt is bumped under the read lock, so
 * the caller must release it with ip_vs_service_put(). */
403 struct ip_vs_service *
404 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
405 const union nf_inet_addr *vaddr, __be16 vport)
407 struct ip_vs_service *svc;
408 struct netns_ipvs *ipvs = net_ipvs(net);
410 read_lock(&__ip_vs_svc_lock);
413 * Check the table hashed by fwmark first
416 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422 * Check the table hashed by <protocol,addr,port>
423 * for "full" addressed entries
425 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
/* Fall back to the FTP service entry: data connections arrive on
 * ephemeral/FTPDATA ports but belong to the FTPPORT service. */
428 && protocol == IPPROTO_TCP
429 && atomic_read(&ipvs->ftpsvc_counter)
430 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
432 * Check if ftp service entry exists, the packet
433 * might belong to FTP data connections.
435 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
439 && atomic_read(&ipvs->nullsvc_counter)) {
441 * Check if the catch-all port (port zero) exists
443 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
448 atomic_inc(&svc->usecnt);
449 read_unlock(&__ip_vs_svc_lock);
451 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
452 fwmark, ip_vs_proto_name(protocol),
453 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
454 svc ? "hit" : "not hit");
/* Bind a destination to its service: takes one svc refcnt.
 * NOTE(review): the dest->svc assignment is elided in this excerpt. */
461 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463 atomic_inc(&svc->refcnt);
/* Drop the destination's reference on its service; free the service
 * (including its percpu stats) when this was the last reference. */
468 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
470 struct ip_vs_service *svc = dest->svc;
473 if (atomic_dec_and_test(&svc->refcnt)) {
474 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
476 IP_VS_DBG_ADDR(svc->af, &svc->addr),
477 ntohs(svc->port), atomic_read(&svc->usecnt));
478 free_percpu(svc->stats.cpustats);
485 * Returns hash value for real service
/* Same folding scheme as ip_vs_svc_hashkey() but without the netns
 * cookie: the rs_table is already per-netns (lives in netns_ipvs). */
487 static inline unsigned ip_vs_rs_hashkey(int af,
488 const union nf_inet_addr *addr,
491 register unsigned porth = ntohs(port);
492 __be32 addr_fold = addr->ip;
494 #ifdef CONFIG_IP_VS_IPV6
496 addr_fold = addr->ip6[0]^addr->ip6[1]^
497 addr->ip6[2]^addr->ip6[3];
500 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
505 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
506 * should be called with locked tables.
/* Insert a real server into the per-netns rs_table; a non-empty d_list
 * means it is already hashed, in which case this is a no-op branch. */
508 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
512 if (!list_empty(&dest->d_list)) {
517 * Hash by proto,addr,port,
518 * which are the parameters of the real service.
520 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
522 list_add(&dest->d_list, &ipvs->rs_table[hash]);
528 * UNhashes ip_vs_dest from rs_table.
529 * should be called with locked tables.
/* Remove from rs_table and re-init d_list so a later ip_vs_rs_hash()
 * sees the entry as unhashed (list_empty test above). */
531 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
534 * Remove it from the rs_table table.
536 if (!list_empty(&dest->d_list)) {
537 list_del(&dest->d_list);
538 INIT_LIST_HEAD(&dest->d_list);
545 * Lookup real service by <proto,addr,port> in the real service table.
/* Bucket walk under ipvs->rs_lock; returns the first match (no refcnt
 * is taken on the dest here). */
548 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
549 const union nf_inet_addr *daddr,
552 struct netns_ipvs *ipvs = net_ipvs(net);
554 struct ip_vs_dest *dest;
557 * Check for "full" addressed entries
558 * Return the first found entry
560 hash = ip_vs_rs_hashkey(af, daddr, dport);
562 read_lock(&ipvs->rs_lock);
563 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
565 && ip_vs_addr_equal(af, &dest->addr, daddr)
566 && (dest->port == dport)
567 && ((dest->protocol == protocol) ||
570 read_unlock(&ipvs->rs_lock);
574 read_unlock(&ipvs->rs_lock);
580 * Lookup destination by {addr,port} in the given service
/* Linear scan of svc->destinations; matches on af, address and port.
 * Returns the dest without taking a reference. */
582 static struct ip_vs_dest *
583 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
586 struct ip_vs_dest *dest;
589 * Find the destination for the given service
591 list_for_each_entry(dest, &svc->destinations, n_list) {
592 if ((dest->af == svc->af)
593 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
594 && (dest->port == dport)) {
604 * Find destination by {daddr,dport,vaddr,protocol}
605 * Cretaed to be used in ip_vs_process_message() in
606 * the backup synchronization daemon. It finds the
607 * destination to be bound to the received connection
610 * ip_vs_lookup_real_service() looked promissing, but
611 * seems not working as expected.
/* Resolve the service first (takes a usecnt via ip_vs_service_get),
 * then the dest inside it; dest->refcnt is bumped for the caller and
 * the service reference is dropped before returning. */
613 struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
614 const union nf_inet_addr *daddr,
616 const union nf_inet_addr *vaddr,
617 __be16 vport, __u16 protocol, __u32 fwmark)
619 struct ip_vs_dest *dest;
620 struct ip_vs_service *svc;
622 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
625 dest = ip_vs_lookup_dest(svc, daddr, dport);
627 atomic_inc(&dest->refcnt);
628 ip_vs_service_put(svc);
633 * Lookup dest by {svc,addr,port} in the destination trash.
634 * The destination trash is used to hold the destinations that are removed
635 * from the service table but are still referenced by some conn entries.
636 * The reason to add the destination trash is when the dest is temporary
637 * down (either by administrator or by monitor program), the dest can be
638 * picked back from the trash, the remaining connections to the dest can
639 * continue, and the counting information of the dest is also useful for
/* Scan ipvs->dest_trash for a dest matching both the real-server tuple
 * and the virtual-service identity (fwmark/vaddr/vport/protocol).  As a
 * side effect, any trash entry found with refcnt == 1 (only the trash
 * holds it) is purged and freed during the scan. */
642 static struct ip_vs_dest *
643 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
646 struct ip_vs_dest *dest, *nxt;
647 struct netns_ipvs *ipvs = net_ipvs(svc->net);
650 * Find the destination in trash
652 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
653 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
656 IP_VS_DBG_ADDR(svc->af, &dest->addr),
658 atomic_read(&dest->refcnt));
659 if (dest->af == svc->af &&
660 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
661 dest->port == dport &&
662 dest->vfwmark == svc->fwmark &&
663 dest->protocol == svc->protocol &&
665 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
666 dest->vport == svc->port))) {
672 * Try to purge the destination from trash if not referenced
674 if (atomic_read(&dest->refcnt) == 1) {
675 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
678 IP_VS_DBG_ADDR(svc->af, &dest->addr),
680 list_del(&dest->n_list);
681 ip_vs_dst_reset(dest);
682 __ip_vs_unbind_svc(dest);
683 free_percpu(dest->stats.cpustats);
693 * Clean up all the destinations in the trash
694 * Called by the ip_vs_control_cleanup()
696 * When the ip_vs_control_clearup is activated by ipvs module exit,
697 * the service tables must have been flushed and all the connections
698 * are expired, and the refcnt of each destination in the trash must
699 * be 1, so we simply release them here.
/* Unconditionally drain the trash for one netns: drop cached routes,
 * unbind from the service and free each entry's percpu stats. */
701 static void ip_vs_trash_cleanup(struct net *net)
703 struct ip_vs_dest *dest, *nxt;
704 struct netns_ipvs *ipvs = net_ipvs(net);
706 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
707 list_del(&dest->n_list);
708 ip_vs_dst_reset(dest);
709 __ip_vs_unbind_svc(dest);
710 free_percpu(dest->stats.cpustats);
/* Snapshot user-visible counters: each value reported is the delta
 * between the live counter (ustats) and the zero-point saved by
 * ip_vs_zero_stats() (ustats0), taken under src->lock. */
716 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
718 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
720 spin_lock_bh(&src->lock);
722 IP_VS_SHOW_STATS_COUNTER(conns);
723 IP_VS_SHOW_STATS_COUNTER(inpkts);
724 IP_VS_SHOW_STATS_COUNTER(outpkts);
725 IP_VS_SHOW_STATS_COUNTER(inbytes);
726 IP_VS_SHOW_STATS_COUNTER(outbytes);
728 ip_vs_read_estimator(dst, src);
730 spin_unlock_bh(&src->lock);
/* "Zero" the counters without touching the live values: record the
 * current ustats as the new zero point (ustats0) so future reads via
 * ip_vs_copy_stats() start from zero; rates are reset via the
 * estimator. */
734 ip_vs_zero_stats(struct ip_vs_stats *stats)
736 spin_lock_bh(&stats->lock);
738 /* get current counters as zero point, rates are zeroed */
740 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
742 IP_VS_ZERO_STATS_COUNTER(conns);
743 IP_VS_ZERO_STATS_COUNTER(inpkts);
744 IP_VS_ZERO_STATS_COUNTER(outpkts);
745 IP_VS_ZERO_STATS_COUNTER(inbytes);
746 IP_VS_ZERO_STATS_COUNTER(outbytes);
748 ip_vs_zero_estimator(stats);
750 spin_unlock_bh(&stats->lock);
754 * Update a destination in the given service
/* Apply user-supplied parameters (weight, forwarding flags, thresholds)
 * to @dest; @add distinguishes a newly created dest from an edit of an
 * existing one.  Serializes against packet-path users of the service
 * via __ip_vs_svc_lock + IP_VS_WAIT_WHILE on svc->usecnt. */
757 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
758 struct ip_vs_dest_user_kern *udest, int add)
760 struct netns_ipvs *ipvs = net_ipvs(svc->net);
763 /* set the weight and the flags */
764 atomic_set(&dest->weight, udest->weight);
765 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
766 conn_flags |= IP_VS_CONN_F_INACTIVE;
768 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
769 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
770 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
773 * Put the real service in rs_table if not present.
774 * For now only for NAT!
776 write_lock_bh(&ipvs->rs_lock);
777 ip_vs_rs_hash(ipvs, dest);
778 write_unlock_bh(&ipvs->rs_lock);
780 atomic_set(&dest->conn_flags, conn_flags);
782 /* bind the service */
784 __ip_vs_bind_svc(dest, svc);
/* dest moved between services: rebind and restart its stats */
786 if (dest->svc != svc) {
787 __ip_vs_unbind_svc(dest);
788 ip_vs_zero_stats(&dest->stats);
789 __ip_vs_bind_svc(dest, svc);
793 /* set the dest status flags */
794 dest->flags |= IP_VS_DEST_F_AVAILABLE;
/* raising (or removing) the upper threshold clears overload state */
796 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
797 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
798 dest->u_threshold = udest->u_threshold;
799 dest->l_threshold = udest->l_threshold;
801 spin_lock_bh(&dest->dst_lock);
802 ip_vs_dst_reset(dest);
803 spin_unlock_bh(&dest->dst_lock);
806 ip_vs_start_estimator(svc->net, &dest->stats);
808 write_lock_bh(&__ip_vs_svc_lock);
810 /* Wait until all other svc users go away */
811 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
814 list_add(&dest->n_list, &svc->destinations);
818 /* call the update_service, because server weight may be changed */
819 if (svc->scheduler->update_service)
820 svc->scheduler->update_service(svc);
822 write_unlock_bh(&__ip_vs_svc_lock);
827 * Create a destination for the given service
/* Validate the destination address (unicast or local, per family),
 * allocate and zero-init the ip_vs_dest plus its percpu stats, copy
 * the service identity into it, and hand it to __ip_vs_update_dest()
 * with add=1.  Result is returned via *dest_p. */
830 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
831 struct ip_vs_dest **dest_p)
833 struct ip_vs_dest *dest;
838 #ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
841 if ((!(atype & IPV6_ADDR_UNICAST) ||
842 atype & IPV6_ADDR_LINKLOCAL) &&
843 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
848 atype = inet_addr_type(svc->net, udest->addr.ip);
849 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
853 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
855 pr_err("%s(): no memory.\n", __func__);
858 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
859 if (!dest->stats.cpustats) {
860 pr_err("%s() alloc_percpu failed\n", __func__);
/* record the virtual service identity so trash lookups can match */
865 dest->protocol = svc->protocol;
866 dest->vaddr = svc->addr;
867 dest->vport = svc->port;
868 dest->vfwmark = svc->fwmark;
869 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
870 dest->port = udest->port;
872 atomic_set(&dest->activeconns, 0);
873 atomic_set(&dest->inactconns, 0);
874 atomic_set(&dest->persistconns, 0);
875 atomic_set(&dest->refcnt, 1);
877 INIT_LIST_HEAD(&dest->d_list);
878 spin_lock_init(&dest->dst_lock);
879 spin_lock_init(&dest->stats.lock);
880 __ip_vs_update_dest(svc, dest, udest, 1);
894 * Add a destination into an existing service
/* User-space entry point: validate weight/thresholds, reject duplicates
 * already linked to the service, resurrect a matching dest from the
 * trash if one exists, otherwise allocate a fresh one via
 * ip_vs_new_dest(). */
897 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
899 struct ip_vs_dest *dest;
900 union nf_inet_addr daddr;
901 __be16 dport = udest->port;
906 if (udest->weight < 0) {
907 pr_err("%s(): server weight less than zero\n", __func__);
911 if (udest->l_threshold > udest->u_threshold) {
912 pr_err("%s(): lower threshold is higher than upper threshold\n",
917 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
920 * Check if the dest already exists in the list
922 dest = ip_vs_lookup_dest(svc, &daddr, dport);
925 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
930 * Check if the dest already exists in the trash and
931 * is from the same service
933 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
936 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
937 "dest->refcnt=%d, service %u/%s:%u\n",
938 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
939 atomic_read(&dest->refcnt),
941 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
945 * Get the destination from the trash
947 list_del(&dest->n_list);
949 __ip_vs_update_dest(svc, dest, udest, 1);
953 * Allocate and initialize the dest structure
955 ret = ip_vs_new_dest(svc, udest, &dest);
964 * Edit a destination in the given service
/* User-space entry point: same validation as ip_vs_add_dest(), but the
 * dest must already exist; parameters are applied with add=0. */
967 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
969 struct ip_vs_dest *dest;
970 union nf_inet_addr daddr;
971 __be16 dport = udest->port;
975 if (udest->weight < 0) {
976 pr_err("%s(): server weight less than zero\n", __func__);
980 if (udest->l_threshold > udest->u_threshold) {
981 pr_err("%s(): lower threshold is higher than upper threshold\n",
986 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
989 * Lookup the destination list
991 dest = ip_vs_lookup_dest(svc, &daddr, dport);
994 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
998 __ip_vs_update_dest(svc, dest, udest, 0);
1006 * Delete a destination (must be already unlinked from the service)
/* Stop its estimator, unhash from rs_table, then either free the dest
 * (when refcnt drops to 0) or park it in the trash while connections
 * still reference it (trash holds one extra refcnt). */
1008 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1010 struct netns_ipvs *ipvs = net_ipvs(net);
1012 ip_vs_stop_estimator(net, &dest->stats);
1015 * Remove it from the d-linked list with the real services.
1017 write_lock_bh(&ipvs->rs_lock);
1018 ip_vs_rs_unhash(dest);
1019 write_unlock_bh(&ipvs->rs_lock);
1022 * Decrease the refcnt of the dest, and free the dest
1023 * if nobody refers to it (refcnt=0). Otherwise, throw
1024 * the destination into the trash.
1026 if (atomic_dec_and_test(&dest->refcnt)) {
1027 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1029 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1031 ip_vs_dst_reset(dest);
1032 /* simply decrease svc->refcnt here, let the caller check
1033 and release the service if nobody refers to it.
1034 Only user context can release destination and service,
1035 and only one user context can update virtual service at a
1036 time, so the operation here is OK */
1037 atomic_dec(&dest->svc->refcnt);
1038 free_percpu(dest->stats.cpustats);
1041 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042 "dest->refcnt=%d\n",
1043 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1045 atomic_read(&dest->refcnt));
1046 list_add(&dest->n_list, &ipvs->dest_trash);
1047 atomic_inc(&dest->refcnt);
1053 * Unlink a destination from the given service
/* Mark the dest unavailable, detach it from svc->destinations, and
 * (when @svcupd is set) notify the scheduler so it re-weights. */
1055 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056 struct ip_vs_dest *dest,
1059 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1062 * Remove it from the d-linked destination list.
1064 list_del(&dest->n_list);
1068 * Call the update_service function of its scheduler
1070 if (svcupd && svc->scheduler->update_service)
1071 svc->scheduler->update_service(svc);
1076 * Delete a destination server in the given service
/* User-space entry point: find the dest, quiesce the service under the
 * write lock (waiting out packet-path users), unlink it, then hand it
 * to __ip_vs_del_dest() for refcnt-based free-or-trash handling. */
1079 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1081 struct ip_vs_dest *dest;
1082 __be16 dport = udest->port;
1086 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1089 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1093 write_lock_bh(&__ip_vs_svc_lock);
1096 * Wait until all other svc users go away.
1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1101 * Unlink dest from the service
1103 __ip_vs_unlink_dest(svc, dest, 1);
1105 write_unlock_bh(&__ip_vs_svc_lock);
1108 * Delete the destination
1110 __ip_vs_del_dest(svc->net, dest);
1119 * Add a service into the service hash table
/* Create a virtual service from the user request: resolve scheduler and
 * optional persistence engine by name, validate the IPv6 netmask range,
 * allocate the service plus percpu stats, bind scheduler/PE, bump the
 * FTP/null-service counters and hash it into the tables.  The error
 * unwinding (labels elided in this excerpt) releases in reverse order. */
1122 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1123 struct ip_vs_service **svc_p)
1126 struct ip_vs_scheduler *sched = NULL;
1127 struct ip_vs_pe *pe = NULL;
1128 struct ip_vs_service *svc = NULL;
1129 struct netns_ipvs *ipvs = net_ipvs(net);
1131 /* increase the module use count */
1132 ip_vs_use_count_inc();
1134 /* Lookup the scheduler by 'u->sched_name' */
1135 sched = ip_vs_scheduler_get(u->sched_name);
1136 if (sched == NULL) {
1137 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1142 if (u->pe_name && *u->pe_name) {
1143 pe = ip_vs_pe_getbyname(u->pe_name);
1145 pr_info("persistence engine module ip_vs_pe_%s "
1146 "not found\n", u->pe_name);
1152 #ifdef CONFIG_IP_VS_IPV6
1153 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1159 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1161 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1165 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1166 if (!svc->stats.cpustats) {
1167 pr_err("%s() alloc_percpu failed\n", __func__);
1171 /* I'm the first user of the service */
1172 atomic_set(&svc->usecnt, 0);
1173 atomic_set(&svc->refcnt, 0);
1176 svc->protocol = u->protocol;
1177 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1178 svc->port = u->port;
1179 svc->fwmark = u->fwmark;
1180 svc->flags = u->flags;
1181 svc->timeout = u->timeout * HZ;
1182 svc->netmask = u->netmask;
1185 INIT_LIST_HEAD(&svc->destinations);
1186 rwlock_init(&svc->sched_lock);
1187 spin_lock_init(&svc->stats.lock);
1189 /* Bind the scheduler */
1190 ret = ip_vs_bind_scheduler(svc, sched);
1195 /* Bind the ct retriever */
1196 ip_vs_bind_pe(svc, pe);
1199 /* Update the virtual service counters */
1200 if (svc->port == FTPPORT)
1201 atomic_inc(&ipvs->ftpsvc_counter);
1202 else if (svc->port == 0)
1203 atomic_inc(&ipvs->nullsvc_counter);
1205 ip_vs_start_estimator(net, &svc->stats);
1207 /* Count only IPv4 services for old get/setsockopt interface */
1208 if (svc->af == AF_INET)
1209 ipvs->num_services++;
1211 /* Hash the service into the service table */
1212 write_lock_bh(&__ip_vs_svc_lock);
1213 ip_vs_svc_hash(svc);
1214 write_unlock_bh(&__ip_vs_svc_lock);
/* error unwinding: unbind, drop inc/stats, put scheduler, unpin module */
1222 ip_vs_unbind_scheduler(svc);
1225 ip_vs_app_inc_put(svc->inc);
1228 if (svc->stats.cpustats)
1229 free_percpu(svc->stats.cpustats);
1232 ip_vs_scheduler_put(sched);
1235 /* decrease the module use count */
1236 ip_vs_use_count_dec();
1243 * Edit a service and bind it with a new scheduler
/* Change flags/timeout/netmask and optionally swap the scheduler and
 * persistence engine of an existing service.  Performed under the
 * write lock after waiting out packet-path users; old scheduler/PE
 * references are released only after the lock is dropped. */
1246 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1248 struct ip_vs_scheduler *sched, *old_sched;
1249 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1253 * Lookup the scheduler, by 'u->sched_name'
1255 sched = ip_vs_scheduler_get(u->sched_name);
1256 if (sched == NULL) {
1257 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1262 if (u->pe_name && *u->pe_name) {
1263 pe = ip_vs_pe_getbyname(u->pe_name);
1265 pr_info("persistence engine module ip_vs_pe_%s "
1266 "not found\n", u->pe_name);
1273 #ifdef CONFIG_IP_VS_IPV6
1274 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1280 write_lock_bh(&__ip_vs_svc_lock);
1283 * Wait until all other svc users go away.
1285 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1288 * Set the flags and timeout value
1290 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1291 svc->timeout = u->timeout * HZ;
1292 svc->netmask = u->netmask;
1294 old_sched = svc->scheduler;
1295 if (sched != old_sched) {
1297 * Unbind the old scheduler
1299 if ((ret = ip_vs_unbind_scheduler(svc))) {
1305 * Bind the new scheduler
1307 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1309 * If ip_vs_bind_scheduler fails, restore the old
1311 * The main reason of failure is out of memory.
1313 * The question is if the old scheduler can be
1314 * restored all the time. TODO: if it cannot be
1315 * restored some time, we must delete the service,
1316 * otherwise the system may crash.
1318 ip_vs_bind_scheduler(svc, old_sched);
1326 ip_vs_unbind_pe(svc);
1327 ip_vs_bind_pe(svc, pe);
1331 write_unlock_bh(&__ip_vs_svc_lock);
1333 ip_vs_scheduler_put(old_sched);
1334 ip_vs_pe_put(old_pe);
1340 * Delete a service from the service list
1341 * - The service must be unlinked, unlocked and not referenced!
1342 * - We are called under _bh lock
/* Tear down a quiesced service: stop estimator, release scheduler/PE/
 * app-inc, delete every destination, decrement the FTP/null counters,
 * free the service when unreferenced, and unpin the module. */
1344 static void __ip_vs_del_service(struct ip_vs_service *svc)
1346 struct ip_vs_dest *dest, *nxt;
1347 struct ip_vs_scheduler *old_sched;
1348 struct ip_vs_pe *old_pe;
1349 struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* NOTE(review): unconditional pr_info() trace on every service delete
 * looks like leftover debugging — should probably be IP_VS_DBG or
 * removed; confirm against upstream. */
1351 pr_info("%s: enter\n", __func__);
1353 /* Count only IPv4 services for old get/setsockopt interface */
1354 if (svc->af == AF_INET)
1355 ipvs->num_services--;
1357 ip_vs_stop_estimator(svc->net, &svc->stats);
1359 /* Unbind scheduler */
1360 old_sched = svc->scheduler;
1361 ip_vs_unbind_scheduler(svc);
1362 ip_vs_scheduler_put(old_sched);
1364 /* Unbind persistence engine */
1366 ip_vs_unbind_pe(svc);
1367 ip_vs_pe_put(old_pe);
1369 /* Unbind app inc */
1371 ip_vs_app_inc_put(svc->inc);
1376 * Unlink the whole destination list
1378 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1379 __ip_vs_unlink_dest(svc, dest, 0);
1380 __ip_vs_del_dest(svc->net, dest);
1384 * Update the virtual service counters
1386 if (svc->port == FTPPORT)
1387 atomic_dec(&ipvs->ftpsvc_counter);
1388 else if (svc->port == 0)
1389 atomic_dec(&ipvs->nullsvc_counter);
1392 * Free the service if nobody refers to it
1394 if (atomic_read(&svc->refcnt) == 0) {
1395 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1397 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1398 ntohs(svc->port), atomic_read(&svc->usecnt));
1399 free_percpu(svc->stats.cpustats);
1403 /* decrease the module use count */
1404 ip_vs_use_count_dec();
1408 * Unlink a service from list and try to delete it if its refcnt reached 0
/* Unhash under the write lock, wait for packet-path users to drain
 * (usecnt == 0), then delete while still holding the lock. */
1410 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1413 * Unhash it from the service table
1415 write_lock_bh(&__ip_vs_svc_lock);
1417 ip_vs_svc_unhash(svc);
1420 * Wait until all the svc users go away.
1422 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1424 __ip_vs_del_service(svc);
1426 write_unlock_bh(&__ip_vs_svc_lock);
1430 * Delete a service from the service list
/* Thin user-space entry point wrapping ip_vs_unlink_service(). */
1432 static int ip_vs_del_service(struct ip_vs_service *svc)
1436 ip_vs_unlink_service(svc);
1443 * Flush all the virtual services
/* Remove every service belonging to @net from both hash tables.  Uses
 * the _safe iterator because ip_vs_unlink_service() deletes entries. */
1445 static int ip_vs_flush(struct net *net)
1448 struct ip_vs_service *svc, *nxt;
1451 * Flush the service table hashed by <netns,protocol,addr,port>
1453 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1454 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1456 if (net_eq(svc->net, net))
1457 ip_vs_unlink_service(svc);
1462 * Flush the service table hashed by fwmark
1464 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1465 list_for_each_entry_safe(svc, nxt,
1466 &ip_vs_svc_fwm_table[idx], f_list) {
1467 if (net_eq(svc->net, net))
1468 ip_vs_unlink_service(svc);
1477 * Zero counters in a service or all services
/* Reset the stats zero-point of the service and of each of its
 * destinations, serialized against other table users. */
1479 static int ip_vs_zero_service(struct ip_vs_service *svc)
1481 struct ip_vs_dest *dest;
1483 write_lock_bh(&__ip_vs_svc_lock);
1484 list_for_each_entry(dest, &svc->destinations, n_list) {
1485 ip_vs_zero_stats(&dest->stats);
1487 ip_vs_zero_stats(&svc->stats);
1488 write_unlock_bh(&__ip_vs_svc_lock);
/* Zero the counters of every service in @net (both tables) plus the
 * netns-wide totals. */
1492 static int ip_vs_zero_all(struct net *net)
1495 struct ip_vs_service *svc;
1497 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1498 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1499 if (net_eq(svc->net, net))
1500 ip_vs_zero_service(svc);
1504 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1505 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1506 if (net_eq(svc->net, net))
1507 ip_vs_zero_service(svc);
1511 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1515 #ifdef CONFIG_SYSCTL
/* sysctl handler for the defense-mode knobs (drop_entry, drop_packet,
 * secure_tcp): accept writes in [0,3], restore the old value otherwise,
 * and re-run update_defense_level() on a valid change.
 * NOTE(review): uses current->nsproxy->net_ns rather than the table's
 * owning netns — confirm this matches how the table is registered. */
1517 proc_do_defense_mode(ctl_table *table, int write,
1518 void __user *buffer, size_t *lenp, loff_t *ppos)
1520 struct net *net = current->nsproxy->net_ns;
1521 int *valp = table->data;
1525 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1526 if (write && (*valp != val)) {
1527 if ((*valp < 0) || (*valp > 3)) {
1528 /* Restore the correct value */
1531 update_defense_level(net_ipvs(net));
/* sysctl handler for sync_threshold (a pair of ints): require
 * 0 <= valp[0] < valp[1]; otherwise roll back to the saved values. */
1538 proc_do_sync_threshold(ctl_table *table, int write,
1539 void __user *buffer, size_t *lenp, loff_t *ppos)
1541 int *valp = table->data;
1545 /* backup the value first */
1546 memcpy(val, valp, sizeof(val));
1548 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1549 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550 /* Restore the correct value */
1551 memcpy(valp, val, sizeof(val));
/* sysctl handler for sync_version: only 0 or 1 are valid; a valid
 * change switches the sync protocol via ip_vs_sync_switch_mode(). */
1557 proc_do_sync_mode(ctl_table *table, int write,
1558 void __user *buffer, size_t *lenp, loff_t *ppos)
1560 int *valp = table->data;
1564 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565 if (write && (*valp != val)) {
1566 if ((*valp < 0) || (*valp > 1)) {
1567 /* Restore the correct value */
1570 struct net *net = current->nsproxy->net_ns;
1571 ip_vs_sync_switch_mode(net, val);
1578 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1579 * Do not change order or insert new entries without
1580 * align with netns init in __ip_vs_control_init()
/* NOTE(review): .data pointers for most entries are filled in per-netns
 * at init time (elided here); only debug_level and the timeout_* DoS
 * entries carry static .data in the visible lines. */
1583 static struct ctl_table vs_vars[] = {
1585 .procname = "amemthresh",
1586 .maxlen = sizeof(int),
1588 .proc_handler = proc_dointvec,
1591 .procname = "am_droprate",
1592 .maxlen = sizeof(int),
1594 .proc_handler = proc_dointvec,
1597 .procname = "drop_entry",
1598 .maxlen = sizeof(int),
1600 .proc_handler = proc_do_defense_mode,
1603 .procname = "drop_packet",
1604 .maxlen = sizeof(int),
1606 .proc_handler = proc_do_defense_mode,
1608 #ifdef CONFIG_IP_VS_NFCT
1610 .procname = "conntrack",
1611 .maxlen = sizeof(int),
1613 .proc_handler = &proc_dointvec,
1617 .procname = "secure_tcp",
1618 .maxlen = sizeof(int),
1620 .proc_handler = proc_do_defense_mode,
1623 .procname = "snat_reroute",
1624 .maxlen = sizeof(int),
1626 .proc_handler = &proc_dointvec,
1629 .procname = "sync_version",
1630 .maxlen = sizeof(int),
1632 .proc_handler = &proc_do_sync_mode,
1635 .procname = "cache_bypass",
1636 .maxlen = sizeof(int),
1638 .proc_handler = proc_dointvec,
1641 .procname = "expire_nodest_conn",
1642 .maxlen = sizeof(int),
1644 .proc_handler = proc_dointvec,
1647 .procname = "expire_quiescent_template",
1648 .maxlen = sizeof(int),
1650 .proc_handler = proc_dointvec,
1653 .procname = "sync_threshold",
/* maxlen covers the two-int sync_threshold pair in netns_ipvs */
1655 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1657 .proc_handler = proc_do_sync_threshold,
1660 .procname = "nat_icmp_send",
1661 .maxlen = sizeof(int),
1663 .proc_handler = proc_dointvec,
1665 #ifdef CONFIG_IP_VS_DEBUG
1667 .procname = "debug_level",
1668 .data = &sysctl_ip_vs_debug_level,
1669 .maxlen = sizeof(int),
1671 .proc_handler = proc_dointvec,
/* The timeout_* entries below expose the DoS-protection state timeouts
 * (vs_timeout_table_dos) in jiffies-converted seconds. */
1676 .procname = "timeout_established",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1678 .maxlen = sizeof(int),
1680 .proc_handler = proc_dointvec_jiffies,
1683 .procname = "timeout_synsent",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1685 .maxlen = sizeof(int),
1687 .proc_handler = proc_dointvec_jiffies,
1690 .procname = "timeout_synrecv",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1692 .maxlen = sizeof(int),
1694 .proc_handler = proc_dointvec_jiffies,
1697 .procname = "timeout_finwait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1699 .maxlen = sizeof(int),
1701 .proc_handler = proc_dointvec_jiffies,
1704 .procname = "timeout_timewait",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1706 .maxlen = sizeof(int),
1708 .proc_handler = proc_dointvec_jiffies,
1711 .procname = "timeout_close",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1713 .maxlen = sizeof(int),
1715 .proc_handler = proc_dointvec_jiffies,
1718 .procname = "timeout_closewait",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1720 .maxlen = sizeof(int),
1722 .proc_handler = proc_dointvec_jiffies,
1725 .procname = "timeout_lastack",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1727 .maxlen = sizeof(int),
1729 .proc_handler = proc_dointvec_jiffies,
1732 .procname = "timeout_listen",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1734 .maxlen = sizeof(int),
1736 .proc_handler = proc_dointvec_jiffies,
1739 .procname = "timeout_synack",
1740 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1741 .maxlen = sizeof(int),
1743 .proc_handler = proc_dointvec_jiffies,
1746 .procname = "timeout_udp",
1747 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1748 .maxlen = sizeof(int),
1750 .proc_handler = proc_dointvec_jiffies,
1753 .procname = "timeout_icmp",
1754 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1755 .maxlen = sizeof(int),
1757 .proc_handler = proc_dointvec_jiffies,
/* /proc/sys path "net/ipv4/vs" under which the IPVS sysctls are
 * registered; exported so other IPVS modules can register there too. */
1763 const struct ctl_path net_vs_ctl_path[] = {
1764 { .procname = "net", },
1765 { .procname = "ipv4", },
1766 { .procname = "vs", },
1769 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1772 #ifdef CONFIG_PROC_FS
1775 struct seq_net_private p; /* Do not move this, netns depends upon it*/
/* iterator position: which of the two service hash tables we are in */
1776 struct list_head *table;
1781 * Write the contents of the VS rule table to a PROCfs file.
1782 * (It is kept just for backward compatibility)
/* Map the forwarding-method bits of a connection's flags to a short
 * human-readable name for the /proc listing (return strings elided). */
1784 static inline const char *ip_vs_fwd_name(unsigned flags)
1786 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1787 case IP_VS_CONN_F_LOCALNODE:
1789 case IP_VS_CONN_F_TUNNEL:
1791 case IP_VS_CONN_F_DROUTE:
1799 /* Get the Nth entry in the two lists */
/* Position the seq_file iterator at entry @pos, counting only services
 * that belong to this netns; records which hash table (proto vs fwmark)
 * the entry was found in so _next knows how to continue. */
1800 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1802 struct net *net = seq_file_net(seq);
1803 struct ip_vs_iter *iter = seq->private;
1805 struct ip_vs_service *svc;
1807 /* look in hash by protocol */
1808 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1809 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1810 if (net_eq(svc->net, net) && pos-- == 0) {
1811 iter->table = ip_vs_svc_table;
1818 /* keep looking in fwmark */
1819 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1820 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1821 if (net_eq(svc->net, net) && pos-- == 0) {
1822 iter->table = ip_vs_svc_fwm_table;
/* seq_file .start: take the service-table read lock for the whole dump;
 * pos 0 yields the header token, otherwise seek to entry pos-1. */
1832 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1833 __acquires(__ip_vs_svc_lock)
1836 read_lock_bh(&__ip_vs_svc_lock);
1837 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/* seq_file .next: advance within the current hash bucket, then across
 * buckets, and finally fall through from the protocol table to the
 * fwmark table (transition lines partially elided). */
1841 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1843 struct list_head *e;
1844 struct ip_vs_iter *iter;
1845 struct ip_vs_service *svc;
1848 if (v == SEQ_START_TOKEN)
1849 return ip_vs_info_array(seq,0);
1852 iter = seq->private;
1854 if (iter->table == ip_vs_svc_table) {
1855 /* next service in table hashed by protocol */
1856 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1857 return list_entry(e, struct ip_vs_service, s_list);
1860 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1861 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1867 iter->table = ip_vs_svc_fwm_table;
1872 /* next service in hashed by fwmark */
1873 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1874 return list_entry(e, struct ip_vs_service, f_list);
1877 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1878 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file .stop: drop the read lock taken in _start. */
1886 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1887 __releases(__ip_vs_svc_lock)
1889 read_unlock_bh(&__ip_vs_svc_lock);
/* seq_file .show: print the legacy /proc/net/ip_vs listing — a version
 * header for the start token, otherwise one service line (proto/addr or
 * FWM form, IPv6-aware) followed by one line per destination. */
1893 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1895 if (v == SEQ_START_TOKEN) {
1897 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1898 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1900 "Prot LocalAddress:Port Scheduler Flags\n");
1902 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1904 const struct ip_vs_service *svc = v;
1905 const struct ip_vs_iter *iter = seq->private;
1906 const struct ip_vs_dest *dest;
1908 if (iter->table == ip_vs_svc_table) {
1909 #ifdef CONFIG_IP_VS_IPV6
1910 if (svc->af == AF_INET6)
1911 seq_printf(seq, "%s [%pI6]:%04X %s ",
1912 ip_vs_proto_name(svc->protocol),
1915 svc->scheduler->name);
1918 seq_printf(seq, "%s %08X:%04X %s %s ",
1919 ip_vs_proto_name(svc->protocol),
1920 ntohl(svc->addr.ip),
1922 svc->scheduler->name,
1923 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
/* fwmark-keyed services print the mark instead of proto/addr/port */
1925 seq_printf(seq, "FWM %08X %s %s",
1926 svc->fwmark, svc->scheduler->name,
1927 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1930 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1931 seq_printf(seq, "persistent %d %08X\n",
1933 ntohl(svc->netmask));
1935 seq_putc(seq, '\n');
1937 list_for_each_entry(dest, &svc->destinations, n_list) {
1938 #ifdef CONFIG_IP_VS_IPV6
1939 if (dest->af == AF_INET6)
1942 " %-7s %-6d %-10d %-10d\n",
1945 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1946 atomic_read(&dest->weight),
1947 atomic_read(&dest->activeconns),
1948 atomic_read(&dest->inactconns));
1953 "%-7s %-6d %-10d %-10d\n",
1954 ntohl(dest->addr.ip),
1956 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1957 atomic_read(&dest->weight),
1958 atomic_read(&dest->activeconns),
1959 atomic_read(&dest->inactconns));
/* seq_file plumbing for /proc/net/ip_vs (per-netns iterator state). */
1966 static const struct seq_operations ip_vs_info_seq_ops = {
1967 .start = ip_vs_info_seq_start,
1968 .next = ip_vs_info_seq_next,
1969 .stop = ip_vs_info_seq_stop,
1970 .show = ip_vs_info_seq_show,
1973 static int ip_vs_info_open(struct inode *inode, struct file *file)
1975 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1976 sizeof(struct ip_vs_iter));
1979 static const struct file_operations ip_vs_info_fops = {
1980 .owner = THIS_MODULE,
1981 .open = ip_vs_info_open,
1983 .llseek = seq_lseek,
1984 .release = seq_release_private,
1989 #ifdef CONFIG_PROC_FS
/* Render /proc/net/ip_vs_stats: a snapshot of the netns-wide totals
 * (conns/packets/bytes) followed by the estimated per-second rates. */
1990 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1992 struct net *net = seq_file_single_net(seq);
1993 struct ip_vs_stats_user show;
1995 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1997 " Total Incoming Outgoing Incoming Outgoing\n");
1999 " Conns Packets Packets Bytes Bytes\n");
2001 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2002 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2003 show.inpkts, show.outpkts,
2004 (unsigned long long) show.inbytes,
2005 (unsigned long long) show.outbytes);
2007 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2009 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2010 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2011 show.cps, show.inpps, show.outpps,
2012 show.inbps, show.outbps);
/* single_open plumbing for /proc/net/ip_vs_stats. */
2017 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2019 return single_open_net(inode, file, ip_vs_stats_show);
2022 static const struct file_operations ip_vs_stats_fops = {
2023 .owner = THIS_MODULE,
2024 .open = ip_vs_stats_seq_open,
2026 .llseek = seq_lseek,
2027 .release = single_release,
/* Render /proc/net/ip_vs_stats_percpu: one counter line per possible
 * CPU (byte counters read under the u64_stats seqcount for consistency
 * on 32-bit), then the totals and estimated rates under tot_stats->lock. */
2030 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2032 struct net *net = seq_file_single_net(seq);
2033 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2034 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2035 struct ip_vs_stats_user rates;
2038 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2040 " Total Incoming Outgoing Incoming Outgoing\n");
2042 "CPU Conns Packets Packets Bytes Bytes\n");
2044 for_each_possible_cpu(i) {
2045 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2047 __u64 inbytes, outbytes;
/* retry loop guarantees a torn-free read of the 64-bit byte counters */
2050 start = u64_stats_fetch_begin_bh(&u->syncp);
2051 inbytes = u->ustats.inbytes;
2052 outbytes = u->ustats.outbytes;
2053 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2055 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2056 i, u->ustats.conns, u->ustats.inpkts,
2057 u->ustats.outpkts, (__u64)inbytes,
2061 spin_lock_bh(&tot_stats->lock);
2063 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2064 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2065 tot_stats->ustats.outpkts,
2066 (unsigned long long) tot_stats->ustats.inbytes,
2067 (unsigned long long) tot_stats->ustats.outbytes);
2069 ip_vs_read_estimator(&rates, tot_stats);
2071 spin_unlock_bh(&tot_stats->lock);
2073 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2075 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2076 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
/* single_open plumbing for /proc/net/ip_vs_stats_percpu. */
2086 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2088 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2091 static const struct file_operations ip_vs_stats_percpu_fops = {
2092 .owner = THIS_MODULE,
2093 .open = ip_vs_stats_percpu_seq_open,
2095 .llseek = seq_lseek,
2096 .release = single_release,
2101 * Set timeout values for tcp tcpfin udp in the timeout_table.
/* Values arrive in seconds from userspace; zero means "leave unchanged".
 * Each non-zero value is converted to jiffies and stored in the per-netns
 * protocol timeout table. */
2103 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2105 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2106 struct ip_vs_proto_data *pd;
2109 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2114 #ifdef CONFIG_IP_VS_PROTO_TCP
2115 if (u->tcp_timeout) {
2116 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2117 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2118 = u->tcp_timeout * HZ;
2121 if (u->tcp_fin_timeout) {
2122 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2123 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2124 = u->tcp_fin_timeout * HZ;
2128 #ifdef CONFIG_IP_VS_PROTO_UDP
2129 if (u->udp_timeout) {
2130 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2131 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2132 = u->udp_timeout * HZ;
/* Expected argument length for each IP_VS_SO_SET_* sockopt command,
 * indexed by command offset from IP_VS_BASE_CTL; used by
 * do_ip_vs_set_ctl() to validate the user-supplied length. */
2139 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2140 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2141 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2142 sizeof(struct ip_vs_dest_user))
2143 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2144 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2145 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2147 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2148 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2149 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2150 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2151 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2152 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2153 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2154 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2156 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2157 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2158 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/* Convert a legacy (IPv4-only) ip_vs_service_user from the sockopt
 * interface into the kernel-internal ip_vs_service_user_kern form.
 * sched_name is aliased, not copied — the caller's buffer must outlive
 * usvc's use. */
2161 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2162 struct ip_vs_service_user *usvc_compat)
2164 memset(usvc, 0, sizeof(*usvc));
2167 usvc->protocol = usvc_compat->protocol;
2168 usvc->addr.ip = usvc_compat->addr;
2169 usvc->port = usvc_compat->port;
2170 usvc->fwmark = usvc_compat->fwmark;
2172 /* Deep copy of sched_name is not needed here */
2173 usvc->sched_name = usvc_compat->sched_name;
2175 usvc->flags = usvc_compat->flags;
2176 usvc->timeout = usvc_compat->timeout;
2177 usvc->netmask = usvc_compat->netmask;
/* Convert a legacy ip_vs_dest_user (IPv4 sockopt interface) into the
 * kernel-internal ip_vs_dest_user_kern form. */
2180 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2181 struct ip_vs_dest_user *udest_compat)
2183 memset(udest, 0, sizeof(*udest));
2185 udest->addr.ip = udest_compat->addr;
2186 udest->port = udest_compat->port;
2187 udest->conn_flags = udest_compat->conn_flags;
2188 udest->weight = udest_compat->weight;
2189 udest->u_threshold = udest_compat->u_threshold;
2190 udest->l_threshold = udest_compat->l_threshold;
/* Legacy setsockopt entry point for all IP_VS_SO_SET_* commands.
 * Validates capability and argument length, copies the argument in,
 * bumps the module refcount, and dispatches under __ip_vs_mutex:
 * flush/timeout/daemon commands are handled directly, the rest operate
 * on a looked-up service.  Several error paths are elided in this view. */
2194 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2196 struct net *net = sock_net(sk);
2198 unsigned char arg[MAX_ARG_LEN];
2199 struct ip_vs_service_user *usvc_compat;
2200 struct ip_vs_service_user_kern usvc;
2201 struct ip_vs_service *svc;
2202 struct ip_vs_dest_user *udest_compat;
2203 struct ip_vs_dest_user_kern udest;
2205 if (!capable(CAP_NET_ADMIN))
2208 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2210 if (len < 0 || len > MAX_ARG_LEN)
2212 if (len != set_arglen[SET_CMDID(cmd)]) {
2213 pr_err("set_ctl: len %u != %u\n",
2214 len, set_arglen[SET_CMDID(cmd)]);
2218 if (copy_from_user(arg, user, len) != 0)
2221 /* increase the module use count */
2222 ip_vs_use_count_inc();
2224 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2229 if (cmd == IP_VS_SO_SET_FLUSH) {
2230 /* Flush the virtual service */
2231 ret = ip_vs_flush(net);
2233 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2234 /* Set timeout values for (tcp tcpfin udp) */
2235 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2237 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2238 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2239 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2242 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2243 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2244 ret = stop_sync_thread(net, dm->state);
2248 usvc_compat = (struct ip_vs_service_user *)arg;
2249 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2251 /* We only use the new structs internally, so copy userspace compat
2252 * structs to extended internal versions */
2253 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2254 ip_vs_copy_udest_compat(&udest, udest_compat);
2256 if (cmd == IP_VS_SO_SET_ZERO) {
2257 /* if no service address is set, zero counters in all */
2258 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2259 ret = ip_vs_zero_all(net);
2264 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2265 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2266 usvc.protocol != IPPROTO_SCTP) {
2267 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2268 usvc.protocol, &usvc.addr.ip,
2269 ntohs(usvc.port), usvc.sched_name);
2274 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2275 if (usvc.fwmark == 0)
2276 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2277 &usvc.addr, usvc.port);
2279 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
/* Only ADD may proceed without an existing, protocol-matching service */
2281 if (cmd != IP_VS_SO_SET_ADD
2282 && (svc == NULL || svc->protocol != usvc.protocol)) {
2288 case IP_VS_SO_SET_ADD:
2292 ret = ip_vs_add_service(net, &usvc, &svc);
2294 case IP_VS_SO_SET_EDIT:
2295 ret = ip_vs_edit_service(svc, &usvc);
2297 case IP_VS_SO_SET_DEL:
2298 ret = ip_vs_del_service(svc);
2302 case IP_VS_SO_SET_ZERO:
2303 ret = ip_vs_zero_service(svc);
2305 case IP_VS_SO_SET_ADDDEST:
2306 ret = ip_vs_add_dest(svc, &udest);
2308 case IP_VS_SO_SET_EDITDEST:
2309 ret = ip_vs_edit_dest(svc, &udest);
2311 case IP_VS_SO_SET_DELDEST:
2312 ret = ip_vs_del_dest(svc, &udest);
2319 mutex_unlock(&__ip_vs_mutex);
2321 /* decrease the module use count */
2322 ip_vs_use_count_dec();
/* Fill a userspace-facing ip_vs_service_entry from an internal service:
 * copies identity, scheduler name, flags, timeout (converted back to
 * seconds), netmask, dest count, and a stats snapshot. */
2329 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2331 dst->protocol = src->protocol;
2332 dst->addr = src->addr.ip;
2333 dst->port = src->port;
2334 dst->fwmark = src->fwmark;
2335 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2336 dst->flags = src->flags;
2337 dst->timeout = src->timeout / HZ;
2338 dst->netmask = src->netmask;
2339 dst->num_dests = src->num_dests;
2340 ip_vs_copy_stats(&dst->stats, &src->stats);
/* Copy up to get->num_services service entries to userspace for the
 * legacy getsockopt interface.  Walks both hash tables; only IPv4
 * services of this netns are exported. */
2344 __ip_vs_get_service_entries(struct net *net,
2345 const struct ip_vs_get_services *get,
2346 struct ip_vs_get_services __user *uptr)
2349 struct ip_vs_service *svc;
2350 struct ip_vs_service_entry entry;
2353 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2354 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2355 /* Only expose IPv4 entries to old interface */
2356 if (svc->af != AF_INET || !net_eq(svc->net, net))
2359 if (count >= get->num_services)
2361 memset(&entry, 0, sizeof(entry));
2362 ip_vs_copy_service(&entry, svc);
2363 if (copy_to_user(&uptr->entrytable[count],
2364 &entry, sizeof(entry))) {
2372 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2373 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2374 /* Only expose IPv4 entries to old interface */
2375 if (svc->af != AF_INET || !net_eq(svc->net, net))
2378 if (count >= get->num_services)
2380 memset(&entry, 0, sizeof(entry));
2381 ip_vs_copy_service(&entry, svc);
2382 if (copy_to_user(&uptr->entrytable[count],
2383 &entry, sizeof(entry))) {
/* Copy up to get->num_dests destination entries of one service to
 * userspace (legacy interface).  The service is looked up by fwmark if
 * set, otherwise by <protocol, addr, port>. */
2395 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2396 struct ip_vs_get_dests __user *uptr)
2398 struct ip_vs_service *svc;
2399 union nf_inet_addr addr = { .ip = get->addr };
2403 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2405 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2410 struct ip_vs_dest *dest;
2411 struct ip_vs_dest_entry entry;
2413 list_for_each_entry(dest, &svc->destinations, n_list) {
2414 if (count >= get->num_dests)
2417 entry.addr = dest->addr.ip;
2418 entry.port = dest->port;
2419 entry.conn_flags = atomic_read(&dest->conn_flags);
2420 entry.weight = atomic_read(&dest->weight);
2421 entry.u_threshold = dest->u_threshold;
2422 entry.l_threshold = dest->l_threshold;
2423 entry.activeconns = atomic_read(&dest->activeconns);
2424 entry.inactconns = atomic_read(&dest->inactconns);
2425 entry.persistconns = atomic_read(&dest->persistconns);
2426 ip_vs_copy_stats(&entry.stats, &dest->stats);
2427 if (copy_to_user(&uptr->entrytable[count],
2428 &entry, sizeof(entry))) {
/* Report the current TCP established / TCP FIN-wait / UDP timeouts of
 * this netns in seconds (jiffies divided by HZ). */
2440 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2442 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2443 struct ip_vs_proto_data *pd;
2446 #ifdef CONFIG_IP_VS_PROTO_TCP
2447 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2448 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2449 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2451 #ifdef CONFIG_IP_VS_PROTO_UDP
2452 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2454 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/* Minimum argument length for each IP_VS_SO_GET_* sockopt command,
 * indexed by offset from IP_VS_BASE_CTL; checked in do_ip_vs_get_ctl(). */
2459 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2460 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2461 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2462 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2463 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2464 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2465 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2467 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2468 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2469 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2470 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2471 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2472 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2473 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2474 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/* Legacy getsockopt entry point for all IP_VS_SO_GET_* commands.
 * Validates capability and minimum length, copies the request in, and
 * answers each query under __ip_vs_mutex.  Variable-length replies
 * (SERVICES/DESTS) re-check the user length against the computed size. */
2478 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2480 unsigned char arg[128];
2482 unsigned int copylen;
2483 struct net *net = sock_net(sk);
2484 struct netns_ipvs *ipvs = net_ipvs(net);
2487 if (!capable(CAP_NET_ADMIN))
2490 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2493 if (*len < get_arglen[GET_CMDID(cmd)]) {
2494 pr_err("get_ctl: len %u < %u\n",
2495 *len, get_arglen[GET_CMDID(cmd)]);
2499 copylen = get_arglen[GET_CMDID(cmd)];
2503 if (copy_from_user(arg, user, copylen) != 0)
2506 if (mutex_lock_interruptible(&__ip_vs_mutex))
2507 return -ERESTARTSYS;
2510 case IP_VS_SO_GET_VERSION:
2514 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2515 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2516 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2520 *len = strlen(buf)+1;
2524 case IP_VS_SO_GET_INFO:
2526 struct ip_vs_getinfo info;
2527 info.version = IP_VS_VERSION_CODE;
2528 info.size = ip_vs_conn_tab_size;
2529 info.num_services = ipvs->num_services;
2530 if (copy_to_user(user, &info, sizeof(info)) != 0)
2535 case IP_VS_SO_GET_SERVICES:
2537 struct ip_vs_get_services *get;
2540 get = (struct ip_vs_get_services *)arg;
2541 size = sizeof(*get) +
2542 sizeof(struct ip_vs_service_entry) * get->num_services;
2544 pr_err("length: %u != %u\n", *len, size);
2548 ret = __ip_vs_get_service_entries(net, get, user);
2552 case IP_VS_SO_GET_SERVICE:
2554 struct ip_vs_service_entry *entry;
2555 struct ip_vs_service *svc;
2556 union nf_inet_addr addr;
2558 entry = (struct ip_vs_service_entry *)arg;
2559 addr.ip = entry->addr;
2561 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2563 svc = __ip_vs_service_find(net, AF_INET,
2564 entry->protocol, &addr,
2567 ip_vs_copy_service(entry, svc);
2568 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2575 case IP_VS_SO_GET_DESTS:
2577 struct ip_vs_get_dests *get;
2580 get = (struct ip_vs_get_dests *)arg;
2581 size = sizeof(*get) +
2582 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2584 pr_err("length: %u != %u\n", *len, size);
2588 ret = __ip_vs_get_dest_entries(net, get, user);
2592 case IP_VS_SO_GET_TIMEOUT:
2594 struct ip_vs_timeout_user t;
2596 __ip_vs_get_timeouts(net, &t);
2597 if (copy_to_user(user, &t, sizeof(t)) != 0)
2602 case IP_VS_SO_GET_DAEMON:
2604 struct ip_vs_daemon_user d[2];
/* d[0] describes the master daemon, d[1] the backup daemon */
2606 memset(&d, 0, sizeof(d));
2607 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2608 d[0].state = IP_VS_STATE_MASTER;
2609 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2610 sizeof(d[0].mcast_ifn));
2611 d[0].syncid = ipvs->master_syncid;
2613 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2614 d[1].state = IP_VS_STATE_BACKUP;
2615 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2616 sizeof(d[1].mcast_ifn));
2617 d[1].syncid = ipvs->backup_syncid;
2619 if (copy_to_user(user, &d, sizeof(d)) != 0)
2629 mutex_unlock(&__ip_vs_mutex);
/* Netfilter sockopt registration: routes the IP_VS_SO_SET_*/IP_VS_SO_GET_*
 * ranges to the legacy handlers above. */
2634 static struct nf_sockopt_ops ip_vs_sockopts = {
2636 .set_optmin = IP_VS_BASE_CTL,
2637 .set_optmax = IP_VS_SO_SET_MAX+1,
2638 .set = do_ip_vs_set_ctl,
2639 .get_optmin = IP_VS_BASE_CTL,
2640 .get_optmax = IP_VS_SO_GET_MAX+1,
2641 .get = do_ip_vs_get_ctl,
2642 .owner = THIS_MODULE,
2646 * Generic Netlink interface
2649 /* IPVS genetlink family */
2650 static struct genl_family ip_vs_genl_family = {
2651 .id = GENL_ID_GENERATE,
2653 .name = IPVS_GENL_NAME,
2654 .version = IPVS_GENL_VERSION,
2655 .maxattr = IPVS_CMD_MAX,
2656 .netnsok = true, /* Make ipvsadm to work on netns */
2659 /* Policy used for first-level command attributes */
2660 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2661 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2662 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2663 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2664 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2665 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2666 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2669 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2670 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2671 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2672 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2673 .len = IP_VS_IFNAME_MAXLEN },
2674 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2677 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2678 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2679 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2680 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2681 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2682 .len = sizeof(union nf_inet_addr) },
2683 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2684 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2685 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2686 .len = IP_VS_SCHEDNAME_MAXLEN },
2687 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2688 .len = IP_VS_PENAME_MAXLEN },
2689 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2690 .len = sizeof(struct ip_vs_flags) },
2691 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2692 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2693 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2696 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2697 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2698 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2699 .len = sizeof(union nf_inet_addr) },
2700 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2701 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2702 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2703 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2704 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2705 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2706 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2707 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2708 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/* Emit a nested @container_type attribute holding a snapshot of @stats.
 * On attribute-space exhaustion the NLA_PUT_* macros jump to
 * nla_put_failure, which cancels the nest. */
2711 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2712 struct ip_vs_stats *stats)
2714 struct ip_vs_stats_user ustats;
2715 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2719 ip_vs_copy_stats(&ustats, stats);
2721 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2722 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2723 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2724 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2725 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2726 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2730 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2732 nla_nest_end(skb, nl_stats);
2737 nla_nest_cancel(skb, nl_stats);
/* Emit a nested IPVS_CMD_ATTR_SERVICE attribute describing @svc:
 * identity (fwmark or proto/addr/port), scheduler/pe names, flags,
 * timeout in seconds, netmask, and a nested stats block. */
2741 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2742 struct ip_vs_service *svc)
2744 struct nlattr *nl_service;
2745 struct ip_vs_flags flags = { .flags = svc->flags,
2748 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2752 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2755 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2757 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2758 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2759 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2762 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2764 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2765 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2766 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2767 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2769 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2770 goto nla_put_failure;
2772 nla_nest_end(skb, nl_service);
2777 nla_nest_cancel(skb, nl_service);
/* Emit one NLM_F_MULTI dump message for @svc; cancels the genl header
 * if filling the service attribute fails (e.g. skb full). */
2781 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2782 struct ip_vs_service *svc,
2783 struct netlink_callback *cb)
2787 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2788 &ip_vs_genl_family, NLM_F_MULTI,
2789 IPVS_CMD_NEW_SERVICE);
2793 if (ip_vs_genl_fill_service(skb, svc) < 0)
2794 goto nla_put_failure;
2796 return genlmsg_end(skb, hdr);
2799 genlmsg_cancel(skb, hdr);
/* Netlink dump callback: walks both service hash tables under
 * __ip_vs_mutex, skipping entries already dumped (cb->args[0]) and
 * services of other netns; stops when the skb fills up. */
2803 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2804 struct netlink_callback *cb)
2807 int start = cb->args[0];
2808 struct ip_vs_service *svc;
2809 struct net *net = skb_sknet(skb);
2811 mutex_lock(&__ip_vs_mutex);
2812 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2813 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2814 if (++idx <= start || !net_eq(svc->net, net))
2816 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2818 goto nla_put_failure;
2823 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2824 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2825 if (++idx <= start || !net_eq(svc->net, net))
2827 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2829 goto nla_put_failure;
2835 mutex_unlock(&__ip_vs_mutex);
/* Parse a nested IPVS_CMD_ATTR_SERVICE attribute into @usvc and look up
 * the matching service into *ret_svc.  Mandatory identity is AF plus
 * either fwmark or the <protocol, addr, port> triple; when @full_entry
 * is set the scheduler/flags/timeout/netmask attributes are required too.
 * sched_name/pe_name alias netlink attribute data (no deep copy). */
2841 static int ip_vs_genl_parse_service(struct net *net,
2842 struct ip_vs_service_user_kern *usvc,
2843 struct nlattr *nla, int full_entry,
2844 struct ip_vs_service **ret_svc)
2846 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2847 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2848 struct ip_vs_service *svc;
2850 /* Parse mandatory identifying service fields first */
2852 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2855 nla_af = attrs[IPVS_SVC_ATTR_AF];
2856 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2857 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2858 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2859 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2861 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2864 memset(usvc, 0, sizeof(*usvc));
2866 usvc->af = nla_get_u16(nla_af);
2867 #ifdef CONFIG_IP_VS_IPV6
2868 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2870 if (usvc->af != AF_INET)
2872 return -EAFNOSUPPORT;
/* fwmark services carry a placeholder protocol of TCP */
2875 usvc->protocol = IPPROTO_TCP;
2876 usvc->fwmark = nla_get_u32(nla_fwmark);
2878 usvc->protocol = nla_get_u16(nla_protocol);
2879 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2880 usvc->port = nla_get_u16(nla_port);
2885 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2887 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2888 &usvc->addr, usvc->port);
2891 /* If a full entry was requested, check for the additional fields */
2893 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2895 struct ip_vs_flags flags;
2897 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2898 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2899 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2900 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2901 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2903 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2906 nla_memcpy(&flags, nla_flags, sizeof(flags));
2908 /* prefill flags from service if it already exists */
2910 usvc->flags = svc->flags;
2912 /* set new flags from userland */
2913 usvc->flags = (usvc->flags & ~flags.mask) |
2914 (flags.flags & flags.mask);
2915 usvc->sched_name = nla_data(nla_sched);
2916 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2917 usvc->timeout = nla_get_u32(nla_timeout);
2918 usvc->netmask = nla_get_u32(nla_netmask);
/*
 * Resolve a nested service attribute to an existing ip_vs_service.
 * Thin wrapper around ip_vs_genl_parse_service() with full_entry == 0:
 * returns the found service, NULL when no such service exists, or an
 * ERR_PTR() on parse failure.
 */
2924 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2927 struct ip_vs_service_user_kern usvc;
2928 struct ip_vs_service *svc;
2931 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2932 return ret ? ERR_PTR(ret) : svc;
/*
 * Append one real-server (destination) description to @skb as a nested
 * IPVS_CMD_ATTR_DEST attribute: address/port, forwarding method (the
 * IP_VS_CONN_F_FWD_MASK bits of conn_flags), weight, thresholds,
 * connection counters and stats.  Returns 0 on success; on failure the
 * partially-built nest is cancelled.
 */
2935 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2937 struct nlattr *nl_dest;
2939 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2943 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2944 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
/* Only the forwarding-method bits of conn_flags are exported. */
2946 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2947 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2948 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2949 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2950 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2951 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2952 atomic_read(&dest->activeconns));
2953 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2954 atomic_read(&dest->inactconns));
2955 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2956 atomic_read(&dest->persistconns));
2958 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2959 goto nla_put_failure;
2961 nla_nest_end(skb, nl_dest);
/* Error path: undo the nest so the skb stays consistent. */
2966 nla_nest_cancel(skb, nl_dest);
/*
 * Emit one NLM_F_MULTI dump message for a single destination: open a
 * genetlink header, fill it via ip_vs_genl_fill_dest(), and finalize.
 * On fill failure the message is cancelled (error return is in an
 * elided line of this excerpt).
 */
2970 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2971 struct netlink_callback *cb)
2975 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2976 &ip_vs_genl_family, NLM_F_MULTI,
2981 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2982 goto nla_put_failure;
2984 return genlmsg_end(skb, hdr);
2987 genlmsg_cancel(skb, hdr);
/*
 * Generic Netlink dump handler for IPVS_CMD_GET_DEST: re-parses the
 * service attribute from the original request (cb->nlh), finds the
 * service, then emits one message per destination on its list.
 * cb->args[0] records how many destinations were already dumped so a
 * resumed dump can skip them (the skip test is in an elided line).
 * Runs under __ip_vs_mutex.
 */
2991 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2992 struct netlink_callback *cb)
2995 int start = cb->args[0];
2996 struct ip_vs_service *svc;
2997 struct ip_vs_dest *dest;
2998 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2999 struct net *net = skb_sknet(skb);
3001 mutex_lock(&__ip_vs_mutex);
3003 /* Try to find the service for which to dump destinations */
3004 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3005 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3009 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
/* Bail out on parse error (ERR_PTR) or unknown service (NULL). */
3010 if (IS_ERR(svc) || svc == NULL)
3013 /* Dump the destinations */
3014 list_for_each_entry(dest, &svc->destinations, n_list) {
3017 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3019 goto nla_put_failure;
3027 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_DEST attribute into *udest.
 *
 * Address and port are always mandatory.  When @full_entry is non-zero
 * (add/edit destination) the forwarding method, weight and both
 * thresholds must also be present.  Returns 0 on success; the error
 * returns for missing/invalid attributes are in elided lines of this
 * excerpt.
 */
3032 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3033 struct nlattr *nla, int full_entry)
3035 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3036 struct nlattr *nla_addr, *nla_port;
3038 /* Parse mandatory identifying destination fields first */
3040 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3043 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3044 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3046 if (!(nla_addr && nla_port))
3049 memset(udest, 0, sizeof(*udest));
3051 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3052 udest->port = nla_get_u16(nla_port);
3054 /* If a full entry was requested, check for the additional fields */
3056 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3059 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3060 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3061 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3062 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3064 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
/* Keep only the forwarding-method bits; other conn_flags bits are
 * not settable through this attribute. */
3067 udest->conn_flags = nla_get_u32(nla_fwd)
3068 & IP_VS_CONN_F_FWD_MASK;
3069 udest->weight = nla_get_u32(nla_weight);
3070 udest->u_threshold = nla_get_u32(nla_u_thresh);
3071 udest->l_threshold = nla_get_u32(nla_l_thresh);
/*
 * Append one sync-daemon description (state, multicast interface name,
 * sync id) to @skb as a nested IPVS_CMD_ATTR_DAEMON attribute.  On
 * failure the nest is cancelled.
 * NOTE(review): state/syncid are declared __be32 here but are emitted
 * with NLA_PUT_U32 and compared against host-order IP_VS_STATE_*
 * constants by the caller — the annotation looks suspect; confirm
 * against the attribute definitions.
 */
3077 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3078 const char *mcast_ifn, __be32 syncid)
3080 struct nlattr *nl_daemon;
3082 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3086 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3087 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3088 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3090 nla_nest_end(skb, nl_daemon);
3095 nla_nest_cancel(skb, nl_daemon);
/*
 * Emit one NLM_F_MULTI IPVS_CMD_NEW_DAEMON dump message describing a
 * single sync daemon, built via ip_vs_genl_fill_daemon().  The message
 * is cancelled on fill failure.
 */
3099 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3100 const char *mcast_ifn, __be32 syncid,
3101 struct netlink_callback *cb)
3104 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3105 &ip_vs_genl_family, NLM_F_MULTI,
3106 IPVS_CMD_NEW_DAEMON);
3110 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3111 goto nla_put_failure;
3113 return genlmsg_end(skb, hdr);
3116 genlmsg_cancel(skb, hdr);
/*
 * Generic Netlink dump handler for IPVS_CMD_GET_DAEMON: emits at most
 * one message for the master sync daemon and one for the backup sync
 * daemon.  cb->args[0] / cb->args[1] record which of the two have
 * already been sent, so a resumed dump does not repeat them.  Runs
 * under __ip_vs_mutex.
 */
3120 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3121 struct netlink_callback *cb)
/* Fix: derive the netns from the requesting socket with skb_sknet(),
 * matching every other genl handler in this file (see
 * ip_vs_genl_dump_services() / ip_vs_genl_dump_dests()); the previous
 * skb_net() call was inconsistent with them. */
3123 struct net *net = skb_sknet(skb);
3124 struct netns_ipvs *ipvs = net_ipvs(net);
3126 mutex_lock(&__ip_vs_mutex);
3127 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3128 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3129 ipvs->master_mcast_ifn,
3130 ipvs->master_syncid, cb) < 0)
3131 goto nla_put_failure;
3136 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3137 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3138 ipvs->backup_mcast_ifn,
3139 ipvs->backup_syncid, cb) < 0)
3140 goto nla_put_failure;
3146 mutex_unlock(&__ip_vs_mutex);
/*
 * IPVS_CMD_NEW_DAEMON handler: requires the state, multicast interface
 * and sync-id attributes, then starts the corresponding sync thread.
 * Returns start_sync_thread()'s result (the missing-attribute error
 * return is in an elided line).
 */
3151 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3153 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3154 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3155 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3158 return start_sync_thread(net,
3159 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3160 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3161 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
/*
 * IPVS_CMD_DEL_DAEMON handler: stops the sync thread selected by the
 * mandatory state attribute (master or backup).
 */
3164 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3166 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3169 return stop_sync_thread(net,
3170 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
/*
 * IPVS_CMD_SET_CONFIG handler: read the current protocol timeouts,
 * overwrite only those supplied by userland (TCP, TCP_FIN, UDP), and
 * apply the result.  Attributes not present keep their current values.
 */
3173 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3175 struct ip_vs_timeout_user t;
/* Start from the currently configured timeouts. */
3177 __ip_vs_get_timeouts(net, &t);
3179 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3180 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3182 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3184 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3186 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3187 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3189 return ip_vs_set_timeout(net, &t);
/*
 * Common .doit handler for all state-changing Generic Netlink commands
 * (NEW/SET/DEL service, dest and daemon, SET_CONFIG, ZERO, FLUSH).
 *
 * Dispatch order under __ip_vs_mutex:
 *   1. commands without a service argument (FLUSH, SET_CONFIG,
 *      NEW/DEL_DAEMON, ZERO without a service) are handled and exit
 *      early (the "goto out" lines are elided from this excerpt);
 *   2. the service attribute is parsed — fully for NEW/SET_SERVICE,
 *      identifying fields only otherwise;
 *   3. destination commands additionally parse the dest attribute;
 *   4. the final switch applies the command.
 * NOTE(review): many error-path lines (returns, labels, braces) are
 * elided here; do not infer control flow across the gaps.
 */
3192 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3194 struct ip_vs_service *svc = NULL;
3195 struct ip_vs_service_user_kern usvc;
3196 struct ip_vs_dest_user_kern udest;
3198 int need_full_svc = 0, need_full_dest = 0;
3200 struct netns_ipvs *ipvs;
3202 net = skb_sknet(skb);
3203 ipvs = net_ipvs(net);
3204 cmd = info->genlhdr->cmd;
3206 mutex_lock(&__ip_vs_mutex);
3208 if (cmd == IPVS_CMD_FLUSH) {
3209 ret = ip_vs_flush(net);
3211 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3212 ret = ip_vs_genl_set_config(net, info->attrs);
3214 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3215 cmd == IPVS_CMD_DEL_DAEMON) {
3217 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
/* Daemon commands carry their attributes nested under
 * IPVS_CMD_ATTR_DAEMON; re-parse with the daemon policy. */
3219 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3220 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3221 info->attrs[IPVS_CMD_ATTR_DAEMON],
3222 ip_vs_daemon_policy)) {
3227 if (cmd == IPVS_CMD_NEW_DAEMON)
3228 ret = ip_vs_genl_new_daemon(net, daemon_attrs)
3230 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
/* ZERO without a service attribute means "zero everything". */
3232 } else if (cmd == IPVS_CMD_ZERO &&
3233 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3234 ret = ip_vs_zero_all(net);
3238 /* All following commands require a service argument, so check if we
3239 * received a valid one. We need a full service specification when
3240 * adding / editing a service. Only identifying members otherwise. */
3241 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3244 ret = ip_vs_genl_parse_service(net, &usvc,
3245 info->attrs[IPVS_CMD_ATTR_SERVICE],
3246 need_full_svc, &svc);
3250 /* Unless we're adding a new service, the service must already exist */
3251 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3256 /* Destination commands require a valid destination argument. For
3257 * adding / editing a destination, we need a full destination
3259 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3260 cmd == IPVS_CMD_DEL_DEST) {
3261 if (cmd != IPVS_CMD_DEL_DEST)
3264 ret = ip_vs_genl_parse_dest(&udest,
3265 info->attrs[IPVS_CMD_ATTR_DEST],
3272 case IPVS_CMD_NEW_SERVICE:
3274 ret = ip_vs_add_service(net, &usvc, &svc);
3278 case IPVS_CMD_SET_SERVICE:
3279 ret = ip_vs_edit_service(svc, &usvc);
3281 case IPVS_CMD_DEL_SERVICE:
3282 ret = ip_vs_del_service(svc);
3283 /* do not use svc, it can be freed */
3285 case IPVS_CMD_NEW_DEST:
3286 ret = ip_vs_add_dest(svc, &udest);
3288 case IPVS_CMD_SET_DEST:
3289 ret = ip_vs_edit_dest(svc, &udest);
3291 case IPVS_CMD_DEL_DEST:
3292 ret = ip_vs_del_dest(svc, &udest);
/* IPVS_CMD_ZERO with a service attribute: zero that service only. */
3295 ret = ip_vs_zero_service(svc);
3302 mutex_unlock(&__ip_vs_mutex);
/*
 * Common .doit handler for single-answer GET commands: builds a unicast
 * reply message for GET_SERVICE (one service), GET_CONFIG (protocol
 * timeouts) or GET_INFO (version and conn table size).  Unknown
 * commands are rejected before allocating the reply.  Runs under
 * __ip_vs_mutex; error-path lines are elided from this excerpt.
 */
3307 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3309 struct sk_buff *msg;
3311 int ret, cmd, reply_cmd;
3313 struct netns_ipvs *ipvs;
3315 net = skb_sknet(skb);
3316 ipvs = net_ipvs(net);
3317 cmd = info->genlhdr->cmd;
/* Each GET command replies with the matching NEW/SET command id. */
3319 if (cmd == IPVS_CMD_GET_SERVICE)
3320 reply_cmd = IPVS_CMD_NEW_SERVICE;
3321 else if (cmd == IPVS_CMD_GET_INFO)
3322 reply_cmd = IPVS_CMD_SET_INFO;
3323 else if (cmd == IPVS_CMD_GET_CONFIG)
3324 reply_cmd = IPVS_CMD_SET_CONFIG;
3326 pr_err("unknown Generic Netlink command\n");
3330 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3334 mutex_lock(&__ip_vs_mutex);
3336 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3338 goto nla_put_failure;
3341 case IPVS_CMD_GET_SERVICE:
3343 struct ip_vs_service *svc;
3345 svc = ip_vs_genl_find_service(net,
3346 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3351 ret = ip_vs_genl_fill_service(msg, svc);
3353 goto nla_put_failure;
3362 case IPVS_CMD_GET_CONFIG:
3364 struct ip_vs_timeout_user t;
3366 __ip_vs_get_timeouts(net, &t);
/* Timeout attributes are only present for compiled-in protocols. */
3367 #ifdef CONFIG_IP_VS_PROTO_TCP
3368 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3369 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3372 #ifdef CONFIG_IP_VS_PROTO_UDP
3373 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3379 case IPVS_CMD_GET_INFO:
3380 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3381 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3382 ip_vs_conn_tab_size);
3386 genlmsg_end(msg, reply);
3387 ret = genlmsg_reply(msg, info);
3391 pr_err("not enough space in Netlink message\n");
3397 mutex_unlock(&__ip_vs_mutex);
/*
 * Generic Netlink operation table for the IPVS family.  All mutating
 * commands route to ip_vs_genl_set_cmd(), single-answer queries to
 * ip_vs_genl_get_cmd(), and list queries to the per-object dump
 * handlers.  Every entry is admin-only (GENL_ADMIN_PERM).
 * NOTE(review): the GET_DAEMON, GET_CONFIG and FLUSH entries set no
 * .policy — presumably because they parse no request attributes;
 * confirm before relying on that.
 */
3403 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3405 .cmd = IPVS_CMD_NEW_SERVICE,
3406 .flags = GENL_ADMIN_PERM,
3407 .policy = ip_vs_cmd_policy,
3408 .doit = ip_vs_genl_set_cmd,
3411 .cmd = IPVS_CMD_SET_SERVICE,
3412 .flags = GENL_ADMIN_PERM,
3413 .policy = ip_vs_cmd_policy,
3414 .doit = ip_vs_genl_set_cmd,
3417 .cmd = IPVS_CMD_DEL_SERVICE,
3418 .flags = GENL_ADMIN_PERM,
3419 .policy = ip_vs_cmd_policy,
3420 .doit = ip_vs_genl_set_cmd,
3423 .cmd = IPVS_CMD_GET_SERVICE,
3424 .flags = GENL_ADMIN_PERM,
3425 .doit = ip_vs_genl_get_cmd,
3426 .dumpit = ip_vs_genl_dump_services,
3427 .policy = ip_vs_cmd_policy,
3430 .cmd = IPVS_CMD_NEW_DEST,
3431 .flags = GENL_ADMIN_PERM,
3432 .policy = ip_vs_cmd_policy,
3433 .doit = ip_vs_genl_set_cmd,
3436 .cmd = IPVS_CMD_SET_DEST,
3437 .flags = GENL_ADMIN_PERM,
3438 .policy = ip_vs_cmd_policy,
3439 .doit = ip_vs_genl_set_cmd,
3442 .cmd = IPVS_CMD_DEL_DEST,
3443 .flags = GENL_ADMIN_PERM,
3444 .policy = ip_vs_cmd_policy,
3445 .doit = ip_vs_genl_set_cmd,
3448 .cmd = IPVS_CMD_GET_DEST,
3449 .flags = GENL_ADMIN_PERM,
3450 .policy = ip_vs_cmd_policy,
3451 .dumpit = ip_vs_genl_dump_dests,
3454 .cmd = IPVS_CMD_NEW_DAEMON,
3455 .flags = GENL_ADMIN_PERM,
3456 .policy = ip_vs_cmd_policy,
3457 .doit = ip_vs_genl_set_cmd,
3460 .cmd = IPVS_CMD_DEL_DAEMON,
3461 .flags = GENL_ADMIN_PERM,
3462 .policy = ip_vs_cmd_policy,
3463 .doit = ip_vs_genl_set_cmd,
3466 .cmd = IPVS_CMD_GET_DAEMON,
3467 .flags = GENL_ADMIN_PERM,
3468 .dumpit = ip_vs_genl_dump_daemons,
3471 .cmd = IPVS_CMD_SET_CONFIG,
3472 .flags = GENL_ADMIN_PERM,
3473 .policy = ip_vs_cmd_policy,
3474 .doit = ip_vs_genl_set_cmd,
3477 .cmd = IPVS_CMD_GET_CONFIG,
3478 .flags = GENL_ADMIN_PERM,
3479 .doit = ip_vs_genl_get_cmd,
3482 .cmd = IPVS_CMD_GET_INFO,
3483 .flags = GENL_ADMIN_PERM,
3484 .doit = ip_vs_genl_get_cmd,
3487 .cmd = IPVS_CMD_ZERO,
3488 .flags = GENL_ADMIN_PERM,
3489 .policy = ip_vs_cmd_policy,
3490 .doit = ip_vs_genl_set_cmd,
3493 .cmd = IPVS_CMD_FLUSH,
3494 .flags = GENL_ADMIN_PERM,
3495 .doit = ip_vs_genl_set_cmd,
/* Register the IPVS Generic Netlink family together with its op table. */
3499 static int __init ip_vs_genl_register(void)
3501 return genl_register_family_with_ops(&ip_vs_genl_family,
3502 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
/* Unregister the IPVS Generic Netlink family (ops go with it). */
3505 static void ip_vs_genl_unregister(void)
3507 genl_unregister_family(&ip_vs_genl_family);
3510 /* End of Generic Netlink interface definitions */
3513 * per netns init/exit func.
3515 #ifdef CONFIG_SYSCTL
/*
 * Per-netns sysctl setup: initialize the defense-mode locks, duplicate
 * the vs_vars template for non-init namespaces, point each entry's
 * .data at this netns's netns_ipvs fields (seeding defaults for some),
 * register the table, start the totals estimator and schedule the
 * periodic defense work.
 * NOTE(review): the elided lines include the allocation-failure checks
 * and the error-path frees — the kmemdup/register failures are handled
 * there, not visible here.
 */
3516 int __net_init __ip_vs_control_init_sysctl(struct net *net)
3519 struct netns_ipvs *ipvs = net_ipvs(net);
3520 struct ctl_table *tbl;
3522 atomic_set(&ipvs->dropentry, 0);
3523 spin_lock_init(&ipvs->dropentry_lock);
3524 spin_lock_init(&ipvs->droppacket_lock);
3525 spin_lock_init(&ipvs->securetcp_lock);
/* init_net uses the static vs_vars table; other namespaces get a
 * private copy so .data pointers can differ per netns. */
3527 if (!net_eq(net, &init_net)) {
3528 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3533 /* Initialize sysctl defaults */
/* idx walks the table in declaration order; entries must stay in
 * sync with the vs_vars layout. */
3535 ipvs->sysctl_amemthresh = 1024;
3536 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3537 ipvs->sysctl_am_droprate = 10;
3538 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3539 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3540 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3541 #ifdef CONFIG_IP_VS_NFCT
3542 tbl[idx++].data = &ipvs->sysctl_conntrack;
3544 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3545 ipvs->sysctl_snat_reroute = 1;
3546 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3547 ipvs->sysctl_sync_ver = 1;
3548 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3549 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3550 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3551 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
/* sync_threshold is a two-element array, so maxlen is set too. */
3552 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3553 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3554 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3555 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3556 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3559 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3561 if (ipvs->sysctl_hdr == NULL) {
3562 if (!net_eq(net, &init_net))
3566 ip_vs_start_estimator(net, &ipvs->tot_stats);
3567 ipvs->sysctl_tbl = tbl;
3568 /* Schedule defense work */
3569 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3570 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
/*
 * Per-netns sysctl teardown: stop the periodic defense work (both the
 * delayed timer and any handler it already queued) and unregister the
 * net.ipv4.vs sysctl table set up by __ip_vs_control_init_sysctl().
 *
 * Fix: annotate __net_exit, not __net_init.  This routine runs at
 * namespace exit; code placed in the __net_init section can be
 * discarded after boot when CONFIG_NET_NS is disabled, so a cleanup
 * function must never live there.
 */
3575 void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net)
3577 struct netns_ipvs *ipvs = net_ipvs(net);
3579 cancel_delayed_work_sync(&ipvs->defense_work);
3580 cancel_work_sync(&ipvs->defense_work.work);
3581 unregister_net_sysctl_table(ipvs->sysctl_hdr);
/* CONFIG_SYSCTL disabled: no-op per-netns sysctl init/cleanup stubs.
 * Fix: the cleanup stub is annotated __net_exit — it runs at namespace
 * exit, so it must not be placed in the discardable __net_init section. */
3586 int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3587 void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net) { }
/*
 * Per-netns control init: set up the real-server lock and hash table,
 * the dest trash list and service counters, allocate per-cpu totals
 * stats, create the /proc/net/ip_vs* entries and finally run the
 * sysctl init.  On sysctl failure the per-cpu stats are freed (the
 * intervening error-path lines are elided from this excerpt).
 */
3591 int __net_init __ip_vs_control_init(struct net *net)
3594 struct netns_ipvs *ipvs = net_ipvs(net);
3596 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3598 /* Initialize rs_table */
3599 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3600 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3602 INIT_LIST_HEAD(&ipvs->dest_trash);
3603 atomic_set(&ipvs->ftpsvc_counter, 0);
3604 atomic_set(&ipvs->nullsvc_counter, 0);
/* Totals are accumulated per-cpu and summed on read. */
3607 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3608 if (!ipvs->tot_stats.cpustats) {
3609 pr_err("%s(): alloc_percpu.\n", __func__);
3612 spin_lock_init(&ipvs->tot_stats.lock);
3614 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3615 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3616 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3617 &ip_vs_stats_percpu_fops);
3619 if (__ip_vs_control_init_sysctl(net))
3625 free_percpu(ipvs->tot_stats.cpustats);
/*
 * Per-netns control cleanup, mirroring __ip_vs_control_init() in
 * reverse: drain the dest trash, stop the totals estimator, tear down
 * sysctls, remove the /proc entries and free the per-cpu stats.
 */
3629 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3631 struct netns_ipvs *ipvs = net_ipvs(net);
3633 ip_vs_trash_cleanup(net);
3634 ip_vs_stop_estimator(net, &ipvs->tot_stats);
3635 __ip_vs_control_cleanup_sysctl(net);
3636 proc_net_remove(net, "ip_vs_stats_percpu");
3637 proc_net_remove(net, "ip_vs_stats");
3638 proc_net_remove(net, "ip_vs");
3639 free_percpu(ipvs->tot_stats.cpustats);
/* pernet hooks: run control init/cleanup for every network namespace. */
3642 static struct pernet_operations ipvs_control_ops = {
3643 .init = __ip_vs_control_init,
3644 .exit = __ip_vs_control_cleanup,
/*
 * Module-wide control init: set up the global service hash tables,
 * register the pernet subsystem, the [gs]etsockopt interface and the
 * Generic Netlink interface.  Later registrations are unwound on
 * failure (netlink failure unregisters the sockopts; the remaining
 * unwind and returns are in elided lines).
 */
3647 int __init ip_vs_control_init(void)
3654 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3655 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3656 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3657 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3660 ret = register_pernet_subsys(&ipvs_control_ops);
3662 pr_err("cannot register namespace.\n");
3666 smp_wmb(); /* Do we really need it now ? */
3668 ret = nf_register_sockopt(&ip_vs_sockopts);
3670 pr_err("cannot register sockopt.\n");
3674 ret = ip_vs_genl_register();
3676 pr_err("cannot register Generic Netlink interface.\n");
/* Unwind the sockopt registration before failing. */
3677 nf_unregister_sockopt(&ip_vs_sockopts);
3685 unregister_pernet_subsys(&ipvs_control_ops);
3691 void ip_vs_control_cleanup(void)
3694 unregister_pernet_subsys(&ipvs_control_ops);
3695 ip_vs_genl_unregister();
3696 nf_unregister_sockopt(&ip_vs_sockopts);