2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
42 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ip6_route.h>
46 #include <net/route.h>
48 #include <net/genetlink.h>
50 #include <asm/uaccess.h>
52 #include <net/ip_vs.h>
/*
 * Module-wide state for the IPVS control plane: serialization locks,
 * DoS-defense counters and the sysctl-backed tunables.
 * NOTE(review): this excerpt elides some original lines (the embedded
 * line numbers skip), e.g. the #endif matching CONFIG_IP_VS_NFCT below
 * is not visible — confirm against the full source.
 */
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72 /* 1/rate drop and drop-entry variables */
/* Non-static: presumably read by the packet-processing path — verify. */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
/* Flag polled by defense_work_handler() to trigger random entry drops. */
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
77 /* number of virtual services */
/* Counts only IPv4 services (see ip_vs_add_service/__ip_vs_del_service). */
78 static int ip_vs_num_services = 0;
80 /* sysctl variables */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
/* Available-memory threshold (pages) used by update_defense_level(). */
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
94 int sysctl_ip_vs_snat_reroute = 1;
97 #ifdef CONFIG_IP_VS_DEBUG
98 static int sysctl_ip_vs_debug_level = 0;
/*
 * Return the current debug verbosity (sysctl-controlled).
 * Compiled only under CONFIG_IP_VS_DEBUG.
 */
100 int ip_vs_get_debug_level(void)
102 return sysctl_ip_vs_debug_level;
106 #ifdef CONFIG_IP_VS_IPV6
107 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/*
 * Decide whether @addr is a locally-hosted IPv6 address by doing a route
 * lookup in init_net and checking whether the output device is loopback.
 * NOTE(review): the flowi setup and return statements are elided in this
 * excerpt — confirm the exact return convention against the full source.
 */
108 static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
116 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
119 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
120 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
127 * update_defense_level is called from keventd and from sysctl,
128 * so it needs to protect itself from softirqs
/*
 * Re-evaluate the three DoS-defense strategies (drop_entry, drop_packet,
 * secure_tcp) against current memory pressure.  Each strategy's sysctl
 * value acts as a small state machine; mode values > 1 appear to be
 * "auto-enabled under memory pressure" states that are restored when
 * memory recovers.  NOTE(review): several case labels and the si_meminfo()
 * call are elided in this excerpt — confirm against the full source.
 */
130 static void update_defense_level(void)
/* Persists across calls so the secure_tcp branch can detect transitions. */
133 static int old_secure_tcp = 0;
138 /* we only count free and buffered memory (in pages) */
140 availmem = i.freeram + i.bufferram;
141 /* however in linux 2.5 the i.bufferram is total page cache size,
143 /* si_swapinfo(&i); */
144 /* availmem = availmem - (i.totalswap - i.freeswap); */
/* Memory pressure indicator: free+buffered pages below the threshold. */
146 nomem = (availmem < sysctl_ip_vs_amemthresh);
/* --- drop_entry strategy --- */
151 spin_lock(&__ip_vs_dropentry_lock);
152 switch (sysctl_ip_vs_drop_entry) {
154 atomic_set(&ip_vs_dropentry, 0);
158 atomic_set(&ip_vs_dropentry, 1);
159 sysctl_ip_vs_drop_entry = 2;
161 atomic_set(&ip_vs_dropentry, 0);
166 atomic_set(&ip_vs_dropentry, 1);
168 atomic_set(&ip_vs_dropentry, 0);
169 sysctl_ip_vs_drop_entry = 1;
173 atomic_set(&ip_vs_dropentry, 1);
176 spin_unlock(&__ip_vs_dropentry_lock);
/* --- drop_packet strategy: rate derived from how far below threshold --- */
179 spin_lock(&__ip_vs_droppacket_lock);
180 switch (sysctl_ip_vs_drop_packet) {
186 ip_vs_drop_rate = ip_vs_drop_counter
187 = sysctl_ip_vs_amemthresh /
188 (sysctl_ip_vs_amemthresh-availmem);
189 sysctl_ip_vs_drop_packet = 2;
196 ip_vs_drop_rate = ip_vs_drop_counter
197 = sysctl_ip_vs_amemthresh /
198 (sysctl_ip_vs_amemthresh-availmem);
201 sysctl_ip_vs_drop_packet = 1;
/* Mode 3 (always on): fixed administrator-configured drop rate. */
205 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
208 spin_unlock(&__ip_vs_droppacket_lock);
/* --- secure_tcp strategy: switch protocol timeout tables on transition --- */
211 spin_lock(&ip_vs_securetcp_lock);
212 switch (sysctl_ip_vs_secure_tcp) {
214 if (old_secure_tcp >= 2)
219 if (old_secure_tcp < 2)
221 sysctl_ip_vs_secure_tcp = 2;
223 if (old_secure_tcp >= 2)
229 if (old_secure_tcp < 2)
232 if (old_secure_tcp >= 2)
234 sysctl_ip_vs_secure_tcp = 1;
238 if (old_secure_tcp < 2)
242 old_secure_tcp = sysctl_ip_vs_secure_tcp;
/* Apply the (possibly changed) secure-TCP timeout table. */
244 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
245 spin_unlock(&ip_vs_securetcp_lock);
252 * Timer for checking the defense
254 #define DEFENSE_TIMER_PERIOD 1*HZ
255 static void defense_work_handler(struct work_struct *work);
256 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
/*
 * Periodic (1 * HZ) worker: refresh the defense levels, randomly drop
 * connection entries while drop_entry defense is active, then re-arm.
 */
258 static void defense_work_handler(struct work_struct *work)
260 update_defense_level();
261 if (atomic_read(&ip_vs_dropentry))
262 ip_vs_random_dropentry();
/* Self re-scheduling keeps the defense check running for module lifetime. */
264 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
/* Pin this module while a user (e.g. a configured service) references it. */
268 ip_vs_use_count_inc(void)
270 return try_module_get(THIS_MODULE);
/* Release the module reference taken by ip_vs_use_count_inc(). */
274 ip_vs_use_count_dec(void)
276 module_put(THIS_MODULE);
281 * Hash table: for virtual service lookups
283 #define IP_VS_SVC_TAB_BITS 8
284 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
285 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
287 /* the service table hashed by <protocol, addr, port> */
288 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
289 /* the service table hashed by fwmark */
290 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
293 * Hash table: for real service lookups
295 #define IP_VS_RTAB_BITS 4
296 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
297 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
/* Real-server table, hashed by <addr,port>; entries are ip_vs_dest. */
299 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
302 * Trash for destinations
/* Holds removed dests still referenced by connections (see trash helpers). */
304 static LIST_HEAD(ip_vs_dest_trash);
307 * FTP & NULL virtual service counters
/* Used by ip_vs_service_get() to short-circuit its fallback lookups. */
309 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
310 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
314 * Returns hash value for virtual service
/*
 * Hash <proto, addr, port> into an index for ip_vs_svc_table.
 * For IPv6 the four address words are XOR-folded into one 32-bit value.
 */
316 static __inline__ unsigned
317 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
320 register unsigned porth = ntohs(port);
321 __be32 addr_fold = addr->ip;
323 #ifdef CONFIG_IP_VS_IPV6
325 addr_fold = addr->ip6[0]^addr->ip6[1]^
326 addr->ip6[2]^addr->ip6[3];
329 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
330 & IP_VS_SVC_TAB_MASK;
334 * Returns hash value of fwmark for virtual service lookup
/* Trivial hash: the fwmark masked to the table size. */
336 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
338 return fwmark & IP_VS_SVC_TAB_MASK;
342 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
343 * or in the ip_vs_svc_fwm_table by fwmark.
344 * Should be called with locked tables.
/*
 * A service lives in exactly one of the two tables: by fwmark when
 * svc->fwmark != 0, otherwise by <proto,addr,port>.  Takes a refcnt
 * on behalf of the table.  Double-hashing is rejected with an error log.
 */
346 static int ip_vs_svc_hash(struct ip_vs_service *svc)
350 if (svc->flags & IP_VS_SVC_F_HASHED) {
351 pr_err("%s(): request for already hashed, called from %pF\n",
352 __func__, __builtin_return_address(0));
356 if (svc->fwmark == 0) {
358 * Hash it by <protocol,addr,port> in ip_vs_svc_table
360 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
362 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
365 * Hash it by fwmark in ip_vs_svc_fwm_table
367 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
368 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
371 svc->flags |= IP_VS_SVC_F_HASHED;
372 /* increase its refcnt because it is referenced by the svc table */
373 atomic_inc(&svc->refcnt);
379 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
380 * Should be called with locked tables.
/*
 * Inverse of ip_vs_svc_hash(): removes the service from whichever table
 * holds it and drops the table's refcnt.  Unhashing a non-hashed service
 * is rejected with an error log.
 */
382 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
384 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
385 pr_err("%s(): request for unhash flagged, called from %pF\n",
386 __func__, __builtin_return_address(0));
390 if (svc->fwmark == 0) {
391 /* Remove it from the ip_vs_svc_table table */
392 list_del(&svc->s_list);
394 /* Remove it from the ip_vs_svc_fwm_table table */
395 list_del(&svc->f_list);
398 svc->flags &= ~IP_VS_SVC_F_HASHED;
399 atomic_dec(&svc->refcnt);
405 * Get service by {proto,addr,port} in the service table.
/*
 * Walk the hash chain for <af,proto,addr,port> and return the matching
 * service, or NULL.  Caller must hold __ip_vs_svc_lock; no refcount is
 * taken here (see ip_vs_service_get for the refcounted wrapper).
 */
407 static inline struct ip_vs_service *
408 __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
412 struct ip_vs_service *svc;
414 /* Check for "full" addressed entries */
415 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
417 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
419 && ip_vs_addr_equal(af, &svc->addr, vaddr)
420 && (svc->port == vport)
421 && (svc->protocol == protocol)) {
432 * Get service by {fwmark} in the service table.
/*
 * Walk the fwmark hash chain and return the service matching
 * <af, fwmark>, or NULL.  Caller holds __ip_vs_svc_lock; no refcount.
 */
434 static inline struct ip_vs_service *
435 __ip_vs_svc_fwm_find(int af, __u32 fwmark)
438 struct ip_vs_service *svc;
440 /* Check for fwmark addressed entries */
441 hash = ip_vs_svc_fwm_hashkey(fwmark);
443 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
444 if (svc->fwmark == fwmark && svc->af == af) {
/*
 * Public refcounted service lookup.  Order of preference:
 *   1. fwmark table (when fwmark != 0);
 *   2. exact <proto,addr,port> match;
 *   3. the FTP control service (FTPPORT) for likely FTP data traffic;
 *   4. the catch-all port-zero service.
 * On a hit, svc->usecnt is bumped; callers pair with ip_vs_service_put().
 */
453 struct ip_vs_service *
454 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
455 const union nf_inet_addr *vaddr, __be16 vport)
457 struct ip_vs_service *svc;
459 read_lock(&__ip_vs_svc_lock);
462 * Check the table hashed by fwmark first
464 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
468 * Check the table hashed by <protocol,addr,port>
469 * for "full" addressed entries
471 svc = __ip_vs_service_find(af, protocol, vaddr, vport);
474 && protocol == IPPROTO_TCP
475 && atomic_read(&ip_vs_ftpsvc_counter)
476 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
478 * Check if ftp service entry exists, the packet
479 * might belong to FTP data connections.
481 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
485 && atomic_read(&ip_vs_nullsvc_counter)) {
487 * Check if the catch-all port (port zero) exists
489 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
/* Take a usage reference before dropping the table lock. */
494 atomic_inc(&svc->usecnt);
495 read_unlock(&__ip_vs_svc_lock);
497 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
498 fwmark, ip_vs_proto_name(protocol),
499 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
500 svc ? "hit" : "not hit");
/* Attach @dest to @svc, taking a service reference for the binding. */
507 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
509 atomic_inc(&svc->refcnt);
/*
 * Detach @dest from its service and drop the binding's reference;
 * frees the service when this was the last reference.
 */
514 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
516 struct ip_vs_service *svc = dest->svc;
519 if (atomic_dec_and_test(&svc->refcnt)) {
520 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
522 IP_VS_DBG_ADDR(svc->af, &svc->addr),
523 ntohs(svc->port), atomic_read(&svc->usecnt));
530 * Returns hash value for real service
/*
 * Hash <addr,port> into an index for ip_vs_rtable (protocol is not part
 * of the key here, unlike the virtual-service hash).
 */
532 static inline unsigned ip_vs_rs_hashkey(int af,
533 const union nf_inet_addr *addr,
536 register unsigned porth = ntohs(port);
537 __be32 addr_fold = addr->ip;
539 #ifdef CONFIG_IP_VS_IPV6
541 addr_fold = addr->ip6[0]^addr->ip6[1]^
542 addr->ip6[2]^addr->ip6[3];
545 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
550 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
551 * should be called with locked tables.
/*
 * Insert @dest into the real-server table.  A non-empty d_list means it
 * is already hashed, which is rejected (error path elided in excerpt).
 */
553 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
557 if (!list_empty(&dest->d_list)) {
562 * Hash by proto,addr,port,
563 * which are the parameters of the real service.
565 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
567 list_add(&dest->d_list, &ip_vs_rtable[hash]);
573 * UNhashes ip_vs_dest from ip_vs_rtable.
574 * should be called with locked tables.
/*
 * Remove @dest from the real-server table; re-initializing d_list lets
 * ip_vs_rs_hash()/ip_vs_rs_unhash() use list_empty() as "is hashed".
 */
576 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
579 * Remove it from the ip_vs_rtable table.
581 if (!list_empty(&dest->d_list)) {
582 list_del(&dest->d_list);
583 INIT_LIST_HEAD(&dest->d_list);
590 * Lookup real service by <proto,addr,port> in the real service table.
/*
 * Return the first real server matching <af,daddr,dport> and protocol
 * under __ip_vs_rs_lock (read side).  No reference is taken on the
 * returned dest — NOTE(review): confirm caller-side lifetime rules.
 */
593 ip_vs_lookup_real_service(int af, __u16 protocol,
594 const union nf_inet_addr *daddr,
598 struct ip_vs_dest *dest;
601 * Check for "full" addressed entries
602 * Return the first found entry
604 hash = ip_vs_rs_hashkey(af, daddr, dport);
606 read_lock(&__ip_vs_rs_lock);
607 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
609 && ip_vs_addr_equal(af, &dest->addr, daddr)
610 && (dest->port == dport)
611 && ((dest->protocol == protocol) ||
614 read_unlock(&__ip_vs_rs_lock);
618 read_unlock(&__ip_vs_rs_lock);
624 * Lookup destination by {addr,port} in the given service
/*
 * Linear scan of @svc's destination list for <af,daddr,dport>.
 * Returns the matching dest or NULL; no reference is taken.
 */
626 static struct ip_vs_dest *
627 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
630 struct ip_vs_dest *dest;
633 * Find the destination for the given service
635 list_for_each_entry(dest, &svc->destinations, n_list) {
636 if ((dest->af == svc->af)
637 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
638 && (dest->port == dport)) {
648 * Find destination by {daddr,dport,vaddr,protocol}
649 * Cretaed to be used in ip_vs_process_message() in
650 * the backup synchronization daemon. It finds the
651 * destination to be bound to the received connection
654 * ip_vs_lookup_real_service() looked promissing, but
655 * seems not working as expected.
/*
 * Resolve the virtual service first, then its destination; takes a
 * reference on the returned dest and releases the service reference
 * before returning.  Used by the sync backup path.
 */
657 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
659 const union nf_inet_addr *vaddr,
660 __be16 vport, __u16 protocol)
662 struct ip_vs_dest *dest;
663 struct ip_vs_service *svc;
665 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
668 dest = ip_vs_lookup_dest(svc, daddr, dport);
/* Pin the dest for the caller before dropping the service ref. */
670 atomic_inc(&dest->refcnt);
671 ip_vs_service_put(svc);
676 * Lookup dest by {svc,addr,port} in the destination trash.
677 * The destination trash is used to hold the destinations that are removed
678 * from the service table but are still referenced by some conn entries.
679 * The reason to add the destination trash is when the dest is temporary
680 * down (either by administrator or by monitor program), the dest can be
681 * picked back from the trash, the remaining connections to the dest can
682 * continue, and the counting information of the dest is also useful for
/*
 * Scan the trash for a dest matching @svc/<daddr,dport> so a re-added
 * server can be resurrected with its stats/connections intact.  As a side
 * effect, unreferenced entries (refcnt == 1, table-only) encountered
 * during the scan are purged and unbound from their service.
 */
685 static struct ip_vs_dest *
686 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
689 struct ip_vs_dest *dest, *nxt;
692 * Find the destination in trash
694 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
695 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
698 IP_VS_DBG_ADDR(svc->af, &dest->addr),
700 atomic_read(&dest->refcnt));
/* Match on family, address, port and the owning virtual service
 * (by fwmark, or by protocol+vaddr+vport). */
701 if (dest->af == svc->af &&
702 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
703 dest->port == dport &&
704 dest->vfwmark == svc->fwmark &&
705 dest->protocol == svc->protocol &&
707 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
708 dest->vport == svc->port))) {
714 * Try to purge the destination from trash if not referenced
716 if (atomic_read(&dest->refcnt) == 1) {
717 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
720 IP_VS_DBG_ADDR(svc->af, &dest->addr),
722 list_del(&dest->n_list);
723 ip_vs_dst_reset(dest);
724 __ip_vs_unbind_svc(dest);
734 * Clean up all the destinations in the trash
735 * Called by the ip_vs_control_cleanup()
737 * When the ip_vs_control_clearup is activated by ipvs module exit,
738 * the service tables must have been flushed and all the connections
739 * are expired, and the refcnt of each destination in the trash must
740 * be 1, so we simply release them here.
/* Module-exit helper: unconditionally drain and release the trash list. */
742 static void ip_vs_trash_cleanup(void)
744 struct ip_vs_dest *dest, *nxt;
746 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747 list_del(&dest->n_list);
748 ip_vs_dst_reset(dest);
749 __ip_vs_unbind_svc(dest);
/*
 * Reset the user-visible counters and the rate estimator of @stats,
 * atomically with respect to the stats lock (BH-safe).
 */
756 ip_vs_zero_stats(struct ip_vs_stats *stats)
758 spin_lock_bh(&stats->lock);
760 memset(&stats->ustats, 0, sizeof(stats->ustats));
761 ip_vs_zero_estimator(stats);
763 spin_unlock_bh(&stats->lock);
767 * Update a destination in the given service
/*
 * Apply @udest's settings to @dest and (re)link it into @svc.
 * @add distinguishes "new dest" (link into svc->destinations) from
 * "edit existing".  NOTE(review): several conditionals are elided in
 * this excerpt (e.g. the NAT-only ip_vs_rs_hash call and the add/edit
 * branches) — confirm exact control flow against the full source.
 */
770 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771 struct ip_vs_dest_user_kern *udest, int add)
775 /* set the weight and the flags */
776 atomic_set(&dest->weight, udest->weight);
777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
778 conn_flags |= IP_VS_CONN_F_INACTIVE;
780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
785 * Put the real service in ip_vs_rtable if not present.
786 * For now only for NAT!
788 write_lock_bh(&__ip_vs_rs_lock);
790 write_unlock_bh(&__ip_vs_rs_lock);
792 atomic_set(&dest->conn_flags, conn_flags);
794 /* bind the service */
796 __ip_vs_bind_svc(dest, svc);
/* Rebinding to a different service: stats restart from zero. */
798 if (dest->svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc);
805 /* set the dest status flags */
806 dest->flags |= IP_VS_DEST_F_AVAILABLE;
/* Raising (or clearing) the upper threshold lifts overload state. */
808 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810 dest->u_threshold = udest->u_threshold;
811 dest->l_threshold = udest->l_threshold;
/* Invalidate any cached route to the real server. */
813 spin_lock(&dest->dst_lock);
814 ip_vs_dst_reset(dest);
815 spin_unlock(&dest->dst_lock);
818 ip_vs_new_estimator(&dest->stats);
820 write_lock_bh(&__ip_vs_svc_lock);
822 /* Wait until all other svc users go away */
823 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
826 list_add(&dest->n_list, &svc->destinations);
830 /* call the update_service, because server weight may be changed */
831 if (svc->scheduler->update_service)
832 svc->scheduler->update_service(svc);
834 write_unlock_bh(&__ip_vs_svc_lock);
839 * Create a destination for the given service
/*
 * Validate @udest's address for the service family, allocate and
 * initialize a new ip_vs_dest, then configure/link it via
 * __ip_vs_update_dest().  The new dest is returned through @dest_p
 * (assignment elided in this excerpt).  IPv6 addresses must be unicast,
 * non-link-local, or locally hosted.
 */
842 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843 struct ip_vs_dest **dest_p)
845 struct ip_vs_dest *dest;
850 #ifdef CONFIG_IP_VS_IPV6
851 if (svc->af == AF_INET6) {
852 atype = ipv6_addr_type(&udest->addr.in6);
853 if ((!(atype & IPV6_ADDR_UNICAST) ||
854 atype & IPV6_ADDR_LINKLOCAL) &&
855 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
/* IPv4 path: the address must route as local or unicast. */
860 atype = inet_addr_type(&init_net, udest->addr.ip);
861 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
865 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
867 pr_err("%s(): no memory.\n", __func__);
/* Copy the identifying tuple of the owning virtual service. */
872 dest->protocol = svc->protocol;
873 dest->vaddr = svc->addr;
874 dest->vport = svc->port;
875 dest->vfwmark = svc->fwmark;
876 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
877 dest->port = udest->port;
879 atomic_set(&dest->activeconns, 0);
880 atomic_set(&dest->inactconns, 0);
881 atomic_set(&dest->persistconns, 0);
/* Initial refcnt of 1: held by the service's destination list. */
882 atomic_set(&dest->refcnt, 1);
884 INIT_LIST_HEAD(&dest->d_list);
885 spin_lock_init(&dest->dst_lock);
886 spin_lock_init(&dest->stats.lock);
887 __ip_vs_update_dest(svc, dest, udest, 1);
897 * Add a destination into an existing service
/*
 * Add a real server to @svc.  After validating weight/thresholds:
 *   - reject if the dest already exists in the service;
 *   - resurrect a matching dest from the trash when possible;
 *   - otherwise allocate a fresh one via ip_vs_new_dest().
 */
900 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
902 struct ip_vs_dest *dest;
903 union nf_inet_addr daddr;
904 __be16 dport = udest->port;
909 if (udest->weight < 0) {
910 pr_err("%s(): server weight less than zero\n", __func__);
914 if (udest->l_threshold > udest->u_threshold) {
915 pr_err("%s(): lower threshold is higher than upper threshold\n",
920 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
923 * Check if the dest already exists in the list
925 dest = ip_vs_lookup_dest(svc, &daddr, dport);
928 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
933 * Check if the dest already exists in the trash and
934 * is from the same service
936 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
939 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
940 "dest->refcnt=%d, service %u/%s:%u\n",
941 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
942 atomic_read(&dest->refcnt),
944 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
948 * Get the destination from the trash
950 list_del(&dest->n_list);
952 __ip_vs_update_dest(svc, dest, udest, 1);
956 * Allocate and initialize the dest structure
958 ret = ip_vs_new_dest(svc, udest, &dest);
967 * Edit a destination in the given service
/*
 * Update an existing real server's parameters.  Validation mirrors
 * ip_vs_add_dest(); the dest must already exist in @svc.
 */
970 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
972 struct ip_vs_dest *dest;
973 union nf_inet_addr daddr;
974 __be16 dport = udest->port;
978 if (udest->weight < 0) {
979 pr_err("%s(): server weight less than zero\n", __func__);
983 if (udest->l_threshold > udest->u_threshold) {
984 pr_err("%s(): lower threshold is higher than upper threshold\n",
989 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
992 * Lookup the destination list
994 dest = ip_vs_lookup_dest(svc, &daddr, dport);
997 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
/* @add == 0: edit in place, do not relink into svc->destinations. */
1001 __ip_vs_update_dest(svc, dest, udest, 0);
1009 * Delete a destination (must be already unlinked from the service)
/*
 * Tear down an unlinked dest: stop its estimator, unhash it from the
 * real-server table, then either free it (last reference) or park it in
 * the trash with an extra reference so lingering connections survive.
 */
1011 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1013 ip_vs_kill_estimator(&dest->stats);
1016 * Remove it from the d-linked list with the real services.
1018 write_lock_bh(&__ip_vs_rs_lock);
1019 ip_vs_rs_unhash(dest);
1020 write_unlock_bh(&__ip_vs_rs_lock);
1023 * Decrease the refcnt of the dest, and free the dest
1024 * if nobody refers to it (refcnt=0). Otherwise, throw
1025 * the destination into the trash.
1027 if (atomic_dec_and_test(&dest->refcnt)) {
1028 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1030 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1032 ip_vs_dst_reset(dest);
1033 /* simply decrease svc->refcnt here, let the caller check
1034 and release the service if nobody refers to it.
1035 Only user context can release destination and service,
1036 and only one user context can update virtual service at a
1037 time, so the operation here is OK */
1038 atomic_dec(&dest->svc->refcnt);
1041 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042 "dest->refcnt=%d\n",
1043 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1045 atomic_read(&dest->refcnt));
/* The trash list itself holds a reference until cleanup/resurrection. */
1046 list_add(&dest->n_list, &ip_vs_dest_trash);
1047 atomic_inc(&dest->refcnt);
1053 * Unlink a destination from the given service
/*
 * Mark @dest unavailable and remove it from @svc's destination list.
 * @svcupd controls whether the scheduler is notified of the change
 * (suppressed during full-service teardown).
 */
1055 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056 struct ip_vs_dest *dest,
1059 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1062 * Remove it from the d-linked destination list.
1064 list_del(&dest->n_list);
1068 * Call the update_service function of its scheduler
1070 if (svcupd && svc->scheduler->update_service)
1071 svc->scheduler->update_service(svc);
1076 * Delete a destination server in the given service
/*
 * User-facing removal: look up the dest, quiesce concurrent service
 * users under the write lock, unlink it (with scheduler notification),
 * then hand it to __ip_vs_del_dest() for free-or-trash disposal.
 */
1079 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1081 struct ip_vs_dest *dest;
1082 __be16 dport = udest->port;
1086 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1089 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1093 write_lock_bh(&__ip_vs_svc_lock);
1096 * Wait until all other svc users go away.
1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1101 * Unlink dest from the service
1103 __ip_vs_unlink_dest(svc, dest, 1);
1105 write_unlock_bh(&__ip_vs_svc_lock);
1108 * Delete the destination
1110 __ip_vs_del_dest(dest);
1119 * Add a service into the service hash table
/*
 * Create a new virtual service from user parameters @u: resolve the
 * scheduler (and optional persistence engine) modules, validate the
 * IPv6 netmask as a prefix length, allocate/initialize the service,
 * bind scheduler and PE, start the estimator and hash it into the
 * tables.  Error paths unwind in reverse (labels elided in excerpt).
 */
1122 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1123 struct ip_vs_service **svc_p)
1126 struct ip_vs_scheduler *sched = NULL;
1127 struct ip_vs_pe *pe = NULL;
1128 struct ip_vs_service *svc = NULL;
1130 /* increase the module use count */
1131 ip_vs_use_count_inc();
1133 /* Lookup the scheduler by 'u->sched_name' */
1134 sched = ip_vs_scheduler_get(u->sched_name);
1135 if (sched == NULL) {
1136 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1141 if (u->pe_name && *u->pe_name) {
1142 pe = ip_vs_pe_get(u->pe_name);
1144 pr_info("persistence engine module ip_vs_pe_%s "
1145 "not found\n", u->pe_name);
/* For IPv6, netmask is a prefix length and must be 1..128. */
1151 #ifdef CONFIG_IP_VS_IPV6
1152 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1158 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1160 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1165 /* I'm the first user of the service */
1166 atomic_set(&svc->usecnt, 0);
1167 atomic_set(&svc->refcnt, 0);
1170 svc->protocol = u->protocol;
1171 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1172 svc->port = u->port;
1173 svc->fwmark = u->fwmark;
1174 svc->flags = u->flags;
1175 svc->timeout = u->timeout * HZ;
1176 svc->netmask = u->netmask;
1178 INIT_LIST_HEAD(&svc->destinations);
1179 rwlock_init(&svc->sched_lock);
1180 spin_lock_init(&svc->stats.lock);
1182 /* Bind the scheduler */
1183 ret = ip_vs_bind_scheduler(svc, sched);
1188 /* Bind the ct retriever */
1189 ip_vs_bind_pe(svc, pe);
1192 /* Update the virtual service counters */
1193 if (svc->port == FTPPORT)
1194 atomic_inc(&ip_vs_ftpsvc_counter);
1195 else if (svc->port == 0)
1196 atomic_inc(&ip_vs_nullsvc_counter);
1198 ip_vs_new_estimator(&svc->stats);
1200 /* Count only IPv4 services for old get/setsockopt interface */
1201 if (svc->af == AF_INET)
1202 ip_vs_num_services++;
1204 /* Hash the service into the service table */
1205 write_lock_bh(&__ip_vs_svc_lock);
1206 ip_vs_svc_hash(svc);
1207 write_unlock_bh(&__ip_vs_svc_lock);
/* Error unwinding: unbind scheduler/app inc, drop module refs. */
1214 ip_vs_unbind_scheduler(svc);
1217 ip_vs_app_inc_put(svc->inc);
1222 ip_vs_scheduler_put(sched);
1225 /* decrease the module use count */
1226 ip_vs_use_count_dec();
1233 * Edit a service and bind it with a new scheduler
/*
 * Modify an existing virtual service: flags, timeout, netmask, and
 * optionally swap the scheduler and/or persistence engine.  Runs with
 * the service write lock held and waits for all readers (usecnt) first.
 */
1236 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1238 struct ip_vs_scheduler *sched, *old_sched;
1239 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1243 * Lookup the scheduler, by 'u->sched_name'
1245 sched = ip_vs_scheduler_get(u->sched_name);
1246 if (sched == NULL) {
1247 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1252 if (u->pe_name && *u->pe_name) {
1253 pe = ip_vs_pe_get(u->pe_name);
1255 pr_info("persistence engine module ip_vs_pe_%s "
1256 "not found\n", u->pe_name);
/* IPv6 netmask is a prefix length: 1..128 only. */
1263 #ifdef CONFIG_IP_VS_IPV6
1264 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1270 write_lock_bh(&__ip_vs_svc_lock);
1273 * Wait until all other svc users go away.
1275 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1278 * Set the flags and timeout value
1280 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1281 svc->timeout = u->timeout * HZ;
1282 svc->netmask = u->netmask;
1284 old_sched = svc->scheduler;
1285 if (sched != old_sched) {
1287 * Unbind the old scheduler
1289 if ((ret = ip_vs_unbind_scheduler(svc))) {
1295 * Bind the new scheduler
1297 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1299 * If ip_vs_bind_scheduler fails, restore the old
1301 * The main reason of failure is out of memory.
1303 * The question is if the old scheduler can be
1304 * restored all the time. TODO: if it cannot be
1305 * restored some time, we must delete the service,
1306 * otherwise the system may crash.
1308 ip_vs_bind_scheduler(svc, old_sched);
/* Swap persistence engine unconditionally (old one put below). */
1316 ip_vs_unbind_pe(svc);
1317 ip_vs_bind_pe(svc, pe);
1321 write_unlock_bh(&__ip_vs_svc_lock);
/* Drop references to whichever scheduler/PE are no longer in use. */
1323 ip_vs_scheduler_put(old_sched);
1324 ip_vs_pe_put(old_pe);
1330 * Delete a service from the service list
1331 * - The service must be unlinked, unlocked and not referenced!
1332 * - We are called under _bh lock
/*
 * Final teardown of an unlinked service: stop the estimator, unbind
 * scheduler/PE/app-inc, delete every destination, adjust the FTP/null
 * counters, free the service if unreferenced, and drop the module ref.
 */
1334 static void __ip_vs_del_service(struct ip_vs_service *svc)
1336 struct ip_vs_dest *dest, *nxt;
1337 struct ip_vs_scheduler *old_sched;
1338 struct ip_vs_pe *old_pe;
/* NOTE(review): stray debug trace — consider removing before merge. */
1340 pr_info("%s: enter\n", __func__);
1342 /* Count only IPv4 services for old get/setsockopt interface */
1343 if (svc->af == AF_INET)
1344 ip_vs_num_services--;
1346 ip_vs_kill_estimator(&svc->stats);
1348 /* Unbind scheduler */
1349 old_sched = svc->scheduler;
1350 ip_vs_unbind_scheduler(svc);
1351 ip_vs_scheduler_put(old_sched);
1353 /* Unbind persistence engine */
1355 ip_vs_unbind_pe(svc);
1356 ip_vs_pe_put(old_pe);
1358 /* Unbind app inc */
1360 ip_vs_app_inc_put(svc->inc);
1365 * Unlink the whole destination list
1367 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
/* svcupd=0: no scheduler notification while the service dies. */
1368 __ip_vs_unlink_dest(svc, dest, 0);
1369 __ip_vs_del_dest(dest);
1373 * Update the virtual service counters
1375 if (svc->port == FTPPORT)
1376 atomic_dec(&ip_vs_ftpsvc_counter);
1377 else if (svc->port == 0)
1378 atomic_dec(&ip_vs_nullsvc_counter);
1381 * Free the service if nobody refers to it
1383 if (atomic_read(&svc->refcnt) == 0) {
1384 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1386 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1387 ntohs(svc->port), atomic_read(&svc->usecnt));
1391 /* decrease the module use count */
1392 ip_vs_use_count_dec();
1396 * Unlink a service from list and try to delete it if its refcnt reached 0
/*
 * Remove @svc from the hash tables under the write lock, wait for all
 * concurrent users to drain, then perform the full teardown.
 */
1398 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1401 * Unhash it from the service table
1403 write_lock_bh(&__ip_vs_svc_lock);
1405 ip_vs_svc_unhash(svc);
1408 * Wait until all the svc users go away.
1410 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1412 __ip_vs_del_service(svc);
1414 write_unlock_bh(&__ip_vs_svc_lock);
1418 * Delete a service from the service list
/* Thin wrapper over ip_vs_unlink_service() (NULL check elided here). */
1420 static int ip_vs_del_service(struct ip_vs_service *svc)
1424 ip_vs_unlink_service(svc);
1431 * Flush all the virtual services
/*
 * Remove every virtual service from both hash tables (the fwmark table
 * and the <proto,addr,port> table), tearing each one down in turn.
 */
1433 static int ip_vs_flush(void)
1436 struct ip_vs_service *svc, *nxt;
1439 * Flush the service table hashed by <protocol,addr,port>
1441 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1442 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1443 ip_vs_unlink_service(svc);
1448 * Flush the service table hashed by fwmark
1450 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1451 list_for_each_entry_safe(svc, nxt,
1452 &ip_vs_svc_fwm_table[idx], f_list) {
1453 ip_vs_unlink_service(svc);
1462 * Zero counters in a service or all services
/*
 * Reset the statistics of @svc and every one of its destinations,
 * under the service table write lock.
 */
1464 static int ip_vs_zero_service(struct ip_vs_service *svc)
1466 struct ip_vs_dest *dest;
1468 write_lock_bh(&__ip_vs_svc_lock);
1469 list_for_each_entry(dest, &svc->destinations, n_list) {
1470 ip_vs_zero_stats(&dest->stats);
1472 ip_vs_zero_stats(&svc->stats);
1473 write_unlock_bh(&__ip_vs_svc_lock);
/*
 * Zero statistics for every service in both hash tables, then the
 * global ip_vs_stats aggregate.
 */
1477 static int ip_vs_zero_all(void)
1480 struct ip_vs_service *svc;
1482 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1484 ip_vs_zero_service(svc);
1488 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1489 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1490 ip_vs_zero_service(svc);
1494 ip_vs_zero_stats(&ip_vs_stats);
/*
 * sysctl handler for the defense-mode tunables (drop_entry, drop_packet,
 * secure_tcp): accepts only 0..3, restoring the previous value otherwise,
 * and re-runs update_defense_level() after a valid write.
 */
1500 proc_do_defense_mode(ctl_table *table, int write,
1501 void __user *buffer, size_t *lenp, loff_t *ppos)
1503 int *valp = table->data;
1507 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1508 if (write && (*valp != val)) {
1509 if ((*valp < 0) || (*valp > 3)) {
1510 /* Restore the correct value */
1513 update_defense_level();
/*
 * sysctl handler for sync_threshold (a pair of ints): rejects writes
 * where either value is negative or threshold >= period, restoring the
 * previously saved pair in that case.
 */
1521 proc_do_sync_threshold(ctl_table *table, int write,
1522 void __user *buffer, size_t *lenp, loff_t *ppos)
1524 int *valp = table->data;
1528 /* backup the value first */
1529 memcpy(val, valp, sizeof(val));
1531 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1532 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1533 /* Restore the correct value */
1534 memcpy(valp, val, sizeof(val));
1541 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
/*
 * Each entry binds one /proc/sys/net/ipv4/vs/<procname> file to a
 * global variable and a handler.  Plain ints use proc_dointvec, the
 * defense knobs use proc_do_defense_mode (range-checked 0..3), the
 * timeout_* DoS table entries use proc_dointvec_jiffies, and
 * sync_threshold uses the paired-int proc_do_sync_threshold handler.
 * NOTE(review): extraction gaps — the '{'/'}' entry delimiters, .mode
 * fields and #endif lines are missing from this extract.
 */
1544 static struct ctl_table vs_vars[] = {
1546 .procname = "amemthresh",
1547 .data = &sysctl_ip_vs_amemthresh,
1548 .maxlen = sizeof(int),
1550 .proc_handler = proc_dointvec,
1552 #ifdef CONFIG_IP_VS_DEBUG
1554 .procname = "debug_level",
1555 .data = &sysctl_ip_vs_debug_level,
1556 .maxlen = sizeof(int),
1558 .proc_handler = proc_dointvec,
1562 .procname = "am_droprate",
1563 .data = &sysctl_ip_vs_am_droprate,
1564 .maxlen = sizeof(int),
1566 .proc_handler = proc_dointvec,
1569 .procname = "drop_entry",
1570 .data = &sysctl_ip_vs_drop_entry,
1571 .maxlen = sizeof(int),
1573 .proc_handler = proc_do_defense_mode,
1576 .procname = "drop_packet",
1577 .data = &sysctl_ip_vs_drop_packet,
1578 .maxlen = sizeof(int),
1580 .proc_handler = proc_do_defense_mode,
1582 #ifdef CONFIG_IP_VS_NFCT
1584 .procname = "conntrack",
1585 .data = &sysctl_ip_vs_conntrack,
1586 .maxlen = sizeof(int),
1588 .proc_handler = &proc_dointvec,
1592 .procname = "secure_tcp",
1593 .data = &sysctl_ip_vs_secure_tcp,
1594 .maxlen = sizeof(int),
1596 .proc_handler = proc_do_defense_mode,
1599 .procname = "snat_reroute",
1600 .data = &sysctl_ip_vs_snat_reroute,
1601 .maxlen = sizeof(int),
1603 .proc_handler = &proc_dointvec,
/* secure_tcp DoS-mode timeouts, stored in jiffies (hence _jiffies). */
1607 .procname = "timeout_established",
1608 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1609 .maxlen = sizeof(int),
1611 .proc_handler = proc_dointvec_jiffies,
1614 .procname = "timeout_synsent",
1615 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1616 .maxlen = sizeof(int),
1618 .proc_handler = proc_dointvec_jiffies,
1621 .procname = "timeout_synrecv",
1622 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1623 .maxlen = sizeof(int),
1625 .proc_handler = proc_dointvec_jiffies,
1628 .procname = "timeout_finwait",
1629 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1630 .maxlen = sizeof(int),
1632 .proc_handler = proc_dointvec_jiffies,
1635 .procname = "timeout_timewait",
1636 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1637 .maxlen = sizeof(int),
1639 .proc_handler = proc_dointvec_jiffies,
1642 .procname = "timeout_close",
1643 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1644 .maxlen = sizeof(int),
1646 .proc_handler = proc_dointvec_jiffies,
1649 .procname = "timeout_closewait",
1650 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1651 .maxlen = sizeof(int),
1653 .proc_handler = proc_dointvec_jiffies,
1656 .procname = "timeout_lastack",
1657 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1658 .maxlen = sizeof(int),
1660 .proc_handler = proc_dointvec_jiffies,
1663 .procname = "timeout_listen",
1664 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1665 .maxlen = sizeof(int),
1667 .proc_handler = proc_dointvec_jiffies,
1670 .procname = "timeout_synack",
1671 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1672 .maxlen = sizeof(int),
1674 .proc_handler = proc_dointvec_jiffies,
1677 .procname = "timeout_udp",
1678 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1679 .maxlen = sizeof(int),
1681 .proc_handler = proc_dointvec_jiffies,
1684 .procname = "timeout_icmp",
1685 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1686 .maxlen = sizeof(int),
1688 .proc_handler = proc_dointvec_jiffies,
1692 .procname = "cache_bypass",
1693 .data = &sysctl_ip_vs_cache_bypass,
1694 .maxlen = sizeof(int),
1696 .proc_handler = proc_dointvec,
1699 .procname = "expire_nodest_conn",
1700 .data = &sysctl_ip_vs_expire_nodest_conn,
1701 .maxlen = sizeof(int),
1703 .proc_handler = proc_dointvec,
1706 .procname = "expire_quiescent_template",
1707 .data = &sysctl_ip_vs_expire_quiescent_template,
1708 .maxlen = sizeof(int),
1710 .proc_handler = proc_dointvec,
/* sync_threshold is a pair of ints, hence maxlen = sizeof(array). */
1713 .procname = "sync_threshold",
1714 .data = &sysctl_ip_vs_sync_threshold,
1715 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1717 .proc_handler = proc_do_sync_threshold,
1720 .procname = "nat_icmp_send",
1721 .data = &sysctl_ip_vs_nat_icmp_send,
1722 .maxlen = sizeof(int),
1724 .proc_handler = proc_dointvec,
/* Directory path net/ipv4/vs under which vs_vars is registered. */
1729 const struct ctl_path net_vs_ctl_path[] = {
1730 { .procname = "net", },
1731 { .procname = "ipv4", },
1732 { .procname = "vs", },
1735 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
/* Handle returned by sysctl registration; needed for unregistration. */
1737 static struct ctl_table_header * sysctl_header;
1739 #ifdef CONFIG_PROC_FS
/* Iterator state for the /proc seq_file walk: which hash table
 * (s_list by protocol vs f_list by fwmark) — bucket field not visible
 * in this extract. */
1742 struct list_head *table;
1747 * Write the contents of the VS rule table to a PROCfs file.
1748 * (It is kept just for backward compatibility)
/*
 * ip_vs_fwd_name - map connection forwarding flags to a short label
 * for /proc output.  NOTE(review): the returned string literals for
 * each case (and the default) are missing from this extract.
 */
1750 static inline const char *ip_vs_fwd_name(unsigned flags)
1752 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1753 case IP_VS_CONN_F_LOCALNODE:
1755 case IP_VS_CONN_F_TUNNEL:
1757 case IP_VS_CONN_F_DROUTE:
1765 /* Get the Nth entry in the two lists */
/*
 * Walks the protocol-hashed table first, then the fwmark-hashed table,
 * recording in iter->table which table the returned service came from
 * so that ip_vs_info_seq_next() can resume.  Caller holds
 * __ip_vs_svc_lock (taken in ip_vs_info_seq_start).
 * NOTE(review): the pos-decrement/compare and return statements are
 * missing from this extract.
 */
1766 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1768 struct ip_vs_iter *iter = seq->private;
1770 struct ip_vs_service *svc;
1772 /* look in hash by protocol */
1773 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1774 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1776 iter->table = ip_vs_svc_table;
1783 /* keep looking in fwmark */
1784 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1785 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1787 iter->table = ip_vs_svc_fwm_table;
/*
 * seq_file .start: take the service-table read lock for the whole
 * iteration (released in ip_vs_info_seq_stop) and position on the
 * (*pos - 1)-th service, or return SEQ_START_TOKEN for the header row.
 */
1797 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1798 __acquires(__ip_vs_svc_lock)
1801 read_lock_bh(&__ip_vs_svc_lock);
1802 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file .next: advance to the following service.  Continues within
 * the current hash bucket, then scans later buckets; when the
 * protocol-hashed table is exhausted it switches iter->table to the
 * fwmark-hashed table.  NOTE(review): the (*pos)++ update, the
 * 'svc = v' assignment and the final NULL return are missing from
 * this extract.
 */
1806 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1808 struct list_head *e;
1809 struct ip_vs_iter *iter;
1810 struct ip_vs_service *svc;
/* First call after the header row: return the first real entry. */
1813 if (v == SEQ_START_TOKEN)
1814 return ip_vs_info_array(seq,0);
1817 iter = seq->private;
1819 if (iter->table == ip_vs_svc_table) {
1820 /* next service in table hashed by protocol */
1821 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1822 return list_entry(e, struct ip_vs_service, s_list);
1825 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1826 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
/* Protocol table exhausted: fall through to the fwmark table. */
1832 iter->table = ip_vs_svc_fwm_table;
1837 /* next service in hashed by fwmark */
1838 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1839 return list_entry(e, struct ip_vs_service, f_list);
1842 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1843 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file .stop: drop the read lock taken in ip_vs_info_seq_start. */
1851 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1852 __releases(__ip_vs_svc_lock)
1854 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * seq_file .show: print one row of /proc/net/ip_vs.  The start token
 * produces the version banner and column headers; otherwise print the
 * service line (protocol/addr/port or FWM form, IPv6-aware) followed
 * by one indented line per destination.  NOTE(review): several
 * seq_printf argument lines and the return are missing from this
 * extract.
 */
1858 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1860 if (v == SEQ_START_TOKEN) {
1862 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1863 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1865 "Prot LocalAddress:Port Scheduler Flags\n");
1867 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1869 const struct ip_vs_service *svc = v;
1870 const struct ip_vs_iter *iter = seq->private;
1871 const struct ip_vs_dest *dest;
/* Services found via the protocol table print addr:port; those from
 * the fwmark table print the FWM form below. */
1873 if (iter->table == ip_vs_svc_table) {
1874 #ifdef CONFIG_IP_VS_IPV6
1875 if (svc->af == AF_INET6)
1876 seq_printf(seq, "%s [%pI6]:%04X %s ",
1877 ip_vs_proto_name(svc->protocol),
1880 svc->scheduler->name);
1883 seq_printf(seq, "%s %08X:%04X %s %s ",
1884 ip_vs_proto_name(svc->protocol),
1885 ntohl(svc->addr.ip),
1887 svc->scheduler->name,
1888 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1890 seq_printf(seq, "FWM %08X %s %s",
1891 svc->fwmark, svc->scheduler->name,
1892 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1895 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1896 seq_printf(seq, "persistent %d %08X\n",
1898 ntohl(svc->netmask));
1900 seq_putc(seq, '\n');
/* One line per real server behind this virtual service. */
1902 list_for_each_entry(dest, &svc->destinations, n_list) {
1903 #ifdef CONFIG_IP_VS_IPV6
1904 if (dest->af == AF_INET6)
1907 " %-7s %-6d %-10d %-10d\n",
1910 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1911 atomic_read(&dest->weight),
1912 atomic_read(&dest->activeconns),
1913 atomic_read(&dest->inactconns));
1918 "%-7s %-6d %-10d %-10d\n",
1919 ntohl(dest->addr.ip),
1921 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1922 atomic_read(&dest->weight),
1923 atomic_read(&dest->activeconns),
1924 atomic_read(&dest->inactconns));
/* seq_file iterator ops for /proc/net/ip_vs. */
1931 static const struct seq_operations ip_vs_info_seq_ops = {
1932 .start = ip_vs_info_seq_start,
1933 .next = ip_vs_info_seq_next,
1934 .stop = ip_vs_info_seq_stop,
1935 .show = ip_vs_info_seq_show,
/* open: allocate a per-reader struct ip_vs_iter as seq private data. */
1938 static int ip_vs_info_open(struct inode *inode, struct file *file)
1940 return seq_open_private(file, &ip_vs_info_seq_ops,
1941 sizeof(struct ip_vs_iter));
/* File ops for /proc/net/ip_vs (read entry missing in this extract). */
1944 static const struct file_operations ip_vs_info_fops = {
1945 .owner = THIS_MODULE,
1946 .open = ip_vs_info_open,
1948 .llseek = seq_lseek,
1949 .release = seq_release_private,
/* Global IPVS statistics; .lock guards the ustats counters. */
1954 struct ip_vs_stats ip_vs_stats = {
1955 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1958 #ifdef CONFIG_PROC_FS
/*
 * /proc/net/ip_vs_stats show: dump the global totals and the
 * per-second rate estimates under the stats spinlock so the snapshot
 * is consistent.  NOTE(review): the seq_puts header lines and the
 * return are partially missing from this extract.
 */
1959 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1962 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1964 " Total Incoming Outgoing Incoming Outgoing\n");
1966 " Conns Packets Packets Bytes Bytes\n");
1968 spin_lock_bh(&ip_vs_stats.lock);
1969 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1970 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1971 (unsigned long long) ip_vs_stats.ustats.inbytes,
1972 (unsigned long long) ip_vs_stats.ustats.outbytes);
1974 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1976 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1977 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1978 ip_vs_stats.ustats.cps,
1979 ip_vs_stats.ustats.inpps,
1980 ip_vs_stats.ustats.outpps,
1981 ip_vs_stats.ustats.inbps,
1982 ip_vs_stats.ustats.outbps);
1983 spin_unlock_bh(&ip_vs_stats.lock);
/* open: single-shot seq_file showing the global stats snapshot. */
1988 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1990 return single_open(file, ip_vs_stats_show, NULL);
/* File ops for /proc/net/ip_vs_stats (read entry missing here). */
1993 static const struct file_operations ip_vs_stats_fops = {
1994 .owner = THIS_MODULE,
1995 .open = ip_vs_stats_seq_open,
1997 .llseek = seq_lseek,
1998 .release = single_release,
2004 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * A zero field in *u means "leave that timeout unchanged"; non-zero
 * values are given in seconds and stored as jiffies (* HZ).
 * NOTE(review): the return statement is missing from this extract.
 */
2006 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2008 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2013 #ifdef CONFIG_IP_VS_PROTO_TCP
2014 if (u->tcp_timeout) {
2015 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2016 = u->tcp_timeout * HZ;
2019 if (u->tcp_fin_timeout) {
2020 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2021 = u->tcp_fin_timeout * HZ;
2025 #ifdef CONFIG_IP_VS_PROTO_UDP
2026 if (u->udp_timeout) {
2027 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2028 = u->udp_timeout * HZ;
/* Expected setsockopt argument length per IP_VS_SO_SET_* command;
 * do_ip_vs_set_ctl() rejects any request whose len differs. */
2035 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2036 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2037 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2038 sizeof(struct ip_vs_dest_user))
2039 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2040 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2041 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2043 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2044 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2045 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2046 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2047 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2048 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2049 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2052 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2053 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2054 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * Convert the old IPv4-only userspace struct ip_vs_service_user into
 * the kernel-internal ip_vs_service_user_kern (which supports both
 * address families).  The source buffer must outlive *usvc because
 * sched_name is aliased, not copied.
 * NOTE(review): the usvc->af assignment is not visible in this
 * extract — presumably set to AF_INET; confirm against the original.
 */
2057 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2058 struct ip_vs_service_user *usvc_compat)
2060 memset(usvc, 0, sizeof(*usvc));
2063 usvc->protocol = usvc_compat->protocol;
2064 usvc->addr.ip = usvc_compat->addr;
2065 usvc->port = usvc_compat->port;
2066 usvc->fwmark = usvc_compat->fwmark;
2068 /* Deep copy of sched_name is not needed here */
2069 usvc->sched_name = usvc_compat->sched_name;
2071 usvc->flags = usvc_compat->flags;
2072 usvc->timeout = usvc_compat->timeout;
2073 usvc->netmask = usvc_compat->netmask;
/*
 * Convert the old IPv4-only userspace struct ip_vs_dest_user into the
 * kernel-internal ip_vs_dest_user_kern.  Zero-fills first so fields
 * absent from the compat struct stay 0.
 */
2076 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2077 struct ip_vs_dest_user *udest_compat)
2079 memset(udest, 0, sizeof(*udest));
2081 udest->addr.ip = udest_compat->addr;
2082 udest->port = udest_compat->port;
2083 udest->conn_flags = udest_compat->conn_flags;
2084 udest->weight = udest_compat->weight;
2085 udest->u_threshold = udest_compat->u_threshold;
2086 udest->l_threshold = udest_compat->l_threshold;
/*
 * setsockopt entry point for the legacy IPVS control interface.
 * Validates capability, command range and argument length, copies the
 * argument in, then dispatches under __ip_vs_mutex: the global
 * commands (FLUSH, TIMEOUT, START/STOPDAEMON) are handled directly,
 * everything else is converted from the compat structs and routed to
 * the per-service add/edit/del/zero and per-dest add/edit/del helpers.
 * NOTE(review): error-return lines, goto labels, the switch header
 * and several closing braces are missing from this extract.
 */
2090 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2093 unsigned char arg[MAX_ARG_LEN];
2094 struct ip_vs_service_user *usvc_compat;
2095 struct ip_vs_service_user_kern usvc;
2096 struct ip_vs_service *svc;
2097 struct ip_vs_dest_user *udest_compat;
2098 struct ip_vs_dest_user_kern udest;
/* Only privileged callers may reconfigure the virtual server. */
2100 if (!capable(CAP_NET_ADMIN))
2103 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2105 if (len < 0 || len > MAX_ARG_LEN)
/* Length must match the command's expected argument size exactly. */
2107 if (len != set_arglen[SET_CMDID(cmd)]) {
2108 pr_err("set_ctl: len %u != %u\n",
2109 len, set_arglen[SET_CMDID(cmd)]);
2113 if (copy_from_user(arg, user, len) != 0)
2116 /* increase the module use count */
2117 ip_vs_use_count_inc();
2119 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2124 if (cmd == IP_VS_SO_SET_FLUSH) {
2125 /* Flush the virtual service */
2126 ret = ip_vs_flush();
2128 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2129 /* Set timeout values for (tcp tcpfin udp) */
2130 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2132 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2133 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2134 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2136 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2137 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2138 ret = stop_sync_thread(dm->state);
/* Remaining commands carry a service (optionally + dest) argument. */
2142 usvc_compat = (struct ip_vs_service_user *)arg;
2143 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2145 /* We only use the new structs internally, so copy userspace compat
2146 * structs to extended internal versions */
2147 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2148 ip_vs_copy_udest_compat(&udest, udest_compat);
2150 if (cmd == IP_VS_SO_SET_ZERO) {
2151 /* if no service address is set, zero counters in all */
2152 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2153 ret = ip_vs_zero_all();
2158 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2159 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2160 usvc.protocol != IPPROTO_SCTP) {
2161 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2162 usvc.protocol, &usvc.addr.ip,
2163 ntohs(usvc.port), usvc.sched_name);
2168 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2169 if (usvc.fwmark == 0)
2170 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2171 &usvc.addr, usvc.port);
2173 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
/* Except for ADD, the target service must already exist and match. */
2175 if (cmd != IP_VS_SO_SET_ADD
2176 && (svc == NULL || svc->protocol != usvc.protocol)) {
2182 case IP_VS_SO_SET_ADD:
2186 ret = ip_vs_add_service(&usvc, &svc);
2188 case IP_VS_SO_SET_EDIT:
2189 ret = ip_vs_edit_service(svc, &usvc);
2191 case IP_VS_SO_SET_DEL:
2192 ret = ip_vs_del_service(svc);
2196 case IP_VS_SO_SET_ZERO:
2197 ret = ip_vs_zero_service(svc);
2199 case IP_VS_SO_SET_ADDDEST:
2200 ret = ip_vs_add_dest(svc, &udest);
2202 case IP_VS_SO_SET_EDITDEST:
2203 ret = ip_vs_edit_dest(svc, &udest);
2205 case IP_VS_SO_SET_DELDEST:
2206 ret = ip_vs_del_dest(svc, &udest);
2213 mutex_unlock(&__ip_vs_mutex);
2215 /* decrease the module use count */
2216 ip_vs_use_count_dec();
/* Snapshot src->ustats into the userspace-layout *dst under the
 * stats spinlock so the counters are mutually consistent. */
2223 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2225 spin_lock_bh(&src->lock);
2226 memcpy(dst, &src->ustats, sizeof(*dst));
2227 spin_unlock_bh(&src->lock);
/*
 * Fill a userspace ip_vs_service_entry from an in-kernel service.
 * Timeout is converted from jiffies back to seconds; stats are copied
 * under the stats lock via ip_vs_copy_stats().  IPv4-only view
 * (dst->addr takes src->addr.ip).
 */
2231 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2233 dst->protocol = src->protocol;
2234 dst->addr = src->addr.ip;
2235 dst->port = src->port;
2236 dst->fwmark = src->fwmark;
2237 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2238 dst->flags = src->flags;
2239 dst->timeout = src->timeout / HZ;
2240 dst->netmask = src->netmask;
2241 dst->num_dests = src->num_dests;
2242 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * Copy up to get->num_services service entries to userspace for the
 * legacy getsockopt interface, walking the protocol-hashed table then
 * the fwmark-hashed table.  Only AF_INET services are exported — the
 * old ABI has no room for IPv6 addresses.
 * NOTE(review): the count++/ret handling, 'goto out' error paths and
 * the final count write-back are missing from this extract.
 */
2246 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2247 struct ip_vs_get_services __user *uptr)
2250 struct ip_vs_service *svc;
2251 struct ip_vs_service_entry entry;
2254 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2255 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2256 /* Only expose IPv4 entries to old interface */
2257 if (svc->af != AF_INET)
2260 if (count >= get->num_services)
2262 memset(&entry, 0, sizeof(entry));
2263 ip_vs_copy_service(&entry, svc);
2264 if (copy_to_user(&uptr->entrytable[count],
2265 &entry, sizeof(entry))) {
2273 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2274 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2275 /* Only expose IPv4 entries to old interface */
2276 if (svc->af != AF_INET)
2279 if (count >= get->num_services)
2281 memset(&entry, 0, sizeof(entry));
2282 ip_vs_copy_service(&entry, svc);
2283 if (copy_to_user(&uptr->entrytable[count],
2284 &entry, sizeof(entry))) {
/*
 * Copy up to get->num_dests destination entries of one service to
 * userspace for the legacy getsockopt interface.  The service is
 * looked up by fwmark if non-zero, else by <protocol, addr, port>;
 * AF_INET only.  NOTE(review): the memset of 'entry', the count/ret
 * bookkeeping and the -ESRCH path for a missing service are not
 * visible in this extract.
 */
2296 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2297 struct ip_vs_get_dests __user *uptr)
2299 struct ip_vs_service *svc;
2300 union nf_inet_addr addr = { .ip = get->addr };
2304 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2306 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2311 struct ip_vs_dest *dest;
2312 struct ip_vs_dest_entry entry;
2314 list_for_each_entry(dest, &svc->destinations, n_list) {
2315 if (count >= get->num_dests)
2318 entry.addr = dest->addr.ip;
2319 entry.port = dest->port;
2320 entry.conn_flags = atomic_read(&dest->conn_flags);
2321 entry.weight = atomic_read(&dest->weight);
2322 entry.u_threshold = dest->u_threshold;
2323 entry.l_threshold = dest->l_threshold;
2324 entry.activeconns = atomic_read(&dest->activeconns);
2325 entry.inactconns = atomic_read(&dest->inactconns);
2326 entry.persistconns = atomic_read(&dest->persistconns);
2327 ip_vs_copy_stats(&entry.stats, &dest->stats);
2328 if (copy_to_user(&uptr->entrytable[count],
2329 &entry, sizeof(entry))) {
/* Report the current TCP/TCP-FIN/UDP timeouts in seconds (stored
 * internally as jiffies, hence the / HZ conversions). */
2341 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2343 #ifdef CONFIG_IP_VS_PROTO_TCP
2345 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2346 u->tcp_fin_timeout =
2347 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2349 #ifdef CONFIG_IP_VS_PROTO_UDP
2351 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/* Minimum getsockopt argument length per IP_VS_SO_GET_* command;
 * do_ip_vs_get_ctl() rejects requests with a smaller *len. */
2356 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2357 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2358 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2359 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2360 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2361 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
/* Two entries: one for the master daemon, one for the backup. */
2362 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2364 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2365 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2366 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2367 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2368 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2369 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2370 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2371 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * getsockopt entry point for the legacy IPVS query interface.
 * Checks capability, command range and minimum length, copies the
 * request header in, then serves each IP_VS_SO_GET_* command under
 * __ip_vs_mutex: version string, global info, the service/dest
 * tables, current timeouts, and sync-daemon state.
 * NOTE(review): the 'ret' declaration, several error returns, size
 * checks' enclosing ifs, case break statements and closing braces are
 * missing from this extract.
 */
2375 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2377 unsigned char arg[128];
2379 unsigned int copylen;
2381 if (!capable(CAP_NET_ADMIN))
2384 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
/* Caller's buffer must be at least the command's minimum size. */
2387 if (*len < get_arglen[GET_CMDID(cmd)]) {
2388 pr_err("get_ctl: len %u < %u\n",
2389 *len, get_arglen[GET_CMDID(cmd)]);
2393 copylen = get_arglen[GET_CMDID(cmd)];
2397 if (copy_from_user(arg, user, copylen) != 0)
2400 if (mutex_lock_interruptible(&__ip_vs_mutex))
2401 return -ERESTARTSYS;
2404 case IP_VS_SO_GET_VERSION:
2408 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2409 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2410 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2414 *len = strlen(buf)+1;
2418 case IP_VS_SO_GET_INFO:
2420 struct ip_vs_getinfo info;
2421 info.version = IP_VS_VERSION_CODE;
2422 info.size = ip_vs_conn_tab_size;
2423 info.num_services = ip_vs_num_services;
2424 if (copy_to_user(user, &info, sizeof(info)) != 0)
2429 case IP_VS_SO_GET_SERVICES:
2431 struct ip_vs_get_services *get;
2434 get = (struct ip_vs_get_services *)arg;
/* Caller must size the buffer for header + num_services entries. */
2435 size = sizeof(*get) +
2436 sizeof(struct ip_vs_service_entry) * get->num_services;
2438 pr_err("length: %u != %u\n", *len, size);
2442 ret = __ip_vs_get_service_entries(get, user);
2446 case IP_VS_SO_GET_SERVICE:
2448 struct ip_vs_service_entry *entry;
2449 struct ip_vs_service *svc;
2450 union nf_inet_addr addr;
2452 entry = (struct ip_vs_service_entry *)arg;
2453 addr.ip = entry->addr;
/* fwmark lookup takes precedence over <proto, addr, port>. */
2455 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2457 svc = __ip_vs_service_find(AF_INET, entry->protocol,
2458 &addr, entry->port);
2460 ip_vs_copy_service(entry, svc);
2461 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2468 case IP_VS_SO_GET_DESTS:
2470 struct ip_vs_get_dests *get;
2473 get = (struct ip_vs_get_dests *)arg;
2474 size = sizeof(*get) +
2475 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2477 pr_err("length: %u != %u\n", *len, size);
2481 ret = __ip_vs_get_dest_entries(get, user);
2485 case IP_VS_SO_GET_TIMEOUT:
2487 struct ip_vs_timeout_user t;
2489 __ip_vs_get_timeouts(&t);
2490 if (copy_to_user(user, &t, sizeof(t)) != 0)
2495 case IP_VS_SO_GET_DAEMON:
2497 struct ip_vs_daemon_user d[2];
/* d[0] reports the master daemon, d[1] the backup daemon. */
2499 memset(&d, 0, sizeof(d));
2500 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2501 d[0].state = IP_VS_STATE_MASTER;
2502 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2503 d[0].syncid = ip_vs_master_syncid;
2505 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2506 d[1].state = IP_VS_STATE_BACKUP;
2507 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2508 d[1].syncid = ip_vs_backup_syncid;
2510 if (copy_to_user(user, &d, sizeof(d)) != 0)
2520 mutex_unlock(&__ip_vs_mutex);
/* Netfilter sockopt registration mapping the IP_VS_SO_SET_*/
/* IP_VS_SO_GET_* command ranges onto the two handlers above. */
2525 static struct nf_sockopt_ops ip_vs_sockopts = {
2527 .set_optmin = IP_VS_BASE_CTL,
2528 .set_optmax = IP_VS_SO_SET_MAX+1,
2529 .set = do_ip_vs_set_ctl,
2530 .get_optmin = IP_VS_BASE_CTL,
2531 .get_optmax = IP_VS_SO_GET_MAX+1,
2532 .get = do_ip_vs_get_ctl,
2533 .owner = THIS_MODULE,
2537 * Generic Netlink interface
2540 /* IPVS genetlink family */
/* Family id assigned dynamically (GENL_ID_GENERATE). */
2541 static struct genl_family ip_vs_genl_family = {
2542 .id = GENL_ID_GENERATE,
2544 .name = IPVS_GENL_NAME,
2545 .version = IPVS_GENL_VERSION,
2546 .maxattr = IPVS_CMD_MAX,
2549 /* Policy used for first-level command attributes */
2550 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2551 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2552 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2553 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2554 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2555 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2556 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2559 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2560 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2561 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2562 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2563 .len = IP_VS_IFNAME_MAXLEN },
2564 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2567 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2568 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2569 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2570 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2571 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2572 .len = sizeof(union nf_inet_addr) },
2573 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2574 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2575 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2576 .len = IP_VS_SCHEDNAME_MAXLEN },
2577 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2578 .len = IP_VS_PENAME_MAXLEN },
2579 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2580 .len = sizeof(struct ip_vs_flags) },
2581 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2582 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2583 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2586 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2587 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2588 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2589 .len = sizeof(union nf_inet_addr) },
2590 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2591 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2592 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2593 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2594 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2595 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2596 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2597 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2598 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * Emit a nested IPVS_*_ATTR_STATS container with the counters and
 * rate estimates, snapshotted under the stats spinlock.  The
 * NLA_PUT_* macros jump to nla_put_failure on buffer exhaustion,
 * where both the lock and the nest are unwound.
 * NOTE(review): the NULL check after nla_nest_start and the return
 * statements are missing from this extract.
 */
2601 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2602 struct ip_vs_stats *stats)
2604 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2608 spin_lock_bh(&stats->lock);
2610 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2611 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2612 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2613 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2614 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2615 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2616 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2617 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2618 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2619 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2621 spin_unlock_bh(&stats->lock);
2623 nla_nest_end(skb, nl_stats);
/* nla_put_failure path: release the lock and cancel the nest. */
2628 spin_unlock_bh(&stats->lock);
2629 nla_nest_cancel(skb, nl_stats);
/*
 * Emit a nested IPVS_CMD_ATTR_SERVICE container describing one
 * service: either fwmark or <protocol, addr, port> identity, then
 * scheduler/pe names, flags, timeout (seconds), netmask and stats.
 * NOTE(review): the fwmark-vs-address branch structure, the pe NULL
 * check and the return statements are missing from this extract.
 */
2633 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2634 struct ip_vs_service *svc)
2636 struct nlattr *nl_service;
2637 struct ip_vs_flags flags = { .flags = svc->flags,
2640 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2644 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2647 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2649 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2650 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2651 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2654 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2656 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2657 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2658 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2659 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2661 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2662 goto nla_put_failure;
2664 nla_nest_end(skb, nl_service);
/* nla_put_failure path: undo the partially-built nest. */
2669 nla_nest_cancel(skb, nl_service);
/*
 * Emit one complete IPVS_CMD_NEW_SERVICE multipart message for a
 * netlink dump: genl header plus the nested service attribute.
 * NOTE(review): the hdr NULL check and the failure return are not
 * visible in this extract.
 */
2673 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2674 struct ip_vs_service *svc,
2675 struct netlink_callback *cb)
2679 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2680 &ip_vs_genl_family, NLM_F_MULTI,
2681 IPVS_CMD_NEW_SERVICE);
2685 if (ip_vs_genl_fill_service(skb, svc) < 0)
2686 goto nla_put_failure;
2688 return genlmsg_end(skb, hdr);
2691 genlmsg_cancel(skb, hdr);
/*
 * Netlink dump callback for all services.  cb->args[0] remembers how
 * many entries earlier dump passes already emitted, so resumed dumps
 * skip them; walks the protocol table then the fwmark table under
 * __ip_vs_mutex.  NOTE(review): the idx++/skip logic, the cb->args
 * save on nla_put_failure and the return are missing from this
 * extract.
 */
2695 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2696 struct netlink_callback *cb)
2699 int start = cb->args[0];
2700 struct ip_vs_service *svc;
2702 mutex_lock(&__ip_vs_mutex);
2703 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2704 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2707 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2709 goto nla_put_failure;
2714 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2715 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2718 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2720 goto nla_put_failure;
2726 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_SERVICE attribute into *usvc and look
 * up the matching kernel service into *ret_svc.  The identity is
 * either AF + fwmark, or AF + protocol + addr + port.  When
 * full_entry is set, the editable fields (sched name, flags, timeout,
 * netmask, optional pe name) are also required and parsed; flags are
 * merged with the existing service's flags through flags.mask.
 * NOTE(review): error returns, the usvc->fwmark branch condition and
 * several closing braces are missing from this extract.
 */
2732 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2733 struct nlattr *nla, int full_entry,
2734 struct ip_vs_service **ret_svc)
2736 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2737 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2738 struct ip_vs_service *svc;
2740 /* Parse mandatory identifying service fields first */
2742 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2745 nla_af = attrs[IPVS_SVC_ATTR_AF];
2746 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2747 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2748 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2749 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
/* Need AF plus either a fwmark or the full proto/addr/port triple. */
2751 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2754 memset(usvc, 0, sizeof(*usvc));
2756 usvc->af = nla_get_u16(nla_af);
2757 #ifdef CONFIG_IP_VS_IPV6
2758 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2760 if (usvc->af != AF_INET)
2762 return -EAFNOSUPPORT;
/* fwmark services always record IPPROTO_TCP as a placeholder. */
2765 usvc->protocol = IPPROTO_TCP;
2766 usvc->fwmark = nla_get_u32(nla_fwmark);
2768 usvc->protocol = nla_get_u16(nla_protocol);
2769 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2770 usvc->port = nla_get_u16(nla_port);
2775 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2777 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2778 &usvc->addr, usvc->port);
2781 /* If a full entry was requested, check for the additional fields */
2783 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2785 struct ip_vs_flags flags;
2787 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2788 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2789 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2790 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2791 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
/* pe name is optional; the other four editable fields are not. */
2793 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2796 nla_memcpy(&flags, nla_flags, sizeof(flags));
2798 /* prefill flags from service if it already exists */
2800 usvc->flags = svc->flags;
2802 /* set new flags from userland */
2803 usvc->flags = (usvc->flags & ~flags.mask) |
2804 (flags.flags & flags.mask);
2805 usvc->sched_name = nla_data(nla_sched);
2806 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2807 usvc->timeout = nla_get_u32(nla_timeout);
2808 usvc->netmask = nla_get_u32(nla_netmask);
/* Convenience wrapper: parse only the identifying service fields
 * (full_entry = 0) and return the found service, or ERR_PTR on a
 * parse error.  May also return NULL when no service matches. */
2814 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2816 struct ip_vs_service_user_kern usvc;
2817 struct ip_vs_service *svc;
2820 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2821 return ret ? ERR_PTR(ret) : svc;
/*
 * Emit a nested IPVS_CMD_ATTR_DEST container for one real server:
 * address/port identity, forwarding method (masked out of the
 * connection flags), weight, thresholds, connection counters and
 * stats.  NOTE(review): the NULL check after nla_nest_start and the
 * return statements are missing from this extract.
 */
2824 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2826 struct nlattr *nl_dest;
2828 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2832 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2833 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2835 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2836 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2837 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2838 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2839 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2840 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2841 atomic_read(&dest->activeconns));
2842 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2843 atomic_read(&dest->inactconns));
2844 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2845 atomic_read(&dest->persistconns));
2847 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2848 goto nla_put_failure;
2850 nla_nest_end(skb, nl_dest);
/* nla_put_failure path: undo the partially-built nest. */
2855 nla_nest_cancel(skb, nl_dest);
/*
 * Emit one multipart genl message for a destination during a dump
 * (the IPVS_CMD_NEW_DEST command argument line is not visible in
 * this extract).
 */
2859 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2860 struct netlink_callback *cb)
2864 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2865 &ip_vs_genl_family, NLM_F_MULTI,
2870 if (ip_vs_genl_dump_dest(skb, dest) < 0)
2871 goto nla_put_failure;
2873 return genlmsg_end(skb, hdr);
2876 genlmsg_cancel(skb, hdr);
/*
 * Netlink dump callback for the destinations of one service: parse
 * the IPVS_CMD_ATTR_SERVICE attribute out of the request, find the
 * service, then emit each destination, resuming after cb->args[0]
 * entries from prior passes.  Runs under __ip_vs_mutex.
 * NOTE(review): the idx skip logic, cb->args save and the out_err
 * label are missing from this extract.
 */
2880 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2881 struct netlink_callback *cb)
2884 int start = cb->args[0];
2885 struct ip_vs_service *svc;
2886 struct ip_vs_dest *dest;
2887 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2889 mutex_lock(&__ip_vs_mutex);
2891 /* Try to find the service for which to dump destinations */
2892 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2893 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2896 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2897 if (IS_ERR(svc) || svc == NULL)
2900 /* Dump the destinations */
2901 list_for_each_entry(dest, &svc->destinations, n_list) {
2904 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2906 goto nla_put_failure;
2914 mutex_unlock(&__ip_vs_mutex);
2919 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2920 struct nlattr *nla, int full_entry)
2922 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2923 struct nlattr *nla_addr, *nla_port;
2925 /* Parse mandatory identifying destination fields first */
2927 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2930 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2931 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2933 if (!(nla_addr && nla_port))
2936 memset(udest, 0, sizeof(*udest));
2938 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2939 udest->port = nla_get_u16(nla_port);
2941 /* If a full entry was requested, check for the additional fields */
2943 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2946 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2947 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2948 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2949 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2951 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2954 udest->conn_flags = nla_get_u32(nla_fwd)
2955 & IP_VS_CONN_F_FWD_MASK;
2956 udest->weight = nla_get_u32(nla_weight);
2957 udest->u_threshold = nla_get_u32(nla_u_thresh);
2958 udest->l_threshold = nla_get_u32(nla_l_thresh);
2964 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2965 const char *mcast_ifn, __be32 syncid)
2967 struct nlattr *nl_daemon;
2969 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2973 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2974 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2975 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2977 nla_nest_end(skb, nl_daemon);
2982 nla_nest_cancel(skb, nl_daemon);
2986 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2987 const char *mcast_ifn, __be32 syncid,
2988 struct netlink_callback *cb)
2991 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2992 &ip_vs_genl_family, NLM_F_MULTI,
2993 IPVS_CMD_NEW_DAEMON);
2997 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2998 goto nla_put_failure;
3000 return genlmsg_end(skb, hdr);
3003 genlmsg_cancel(skb, hdr);
3007 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3008 struct netlink_callback *cb)
3010 mutex_lock(&__ip_vs_mutex);
3011 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3012 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3013 ip_vs_master_mcast_ifn,
3014 ip_vs_master_syncid, cb) < 0)
3015 goto nla_put_failure;
3020 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3021 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3022 ip_vs_backup_mcast_ifn,
3023 ip_vs_backup_syncid, cb) < 0)
3024 goto nla_put_failure;
3030 mutex_unlock(&__ip_vs_mutex);
3035 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3037 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3038 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3039 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3042 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3043 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3044 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3047 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3049 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3052 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3055 static int ip_vs_genl_set_config(struct nlattr **attrs)
3057 struct ip_vs_timeout_user t;
3059 __ip_vs_get_timeouts(&t);
3061 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3062 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3064 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3066 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3068 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3069 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3071 return ip_vs_set_timeout(&t);
3074 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3076 struct ip_vs_service *svc = NULL;
3077 struct ip_vs_service_user_kern usvc;
3078 struct ip_vs_dest_user_kern udest;
3080 int need_full_svc = 0, need_full_dest = 0;
3082 cmd = info->genlhdr->cmd;
3084 mutex_lock(&__ip_vs_mutex);
3086 if (cmd == IPVS_CMD_FLUSH) {
3087 ret = ip_vs_flush();
3089 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3090 ret = ip_vs_genl_set_config(info->attrs);
3092 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3093 cmd == IPVS_CMD_DEL_DAEMON) {
3095 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3097 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3098 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3099 info->attrs[IPVS_CMD_ATTR_DAEMON],
3100 ip_vs_daemon_policy)) {
3105 if (cmd == IPVS_CMD_NEW_DAEMON)
3106 ret = ip_vs_genl_new_daemon(daemon_attrs);
3108 ret = ip_vs_genl_del_daemon(daemon_attrs);
3110 } else if (cmd == IPVS_CMD_ZERO &&
3111 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3112 ret = ip_vs_zero_all();
3116 /* All following commands require a service argument, so check if we
3117 * received a valid one. We need a full service specification when
3118 * adding / editing a service. Only identifying members otherwise. */
3119 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3122 ret = ip_vs_genl_parse_service(&usvc,
3123 info->attrs[IPVS_CMD_ATTR_SERVICE],
3124 need_full_svc, &svc);
3128 /* Unless we're adding a new service, the service must already exist */
3129 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3134 /* Destination commands require a valid destination argument. For
3135 * adding / editing a destination, we need a full destination
3137 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3138 cmd == IPVS_CMD_DEL_DEST) {
3139 if (cmd != IPVS_CMD_DEL_DEST)
3142 ret = ip_vs_genl_parse_dest(&udest,
3143 info->attrs[IPVS_CMD_ATTR_DEST],
3150 case IPVS_CMD_NEW_SERVICE:
3152 ret = ip_vs_add_service(&usvc, &svc);
3156 case IPVS_CMD_SET_SERVICE:
3157 ret = ip_vs_edit_service(svc, &usvc);
3159 case IPVS_CMD_DEL_SERVICE:
3160 ret = ip_vs_del_service(svc);
3161 /* do not use svc, it can be freed */
3163 case IPVS_CMD_NEW_DEST:
3164 ret = ip_vs_add_dest(svc, &udest);
3166 case IPVS_CMD_SET_DEST:
3167 ret = ip_vs_edit_dest(svc, &udest);
3169 case IPVS_CMD_DEL_DEST:
3170 ret = ip_vs_del_dest(svc, &udest);
3173 ret = ip_vs_zero_service(svc);
3180 mutex_unlock(&__ip_vs_mutex);
3185 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3187 struct sk_buff *msg;
3189 int ret, cmd, reply_cmd;
3191 cmd = info->genlhdr->cmd;
3193 if (cmd == IPVS_CMD_GET_SERVICE)
3194 reply_cmd = IPVS_CMD_NEW_SERVICE;
3195 else if (cmd == IPVS_CMD_GET_INFO)
3196 reply_cmd = IPVS_CMD_SET_INFO;
3197 else if (cmd == IPVS_CMD_GET_CONFIG)
3198 reply_cmd = IPVS_CMD_SET_CONFIG;
3200 pr_err("unknown Generic Netlink command\n");
3204 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3208 mutex_lock(&__ip_vs_mutex);
3210 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3212 goto nla_put_failure;
3215 case IPVS_CMD_GET_SERVICE:
3217 struct ip_vs_service *svc;
3219 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3224 ret = ip_vs_genl_fill_service(msg, svc);
3226 goto nla_put_failure;
3235 case IPVS_CMD_GET_CONFIG:
3237 struct ip_vs_timeout_user t;
3239 __ip_vs_get_timeouts(&t);
3240 #ifdef CONFIG_IP_VS_PROTO_TCP
3241 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3242 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3245 #ifdef CONFIG_IP_VS_PROTO_UDP
3246 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3252 case IPVS_CMD_GET_INFO:
3253 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3254 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3255 ip_vs_conn_tab_size);
3259 genlmsg_end(msg, reply);
3260 ret = genlmsg_reply(msg, info);
3264 pr_err("not enough space in Netlink message\n");
3270 mutex_unlock(&__ip_vs_mutex);
3276 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3278 .cmd = IPVS_CMD_NEW_SERVICE,
3279 .flags = GENL_ADMIN_PERM,
3280 .policy = ip_vs_cmd_policy,
3281 .doit = ip_vs_genl_set_cmd,
3284 .cmd = IPVS_CMD_SET_SERVICE,
3285 .flags = GENL_ADMIN_PERM,
3286 .policy = ip_vs_cmd_policy,
3287 .doit = ip_vs_genl_set_cmd,
3290 .cmd = IPVS_CMD_DEL_SERVICE,
3291 .flags = GENL_ADMIN_PERM,
3292 .policy = ip_vs_cmd_policy,
3293 .doit = ip_vs_genl_set_cmd,
3296 .cmd = IPVS_CMD_GET_SERVICE,
3297 .flags = GENL_ADMIN_PERM,
3298 .doit = ip_vs_genl_get_cmd,
3299 .dumpit = ip_vs_genl_dump_services,
3300 .policy = ip_vs_cmd_policy,
3303 .cmd = IPVS_CMD_NEW_DEST,
3304 .flags = GENL_ADMIN_PERM,
3305 .policy = ip_vs_cmd_policy,
3306 .doit = ip_vs_genl_set_cmd,
3309 .cmd = IPVS_CMD_SET_DEST,
3310 .flags = GENL_ADMIN_PERM,
3311 .policy = ip_vs_cmd_policy,
3312 .doit = ip_vs_genl_set_cmd,
3315 .cmd = IPVS_CMD_DEL_DEST,
3316 .flags = GENL_ADMIN_PERM,
3317 .policy = ip_vs_cmd_policy,
3318 .doit = ip_vs_genl_set_cmd,
3321 .cmd = IPVS_CMD_GET_DEST,
3322 .flags = GENL_ADMIN_PERM,
3323 .policy = ip_vs_cmd_policy,
3324 .dumpit = ip_vs_genl_dump_dests,
3327 .cmd = IPVS_CMD_NEW_DAEMON,
3328 .flags = GENL_ADMIN_PERM,
3329 .policy = ip_vs_cmd_policy,
3330 .doit = ip_vs_genl_set_cmd,
3333 .cmd = IPVS_CMD_DEL_DAEMON,
3334 .flags = GENL_ADMIN_PERM,
3335 .policy = ip_vs_cmd_policy,
3336 .doit = ip_vs_genl_set_cmd,
3339 .cmd = IPVS_CMD_GET_DAEMON,
3340 .flags = GENL_ADMIN_PERM,
3341 .dumpit = ip_vs_genl_dump_daemons,
3344 .cmd = IPVS_CMD_SET_CONFIG,
3345 .flags = GENL_ADMIN_PERM,
3346 .policy = ip_vs_cmd_policy,
3347 .doit = ip_vs_genl_set_cmd,
3350 .cmd = IPVS_CMD_GET_CONFIG,
3351 .flags = GENL_ADMIN_PERM,
3352 .doit = ip_vs_genl_get_cmd,
3355 .cmd = IPVS_CMD_GET_INFO,
3356 .flags = GENL_ADMIN_PERM,
3357 .doit = ip_vs_genl_get_cmd,
3360 .cmd = IPVS_CMD_ZERO,
3361 .flags = GENL_ADMIN_PERM,
3362 .policy = ip_vs_cmd_policy,
3363 .doit = ip_vs_genl_set_cmd,
3366 .cmd = IPVS_CMD_FLUSH,
3367 .flags = GENL_ADMIN_PERM,
3368 .doit = ip_vs_genl_set_cmd,
3372 static int __init ip_vs_genl_register(void)
3374 return genl_register_family_with_ops(&ip_vs_genl_family,
3375 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3378 static void ip_vs_genl_unregister(void)
3380 genl_unregister_family(&ip_vs_genl_family);
3383 /* End of Generic Netlink interface definitions */
3386 int __init ip_vs_control_init(void)
3393 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3394 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3395 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3396 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3398 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3399 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3403 ret = nf_register_sockopt(&ip_vs_sockopts);
3405 pr_err("cannot register sockopt.\n");
3409 ret = ip_vs_genl_register();
3411 pr_err("cannot register Generic Netlink interface.\n");
3412 nf_unregister_sockopt(&ip_vs_sockopts);
3416 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3417 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3419 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3421 ip_vs_new_estimator(&ip_vs_stats);
3423 /* Hook the defense timer */
3424 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3431 void ip_vs_control_cleanup(void)
3434 ip_vs_trash_cleanup();
3435 cancel_rearming_delayed_work(&defense_work);
3436 cancel_work_sync(&defense_work.work);
3437 ip_vs_kill_estimator(&ip_vs_stats);
3438 unregister_sysctl_table(sysctl_header);
3439 proc_net_remove(&init_net, "ip_vs_stats");
3440 proc_net_remove(&init_net, "ip_vs");
3441 ip_vs_genl_unregister();
3442 nf_unregister_sockopt(&ip_vs_sockopts);