2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
42 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ip6_route.h>
46 #include <net/route.h>
48 #include <net/genetlink.h>
50 #include <asm/uaccess.h>
52 #include <net/ip_vs.h>
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72 /* 1/rate drop and drop-entry variables */
/* ip_vs_drop_rate/ip_vs_drop_counter are recomputed under
 * __ip_vs_droppacket_lock in update_defense_level(); non-static so the
 * packet path elsewhere in IPVS can read them. */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
/* Flag read by the defense work handler to trigger ip_vs_random_dropentry() */
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
77 /* number of virtual services */
/* Counts IPv4 services only (see the add/del service paths) -- kept for
 * the legacy get/setsockopt interface. */
78 static int ip_vs_num_services = 0;
80 /* sysctl variables */
/* drop_entry/drop_packet/secure_tcp are defense modes 0..3 managed by
 * update_defense_level() via proc_do_defense_mode(). */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
/* Available-memory watermark (pages) below which defenses engage */
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
94 int sysctl_ip_vs_snat_reroute = 1;
95 int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
97 #ifdef CONFIG_IP_VS_DEBUG
98 static int sysctl_ip_vs_debug_level = 0;
/* Accessor for the debug_level sysctl, exported to the rest of IPVS
 * (only built under CONFIG_IP_VS_DEBUG). */
100 int ip_vs_get_debug_level(void)
102 return sysctl_ip_vs_debug_level;
106 #ifdef CONFIG_IP_VS_IPV6
107 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/* Returns nonzero when @addr routes to a loopback device in init_net,
 * i.e. the address is local to this host. */
108 static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
116 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
119 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
120 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
127 * update_defense_level is called from keventd and from sysctl,
128 * so it needs to protect itself from softirqs
130 static void update_defense_level(void)
/* Remembers the previous secure_tcp mode across calls so protocol
 * timeout tables are only switched on a real mode transition. */
133 static int old_secure_tcp = 0;
138 /* we only count free and buffered memory (in pages) */
140 availmem = i.freeram + i.bufferram;
141 /* however in linux 2.5 the i.bufferram is total page cache size,
143 /* si_swapinfo(&i); */
144 /* availmem = availmem - (i.totalswap - i.freeswap); */
146 nomem = (availmem < sysctl_ip_vs_amemthresh);
/* drop_entry defense: the sysctl value is latched between 1 and 2
 * depending on memory pressure; ip_vs_dropentry gates the periodic
 * random-entry dropper. (NOTE(review): case labels are elided in this
 * view -- confirm transitions against the full source.) */
151 spin_lock(&__ip_vs_dropentry_lock);
152 switch (sysctl_ip_vs_drop_entry) {
154 atomic_set(&ip_vs_dropentry, 0);
158 atomic_set(&ip_vs_dropentry, 1);
159 sysctl_ip_vs_drop_entry = 2;
161 atomic_set(&ip_vs_dropentry, 0);
166 atomic_set(&ip_vs_dropentry, 1);
168 atomic_set(&ip_vs_dropentry, 0);
169 sysctl_ip_vs_drop_entry = 1;
173 atomic_set(&ip_vs_dropentry, 1);
176 spin_unlock(&__ip_vs_dropentry_lock);
/* drop_packet defense: drop 1 of every ip_vs_drop_rate packets; the
 * rate scales with how far availmem has fallen below amemthresh. */
179 spin_lock(&__ip_vs_droppacket_lock);
180 switch (sysctl_ip_vs_drop_packet) {
186 ip_vs_drop_rate = ip_vs_drop_counter
187 = sysctl_ip_vs_amemthresh /
188 (sysctl_ip_vs_amemthresh-availmem);
189 sysctl_ip_vs_drop_packet = 2;
196 ip_vs_drop_rate = ip_vs_drop_counter
197 = sysctl_ip_vs_amemthresh /
198 (sysctl_ip_vs_amemthresh-availmem);
201 sysctl_ip_vs_drop_packet = 1;
205 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
208 spin_unlock(&__ip_vs_droppacket_lock);
/* secure_tcp defense: a value > 1 selects the stricter TCP state
 * timeout table via ip_vs_protocol_timeout_change(). */
211 spin_lock(&ip_vs_securetcp_lock);
212 switch (sysctl_ip_vs_secure_tcp) {
214 if (old_secure_tcp >= 2)
219 if (old_secure_tcp < 2)
221 sysctl_ip_vs_secure_tcp = 2;
223 if (old_secure_tcp >= 2)
229 if (old_secure_tcp < 2)
232 if (old_secure_tcp >= 2)
234 sysctl_ip_vs_secure_tcp = 1;
238 if (old_secure_tcp < 2)
242 old_secure_tcp = sysctl_ip_vs_secure_tcp;
244 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
245 spin_unlock(&ip_vs_securetcp_lock);
252 * Timer for checking the defense
254 #define DEFENSE_TIMER_PERIOD 1*HZ
255 static void defense_work_handler(struct work_struct *work);
256 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
/* Periodic (1s) work item: refresh the defense levels and, when the
 * drop-entry defense is armed, expire random connection entries.
 * Re-arms itself at the end. */
258 static void defense_work_handler(struct work_struct *work)
260 update_defense_level();
261 if (atomic_read(&ip_vs_dropentry))
262 ip_vs_random_dropentry();
264 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
/* Pin/unpin the ip_vs module while user-space holds control state;
 * inc returns the try_module_get() result (0 on failure). */
268 ip_vs_use_count_inc(void)
270 return try_module_get(THIS_MODULE);
274 ip_vs_use_count_dec(void)
276 module_put(THIS_MODULE);
281 * Hash table: for virtual service lookups
283 #define IP_VS_SVC_TAB_BITS 8
284 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
285 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
287 /* the service table hashed by <protocol, addr, port> */
288 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
289 /* the service table hashed by fwmark */
290 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
293 * Hash table: for real service lookups
295 #define IP_VS_RTAB_BITS 4
296 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
297 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
/* Real-server table (16 buckets), protected by __ip_vs_rs_lock */
299 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
302 * Trash for destinations
/* Removed destinations still referenced by connections park here; see
 * ip_vs_trash_get_dest()/ip_vs_trash_cleanup(). */
304 static LIST_HEAD(ip_vs_dest_trash);
307 * FTP & NULL virtual service counters
/* Fast checks used by ip_vs_service_get() before trying the FTP-port
 * and port-zero fallback lookups. */
309 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
310 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
314 * Returns hash value for virtual service
316 static __inline__ unsigned
317 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
/* Fold protocol, address and port into an IP_VS_SVC_TAB_MASK-bounded
 * bucket index; IPv6 addresses are XOR-folded to 32 bits first. */
320 register unsigned porth = ntohs(port);
321 __be32 addr_fold = addr->ip;
323 #ifdef CONFIG_IP_VS_IPV6
325 addr_fold = addr->ip6[0]^addr->ip6[1]^
326 addr->ip6[2]^addr->ip6[3];
329 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
330 & IP_VS_SVC_TAB_MASK;
334 * Returns hash value of fwmark for virtual service lookup
/* Plain masking -- fwmark-hashed services land in bucket fwmark % 256. */
336 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
338 return fwmark & IP_VS_SVC_TAB_MASK;
342 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
343 * or in the ip_vs_svc_fwm_table by fwmark.
344 * Should be called with locked tables.
346 static int ip_vs_svc_hash(struct ip_vs_service *svc)
/* Refuse to double-hash: the HASHED flag tracks table membership. */
350 if (svc->flags & IP_VS_SVC_F_HASHED) {
351 pr_err("%s(): request for already hashed, called from %pF\n",
352 __func__, __builtin_return_address(0));
/* fwmark == 0 selects the <proto,addr,port> table, otherwise the
 * fwmark table; each service sits in exactly one of the two. */
356 if (svc->fwmark == 0) {
358 * Hash it by <protocol,addr,port> in ip_vs_svc_table
360 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
362 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
365 * Hash it by fwmark in ip_vs_svc_fwm_table
367 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
368 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
371 svc->flags |= IP_VS_SVC_F_HASHED;
372 /* increase its refcnt because it is referenced by the svc table */
373 atomic_inc(&svc->refcnt);
379 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
380 * Should be called with locked tables.
382 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
384 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
385 pr_err("%s(): request for unhash flagged, called from %pF\n",
386 __func__, __builtin_return_address(0));
390 if (svc->fwmark == 0) {
391 /* Remove it from the ip_vs_svc_table table */
392 list_del(&svc->s_list);
394 /* Remove it from the ip_vs_svc_fwm_table table */
395 list_del(&svc->f_list);
/* Mirror of ip_vs_svc_hash(): clear the flag and drop the table's
 * reference taken there. */
398 svc->flags &= ~IP_VS_SVC_F_HASHED;
399 atomic_dec(&svc->refcnt);
405 * Get service by {proto,addr,port} in the service table.
/* Lock-free walk of one bucket; callers hold __ip_vs_svc_lock.
 * Matches on af + addr + port + protocol. */
407 static inline struct ip_vs_service *
408 __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
412 struct ip_vs_service *svc;
414 /* Check for "full" addressed entries */
415 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
417 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
419 && ip_vs_addr_equal(af, &svc->addr, vaddr)
420 && (svc->port == vport)
421 && (svc->protocol == protocol)) {
432 * Get service by {fwmark} in the service table.
/* Caller holds __ip_vs_svc_lock; matches on fwmark + address family. */
434 static inline struct ip_vs_service *
435 __ip_vs_svc_fwm_find(int af, __u32 fwmark)
438 struct ip_vs_service *svc;
440 /* Check for fwmark addressed entries */
441 hash = ip_vs_svc_fwm_hashkey(fwmark);
443 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
444 if (svc->fwmark == fwmark && svc->af == af) {
/* Public service lookup.  Tries fwmark first, then <proto,addr,port>,
 * then two fallbacks: the FTP control service (for FTP data
 * connections) and the catch-all port-zero service.  On a hit the
 * service usecnt is bumped; the caller must release it with
 * ip_vs_service_put(). */
453 struct ip_vs_service *
454 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
455 const union nf_inet_addr *vaddr, __be16 vport)
457 struct ip_vs_service *svc;
459 read_lock(&__ip_vs_svc_lock);
462 * Check the table hashed by fwmark first
464 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
468 * Check the table hashed by <protocol,addr,port>
469 * for "full" addressed entries
471 svc = __ip_vs_service_find(af, protocol, vaddr, vport);
/* FTP fallback only for TCP, only when FTP services exist, and only
 * for data-ish ports (FTPDATA or non-privileged). */
474 && protocol == IPPROTO_TCP
475 && atomic_read(&ip_vs_ftpsvc_counter)
476 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
478 * Check if ftp service entry exists, the packet
479 * might belong to FTP data connections.
481 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
485 && atomic_read(&ip_vs_nullsvc_counter)) {
487 * Check if the catch-all port (port zero) exists
489 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
494 atomic_inc(&svc->usecnt);
495 read_unlock(&__ip_vs_svc_lock);
497 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
498 fwmark, ip_vs_proto_name(protocol),
499 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
500 svc ? "hit" : "not hit");
/* Attach @dest to @svc, taking a service reference. */
507 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
509 atomic_inc(&svc->refcnt);
/* Detach @dest from its service; dropping the last refcnt triggers
 * the service teardown logged below. */
514 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
516 struct ip_vs_service *svc = dest->svc;
519 if (atomic_dec_and_test(&svc->refcnt)) {
520 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
522 IP_VS_DBG_ADDR(svc->af, &svc->addr),
523 ntohs(svc->port), atomic_read(&svc->usecnt));
530 * Returns hash value for real service
/* Same folding scheme as ip_vs_svc_hashkey() but without the protocol
 * and bounded by IP_VS_RTAB_MASK. */
532 static inline unsigned ip_vs_rs_hashkey(int af,
533 const union nf_inet_addr *addr,
536 register unsigned porth = ntohs(port);
537 __be32 addr_fold = addr->ip;
539 #ifdef CONFIG_IP_VS_IPV6
541 addr_fold = addr->ip6[0]^addr->ip6[1]^
542 addr->ip6[2]^addr->ip6[3];
545 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
550 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
551 * should be called with locked tables.
553 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
/* A non-empty d_list means the dest is already in ip_vs_rtable. */
557 if (!list_empty(&dest->d_list)) {
562 * Hash by proto,addr,port,
563 * which are the parameters of the real service.
565 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
567 list_add(&dest->d_list, &ip_vs_rtable[hash]);
573 * UNhashes ip_vs_dest from ip_vs_rtable.
574 * should be called with locked tables.
576 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
578 *
579 * Remove it from the ip_vs_rtable table.
581 if (!list_empty(&dest->d_list)) {
582 list_del(&dest->d_list);
/* Re-init so ip_vs_rs_hash()'s list_empty() membership test works. */
583 INIT_LIST_HEAD(&dest->d_list);
590 * Lookup real service by <proto,addr,port> in the real service table.
593 ip_vs_lookup_real_service(int af, __u16 protocol,
594 const union nf_inet_addr *daddr,
598 struct ip_vs_dest *dest;
601 * Check for "full" addressed entries
602 * Return the first found entry
604 hash = ip_vs_rs_hashkey(af, daddr, dport);
/* Takes __ip_vs_rs_lock itself (unlike the svc finders, which rely on
 * the caller holding __ip_vs_svc_lock). */
606 read_lock(&__ip_vs_rs_lock);
607 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
609 && ip_vs_addr_equal(af, &dest->addr, daddr)
610 && (dest->port == dport)
611 && ((dest->protocol == protocol) ||
614 read_unlock(&__ip_vs_rs_lock);
618 read_unlock(&__ip_vs_rs_lock);
624 * Lookup destination by {addr,port} in the given service
/* Linear scan of svc->destinations; matches af + addr + port. */
626 static struct ip_vs_dest *
627 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
630 struct ip_vs_dest *dest;
633 * Find the destination for the given service
635 list_for_each_entry(dest, &svc->destinations, n_list) {
636 if ((dest->af == svc->af)
637 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
638 && (dest->port == dport)) {
648 * Find destination by {daddr,dport,vaddr,protocol}
649 * Cretaed to be used in ip_vs_process_message() in
650 * the backup synchronization daemon. It finds the
651 * destination to be bound to the received connection
654 * ip_vs_lookup_real_service() looked promissing, but
655 * seems not working as expected.
/* On success the dest refcnt is bumped before the service reference
 * (taken by ip_vs_service_get) is dropped; caller owns the dest ref. */
657 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
659 const union nf_inet_addr *vaddr,
660 __be16 vport, __u16 protocol, __u32 fwmark)
662 struct ip_vs_dest *dest;
663 struct ip_vs_service *svc;
665 svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
668 dest = ip_vs_lookup_dest(svc, daddr, dport);
670 atomic_inc(&dest->refcnt);
671 ip_vs_service_put(svc);
676 * Lookup dest by {svc,addr,port} in the destination trash.
677 * The destination trash is used to hold the destinations that are removed
678 * from the service table but are still referenced by some conn entries.
679 * The reason to add the destination trash is when the dest is temporary
680 * down (either by administrator or by monitor program), the dest can be
681 * picked back from the trash, the remaining connections to the dest can
682 * continue, and the counting information of the dest is also useful for
685 static struct ip_vs_dest *
686 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
689 struct ip_vs_dest *dest, *nxt;
692 * Find the destination in trash
/* Safe iteration: non-matching entries with refcnt==1 are purged from
 * the trash as a side effect of the scan. */
694 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
695 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
698 IP_VS_DBG_ADDR(svc->af, &dest->addr),
700 atomic_read(&dest->refcnt));
/* Match on the full tuple, including the virtual side (fwmark or
 * vaddr:vport) so a dest is only revived for its own service. */
701 if (dest->af == svc->af &&
702 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
703 dest->port == dport &&
704 dest->vfwmark == svc->fwmark &&
705 dest->protocol == svc->protocol &&
707 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
708 dest->vport == svc->port))) {
714 * Try to purge the destination from trash if not referenced
716 if (atomic_read(&dest->refcnt) == 1) {
717 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
720 IP_VS_DBG_ADDR(svc->af, &dest->addr),
722 list_del(&dest->n_list);
723 ip_vs_dst_reset(dest);
724 __ip_vs_unbind_svc(dest);
734 * Clean up all the destinations in the trash
735 * Called by the ip_vs_control_cleanup()
737 * When the ip_vs_control_clearup is activated by ipvs module exit,
738 * the service tables must have been flushed and all the connections
739 * are expired, and the refcnt of each destination in the trash must
740 * be 1, so we simply release them here.
742 static void ip_vs_trash_cleanup(void)
744 struct ip_vs_dest *dest, *nxt;
746 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747 list_del(&dest->n_list);
/* Drop the cached route and the service reference before freeing. */
748 ip_vs_dst_reset(dest);
749 __ip_vs_unbind_svc(dest);
/* Reset a stats block (counters and its rate estimator) under its
 * bh-safe spinlock. */
756 ip_vs_zero_stats(struct ip_vs_stats *stats)
758 spin_lock_bh(&stats->lock);
760 memset(&stats->ustats, 0, sizeof(stats->ustats));
761 ip_vs_zero_estimator(stats);
763 spin_unlock_bh(&stats->lock);
767 * Update a destination in the given service
/* Shared by add/edit paths (@add distinguishes them): applies the
 * user-supplied weight, flags and thresholds, (re)binds the dest to
 * @svc, and relinks it under __ip_vs_svc_lock. */
770 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771 struct ip_vs_dest_user_kern *udest, int add)
775 /* set the weight and the flags */
776 atomic_set(&dest->weight, udest->weight);
777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
778 conn_flags |= IP_VS_CONN_F_INACTIVE;
780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
785 * Put the real service in ip_vs_rtable if not present.
786 * For now only for NAT!
788 write_lock_bh(&__ip_vs_rs_lock);
790 write_unlock_bh(&__ip_vs_rs_lock);
792 atomic_set(&dest->conn_flags, conn_flags);
794 /* bind the service */
796 __ip_vs_bind_svc(dest, svc);
/* A dest revived from the trash may still point at another service:
 * rebind and restart its stats. */
798 if (dest->svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc);
805 /* set the dest status flags */
806 dest->flags |= IP_VS_DEST_F_AVAILABLE;
/* Raising (or clearing) the upper threshold lifts an overload mark. */
808 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810 dest->u_threshold = udest->u_threshold;
811 dest->l_threshold = udest->l_threshold;
/* Invalidate any cached route to the real server. */
813 spin_lock(&dest->dst_lock);
814 ip_vs_dst_reset(dest);
815 spin_unlock(&dest->dst_lock);
818 ip_vs_new_estimator(&dest->stats);
820 write_lock_bh(&__ip_vs_svc_lock);
822 /* Wait until all other svc users go away */
823 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
826 list_add(&dest->n_list, &svc->destinations);
830 /* call the update_service, because server weight may be changed */
831 if (svc->scheduler->update_service)
832 svc->scheduler->update_service(svc);
834 write_unlock_bh(&__ip_vs_svc_lock);
839 * Create a destination for the given service
/* Validates the address (local/unicast), allocates and initializes a
 * new ip_vs_dest with refcnt 1, then hands it to __ip_vs_update_dest()
 * with add=1.  Result returned via *dest_p. */
842 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843 struct ip_vs_dest **dest_p)
845 struct ip_vs_dest *dest;
850 #ifdef CONFIG_IP_VS_IPV6
851 if (svc->af == AF_INET6) {
/* IPv6: reject non-unicast or link-local unless the address is local
 * to this host. */
852 atype = ipv6_addr_type(&udest->addr.in6);
853 if ((!(atype & IPV6_ADDR_UNICAST) ||
854 atype & IPV6_ADDR_LINKLOCAL) &&
855 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
860 atype = inet_addr_type(&init_net, udest->addr.ip);
861 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
865 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
867 pr_err("%s(): no memory.\n", __func__);
/* Record the virtual-service identity so the dest can later be
 * matched back from the trash (see ip_vs_trash_get_dest). */
872 dest->protocol = svc->protocol;
873 dest->vaddr = svc->addr;
874 dest->vport = svc->port;
875 dest->vfwmark = svc->fwmark;
876 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
877 dest->port = udest->port;
879 atomic_set(&dest->activeconns, 0);
880 atomic_set(&dest->inactconns, 0);
881 atomic_set(&dest->persistconns, 0);
882 atomic_set(&dest->refcnt, 1);
884 INIT_LIST_HEAD(&dest->d_list);
885 spin_lock_init(&dest->dst_lock);
886 spin_lock_init(&dest->stats.lock);
887 __ip_vs_update_dest(svc, dest, udest, 1);
897 * Add a destination into an existing service
/* User-space entry point: validates weight/thresholds, rejects
 * duplicates, revives a matching dest from the trash when possible,
 * otherwise allocates a fresh one via ip_vs_new_dest(). */
900 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
902 struct ip_vs_dest *dest;
903 union nf_inet_addr daddr;
904 __be16 dport = udest->port;
909 if (udest->weight < 0) {
910 pr_err("%s(): server weight less than zero\n", __func__);
914 if (udest->l_threshold > udest->u_threshold) {
915 pr_err("%s(): lower threshold is higher than upper threshold\n",
920 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
923 * Check if the dest already exists in the list
925 dest = ip_vs_lookup_dest(svc, &daddr, dport);
928 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
933 * Check if the dest already exists in the trash and
934 * is from the same service
936 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
939 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
940 "dest->refcnt=%d, service %u/%s:%u\n",
941 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
942 atomic_read(&dest->refcnt),
944 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
948 * Get the destination from the trash
950 list_del(&dest->n_list);
952 __ip_vs_update_dest(svc, dest, udest, 1);
956 * Allocate and initialize the dest structure
958 ret = ip_vs_new_dest(svc, udest, &dest);
967 * Edit a destination in the given service
/* Same validation as ip_vs_add_dest(), but the dest must already
 * exist; delegates to __ip_vs_update_dest() with add=0. */
970 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
972 struct ip_vs_dest *dest;
973 union nf_inet_addr daddr;
974 __be16 dport = udest->port;
978 if (udest->weight < 0) {
979 pr_err("%s(): server weight less than zero\n", __func__);
983 if (udest->l_threshold > udest->u_threshold) {
984 pr_err("%s(): lower threshold is higher than upper threshold\n",
989 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
992 * Lookup the destination list
994 dest = ip_vs_lookup_dest(svc, &daddr, dport);
997 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1001 __ip_vs_update_dest(svc, dest, udest, 0);
1009 * Delete a destination (must be already unlinked from the service)
1011 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1013 ip_vs_kill_estimator(&dest->stats);
1016 *
1017 * Remove it from the d-linked list with the real services.
1018 write_lock_bh(&__ip_vs_rs_lock);
1019 ip_vs_rs_unhash(dest);
1020 write_unlock_bh(&__ip_vs_rs_lock);
1023 * Decrease the refcnt of the dest, and free the dest
1024 * if nobody refers to it (refcnt=0). Otherwise, throw
1025 * the destination into the trash.
1027 if (atomic_dec_and_test(&dest->refcnt)) {
1028 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1030 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1032 ip_vs_dst_reset(dest);
1033 /* simply decrease svc->refcnt here, let the caller check
1034 and release the service if nobody refers to it.
1035 Only user context can release destination and service,
1036 and only one user context can update virtual service at a
1037 time, so the operation here is OK */
1038 atomic_dec(&dest->svc->refcnt);
/* Still referenced by connections: park it in the trash and keep a
 * reference on behalf of the trash list. */
1041 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042 "dest->refcnt=%d\n",
1043 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1045 atomic_read(&dest->refcnt));
1046 list_add(&dest->n_list, &ip_vs_dest_trash);
1047 atomic_inc(&dest->refcnt);
1053 * Unlink a destination from the given service
1055 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056 struct ip_vs_dest *dest,
/* Mark the dest unavailable before delisting so the data path stops
 * choosing it. */
1059 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1062 *
1063 * Remove it from the d-linked destination list.
1064 list_del(&dest->n_list);
1068 *
1069 * Call the update_service function of its scheduler
/* svcupd is 0 on bulk teardown paths (see __ip_vs_del_service) to
 * skip redundant scheduler notifications. */
1070 if (svcupd && svc->scheduler->update_service)
1071 svc->scheduler->update_service(svc);
1076 * Delete a destination server in the given service
1079 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1081 struct ip_vs_dest *dest;
1082 __be16 dport = udest->port;
1086 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1089 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1093 write_lock_bh(&__ip_vs_svc_lock);
1096 *
1097 * Wait until all other svc users go away.
1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1101 *
1102 * Unlink dest from the service
1103 __ip_vs_unlink_dest(svc, dest, 1);
1105 write_unlock_bh(&__ip_vs_svc_lock);
1108 *
1109 * Delete the destination
/* Performed outside __ip_vs_svc_lock; frees or trashes the dest. */
1110 __ip_vs_del_dest(dest);
1119 * Add a service into the service hash table
/* Creates a virtual service from the user request: pins the module,
 * resolves scheduler and optional persistence engine by name,
 * allocates/initializes the service, binds scheduler+pe, updates the
 * global counters and hashes it into the table.  Later lines in this
 * function (elided in this view) unwind on error. */
1122 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1123 struct ip_vs_service **svc_p)
1126 struct ip_vs_scheduler *sched = NULL;
1127 struct ip_vs_pe *pe = NULL;
1128 struct ip_vs_service *svc = NULL;
1130 /* increase the module use count */
1131 ip_vs_use_count_inc();
1133 /* Lookup the scheduler by 'u->sched_name' */
1134 sched = ip_vs_scheduler_get(u->sched_name);
1135 if (sched == NULL) {
1136 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1141 if (u->pe_name && *u->pe_name) {
1142 pe = ip_vs_pe_getbyname(u->pe_name);
1144 pr_info("persistence engine module ip_vs_pe_%s "
1145 "not found\n", u->pe_name);
1151 #ifdef CONFIG_IP_VS_IPV6
/* IPv6 netmask is a prefix length and must be 1..128. */
1152 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1158 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1160 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1165 /* I'm the first user of the service */
1166 atomic_set(&svc->usecnt, 0);
1167 atomic_set(&svc->refcnt, 0);
1170 svc->protocol = u->protocol;
1171 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1172 svc->port = u->port;
1173 svc->fwmark = u->fwmark;
1174 svc->flags = u->flags;
1175 svc->timeout = u->timeout * HZ;
1176 svc->netmask = u->netmask;
1178 INIT_LIST_HEAD(&svc->destinations);
1179 rwlock_init(&svc->sched_lock);
1180 spin_lock_init(&svc->stats.lock);
1182 /* Bind the scheduler */
1183 ret = ip_vs_bind_scheduler(svc, sched);
1188 /* Bind the ct retriever */
1189 ip_vs_bind_pe(svc, pe);
1192 /* Update the virtual service counters */
1193 if (svc->port == FTPPORT)
1194 atomic_inc(&ip_vs_ftpsvc_counter);
1195 else if (svc->port == 0)
1196 atomic_inc(&ip_vs_nullsvc_counter);
1198 ip_vs_new_estimator(&svc->stats);
1200 /* Count only IPv4 services for old get/setsockopt interface */
1201 if (svc->af == AF_INET)
1202 ip_vs_num_services++;
1204 /* Hash the service into the service table */
1205 write_lock_bh(&__ip_vs_svc_lock);
1206 ip_vs_svc_hash(svc);
1207 write_unlock_bh(&__ip_vs_svc_lock);
/* Error unwind labels follow: unbind scheduler, release app inc,
 * drop scheduler ref and module use count. */
1214 ip_vs_unbind_scheduler(svc);
1217 ip_vs_app_inc_put(svc->inc);
1222 ip_vs_scheduler_put(sched);
1225 /* decrease the module use count */
1226 ip_vs_use_count_dec();
1233 * Edit a service and bind it with a new scheduler
1236 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1238 struct ip_vs_scheduler *sched, *old_sched;
1239 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1243 *
1244 * Lookup the scheduler, by 'u->sched_name'
1245 sched = ip_vs_scheduler_get(u->sched_name);
1246 if (sched == NULL) {
1247 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1252 if (u->pe_name && *u->pe_name) {
1253 pe = ip_vs_pe_getbyname(u->pe_name);
1255 pr_info("persistence engine module ip_vs_pe_%s "
1256 "not found\n", u->pe_name);
1263 #ifdef CONFIG_IP_VS_IPV6
1264 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1270 write_lock_bh(&__ip_vs_svc_lock);
1273 *
1274 * Wait until all other svc users go away.
1275 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1278 *
1279 * Set the flags and timeout value
/* HASHED must stay set: the service remains in the table while edited. */
1280 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1281 svc->timeout = u->timeout * HZ;
1282 svc->netmask = u->netmask;
1284 old_sched = svc->scheduler;
1285 if (sched != old_sched) {
1287 *
1288 * Unbind the old scheduler
1289 if ((ret = ip_vs_unbind_scheduler(svc))) {
1295 *
1296 * Bind the new scheduler
1297 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1299 *
1300 * If ip_vs_bind_scheduler fails, restore the old
1301 * The main reason of failure is out of memory.
1303 * The question is if the old scheduler can be
1304 * restored all the time. TODO: if it cannot be
1305 * restored some time, we must delete the service,
1306 * otherwise the system may crash.
1308 ip_vs_bind_scheduler(svc, old_sched);
1316 ip_vs_unbind_pe(svc);
1317 ip_vs_bind_pe(svc, pe);
1321 write_unlock_bh(&__ip_vs_svc_lock);
/* Release the references replaced above (put helpers tolerate NULL). */
1323 ip_vs_scheduler_put(old_sched);
1324 ip_vs_pe_put(old_pe);
1330 * Delete a service from the service list
1331 * - The service must be unlinked, unlocked and not referenced!
1332 * - We are called under _bh lock
1334 static void __ip_vs_del_service(struct ip_vs_service *svc)
1336 struct ip_vs_dest *dest, *nxt;
1337 struct ip_vs_scheduler *old_sched;
1338 struct ip_vs_pe *old_pe;
/* (Removed a stray 'pr_info("%s: enter\n", ...)' debug trace here: it
 * logged at info level on every service deletion, and no other helper
 * in this file traces entry/exit.) */
1342 /* Count only IPv4 services for old get/setsockopt interface */
1343 if (svc->af == AF_INET)
1344 ip_vs_num_services--;
1346 ip_vs_kill_estimator(&svc->stats);
1348 /* Unbind scheduler */
1349 old_sched = svc->scheduler;
1350 ip_vs_unbind_scheduler(svc);
1351 ip_vs_scheduler_put(old_sched);
1353 /* Unbind persistence engine */
1355 ip_vs_unbind_pe(svc);
1356 ip_vs_pe_put(old_pe);
1358 /* Unbind app inc */
1360 ip_vs_app_inc_put(svc->inc);
1365 *
1366 * Unlink the whole destination list
/* svcupd=0: no point notifying the scheduler per-dest while tearing
 * the whole service down. */
1367 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1368 __ip_vs_unlink_dest(svc, dest, 0);
1369 __ip_vs_del_dest(dest);
1373 *
1374 * Update the virtual service counters
1375 if (svc->port == FTPPORT)
1376 atomic_dec(&ip_vs_ftpsvc_counter);
1377 else if (svc->port == 0)
1378 atomic_dec(&ip_vs_nullsvc_counter);
1381 *
1382 * Free the service if nobody refers to it
1383 if (atomic_read(&svc->refcnt) == 0) {
1384 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1386 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1387 ntohs(svc->port), atomic_read(&svc->usecnt));
1391 /* decrease the module use count */
1392 ip_vs_use_count_dec();
1396 * Unlink a service from list and try to delete it if its refcnt reached 0
1398 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1400 *
1401 * Unhash it from the service table
1403 write_lock_bh(&__ip_vs_svc_lock);
1405 ip_vs_svc_unhash(svc);
1408 *
1409 * Wait until all the svc users go away.
1410 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
/* Teardown happens while still holding __ip_vs_svc_lock (bh). */
1412 __ip_vs_del_service(svc);
1414 write_unlock_bh(&__ip_vs_svc_lock);
1418 * Delete a service from the service list
/* Thin wrapper around ip_vs_unlink_service() for the sockopt path. */
1420 static int ip_vs_del_service(struct ip_vs_service *svc)
1424 ip_vs_unlink_service(svc);
1431 * Flush all the virtual services
1433 static int ip_vs_flush(void)
1436 struct ip_vs_service *svc, *nxt;
1439 *
1440 * Flush the service table hashed by <protocol,addr,port>
/* _safe iteration because ip_vs_unlink_service() removes svc from the
 * bucket list. */
1441 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1442 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1443 ip_vs_unlink_service(svc);
1448 *
1449 * Flush the service table hashed by fwmark
1450 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1451 list_for_each_entry_safe(svc, nxt,
1452 &ip_vs_svc_fwm_table[idx], f_list) {
1453 ip_vs_unlink_service(svc);
1462 * Zero counters in a service or all services
/* Zeroes every destination's stats and then the service's own, all
 * under the svc write lock so the table is stable. */
1464 static int ip_vs_zero_service(struct ip_vs_service *svc)
1466 struct ip_vs_dest *dest;
1468 write_lock_bh(&__ip_vs_svc_lock);
1469 list_for_each_entry(dest, &svc->destinations, n_list) {
1470 ip_vs_zero_stats(&dest->stats);
1472 ip_vs_zero_stats(&svc->stats);
1473 write_unlock_bh(&__ip_vs_svc_lock);
/* Zero the stats of every service in both hash tables, then the
 * global ip_vs_stats aggregate. */
1477 static int ip_vs_zero_all(void)
1480 struct ip_vs_service *svc;
1482 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1484 ip_vs_zero_service(svc);
1488 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1489 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1490 ip_vs_zero_service(svc);
1494 ip_vs_zero_stats(&ip_vs_stats);
/* sysctl handler for drop_entry/drop_packet/secure_tcp: accepts only
 * 0..3, restores the old value on out-of-range writes, and re-runs
 * update_defense_level() after a valid change. */
1500 proc_do_defense_mode(ctl_table *table, int write,
1501 void __user *buffer, size_t *lenp, loff_t *ppos)
1503 int *valp = table->data;
1507 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1508 if (write && (*valp != val)) {
1509 if ((*valp < 0) || (*valp > 3)) {
1510 /* Restore the correct value */
1513 update_defense_level();
/* sysctl handler for sync_threshold (a pair): rejects negatives and
 * requires threshold < period by restoring the saved pair. */
1521 proc_do_sync_threshold(ctl_table *table, int write,
1522 void __user *buffer, size_t *lenp, loff_t *ppos)
1524 int *valp = table->data;
1528 /* backup the value first */
1529 memcpy(val, valp, sizeof(val));
1531 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1532 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1533 /* Restore the correct value */
1534 memcpy(valp, val, sizeof(val));
/* sysctl handler for sync_version: accepts only 0 or 1 and switches
 * the sync protocol version on a valid change. */
1540 proc_do_sync_mode(ctl_table *table, int write,
1541 void __user *buffer, size_t *lenp, loff_t *ppos)
1543 int *valp = table->data;
1547 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1548 if (write && (*valp != val)) {
1549 if ((*valp < 0) || (*valp > 1)) {
1550 /* Restore the correct value */
1553 ip_vs_sync_switch_mode(val);
1560 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
/*
 * Each entry maps one /proc/sys/net/ipv4/vs/<procname> file to a kernel
 * variable.  Plain integers use proc_dointvec; the defense knobs use
 * proc_do_defense_mode (range 0..3); sync_version uses proc_do_sync_mode;
 * sync_threshold uses proc_do_sync_threshold; the DoS timeout entries are
 * jiffies-converted via proc_dointvec_jiffies.
 * NOTE(review): the .mode fields and the struct-closing braces between
 * entries are elided from this view.
 */
1563 static struct ctl_table vs_vars[] = {
1565 .procname = "amemthresh",
1566 .data = &sysctl_ip_vs_amemthresh,
1567 .maxlen = sizeof(int),
1569 .proc_handler = proc_dointvec,
1571 #ifdef CONFIG_IP_VS_DEBUG
1573 .procname = "debug_level",
1574 .data = &sysctl_ip_vs_debug_level,
1575 .maxlen = sizeof(int),
1577 .proc_handler = proc_dointvec,
1581 .procname = "am_droprate",
1582 .data = &sysctl_ip_vs_am_droprate,
1583 .maxlen = sizeof(int),
1585 .proc_handler = proc_dointvec,
1588 .procname = "drop_entry",
1589 .data = &sysctl_ip_vs_drop_entry,
1590 .maxlen = sizeof(int),
1592 .proc_handler = proc_do_defense_mode,
1595 .procname = "drop_packet",
1596 .data = &sysctl_ip_vs_drop_packet,
1597 .maxlen = sizeof(int),
1599 .proc_handler = proc_do_defense_mode,
1601 #ifdef CONFIG_IP_VS_NFCT
1603 .procname = "conntrack",
1604 .data = &sysctl_ip_vs_conntrack,
1605 .maxlen = sizeof(int),
1607 .proc_handler = &proc_dointvec,
1611 .procname = "secure_tcp",
1612 .data = &sysctl_ip_vs_secure_tcp,
1613 .maxlen = sizeof(int),
1615 .proc_handler = proc_do_defense_mode,
1618 .procname = "snat_reroute",
1619 .data = &sysctl_ip_vs_snat_reroute,
1620 .maxlen = sizeof(int),
1622 .proc_handler = &proc_dointvec,
1625 .procname = "sync_version",
1626 .data = &sysctl_ip_vs_sync_ver,
1627 .maxlen = sizeof(int),
1629 .proc_handler = &proc_do_sync_mode,
/* per-state timeouts used by the secure_tcp DoS defense table */
1633 .procname = "timeout_established",
1634 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1635 .maxlen = sizeof(int),
1637 .proc_handler = proc_dointvec_jiffies,
1640 .procname = "timeout_synsent",
1641 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1642 .maxlen = sizeof(int),
1644 .proc_handler = proc_dointvec_jiffies,
1647 .procname = "timeout_synrecv",
1648 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1649 .maxlen = sizeof(int),
1651 .proc_handler = proc_dointvec_jiffies,
1654 .procname = "timeout_finwait",
1655 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1656 .maxlen = sizeof(int),
1658 .proc_handler = proc_dointvec_jiffies,
1661 .procname = "timeout_timewait",
1662 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1663 .maxlen = sizeof(int),
1665 .proc_handler = proc_dointvec_jiffies,
1668 .procname = "timeout_close",
1669 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1670 .maxlen = sizeof(int),
1672 .proc_handler = proc_dointvec_jiffies,
1675 .procname = "timeout_closewait",
1676 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1677 .maxlen = sizeof(int),
1679 .proc_handler = proc_dointvec_jiffies,
1682 .procname = "timeout_lastack",
1683 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1684 .maxlen = sizeof(int),
1686 .proc_handler = proc_dointvec_jiffies,
1689 .procname = "timeout_listen",
1690 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1691 .maxlen = sizeof(int),
1693 .proc_handler = proc_dointvec_jiffies,
1696 .procname = "timeout_synack",
1697 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1698 .maxlen = sizeof(int),
1700 .proc_handler = proc_dointvec_jiffies,
1703 .procname = "timeout_udp",
1704 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1705 .maxlen = sizeof(int),
1707 .proc_handler = proc_dointvec_jiffies,
1710 .procname = "timeout_icmp",
1711 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1712 .maxlen = sizeof(int),
1714 .proc_handler = proc_dointvec_jiffies,
1718 .procname = "cache_bypass",
1719 .data = &sysctl_ip_vs_cache_bypass,
1720 .maxlen = sizeof(int),
1722 .proc_handler = proc_dointvec,
1725 .procname = "expire_nodest_conn",
1726 .data = &sysctl_ip_vs_expire_nodest_conn,
1727 .maxlen = sizeof(int),
1729 .proc_handler = proc_dointvec,
1732 .procname = "expire_quiescent_template",
1733 .data = &sysctl_ip_vs_expire_quiescent_template,
1734 .maxlen = sizeof(int),
1736 .proc_handler = proc_dointvec,
1739 .procname = "sync_threshold",
1740 .data = &sysctl_ip_vs_sync_threshold,
/* maxlen is the whole array (two ints), not sizeof(int) */
1741 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1743 .proc_handler = proc_do_sync_threshold,
1746 .procname = "nat_icmp_send",
1747 .data = &sysctl_ip_vs_nat_icmp_send,
1748 .maxlen = sizeof(int),
1750 .proc_handler = proc_dointvec,
/* sysctl directory path net.ipv4.vs under which vs_vars is registered;
 * exported so other IPVS modules can register tables at the same path. */
1755 const struct ctl_path net_vs_ctl_path[] = {
1756 { .procname = "net", },
1757 { .procname = "ipv4", },
1758 { .procname = "vs", },
1761 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
/* handle returned by sysctl registration; needed for later unregister */
1763 static struct ctl_table_header * sysctl_header;
1765 #ifdef CONFIG_PROC_FS
1768 struct list_head *table;
1773 * Write the contents of the VS rule table to a PROCfs file.
1774 * (It is kept just for backward compatibility)
/*
 * Map a connection's forwarding-method bits (flags & IP_VS_CONN_F_FWD_MASK)
 * to a short human-readable name for /proc output.
 * NOTE(review): the return statements for each case (and the default,
 * presumably "Masq") are elided from this view.
 */
1776 static inline const char *ip_vs_fwd_name(unsigned flags)
1778 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1779 case IP_VS_CONN_F_LOCALNODE:
1781 case IP_VS_CONN_F_TUNNEL:
1783 case IP_VS_CONN_F_DROUTE:
1791 /* Get the Nth entry in the two lists */
/*
 * seq_file helper: walk the protocol-hashed service table first, then the
 * fwmark-hashed table, decrementing 'pos' until the pos-th service is
 * reached.  Records which table and bucket the iterator is in so that
 * ip_vs_info_seq_next() can resume.  Returns NULL when pos is past the end.
 * NOTE(review): the pos-decrement/compare and the return statements are
 * elided from this view.
 */
1792 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1794 struct ip_vs_iter *iter = seq->private;
1796 struct ip_vs_service *svc;
1798 /* look in hash by protocol */
1799 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1800 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1802 iter->table = ip_vs_svc_table;
1809 /* keep looking in fwmark */
1810 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1811 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1813 iter->table = ip_vs_svc_fwm_table;
/*
 * seq_file .start: take the service-table read lock (held until
 * ip_vs_info_seq_stop) and return either the header token (*pos == 0)
 * or the (*pos - 1)-th service.
 */
1823 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1824 __acquires(__ip_vs_svc_lock)
1827 read_lock_bh(&__ip_vs_svc_lock);
1828 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file .next: advance from service 'v' to the following service.
 * Continues within the current bucket, then scans later buckets of the
 * protocol table, and finally falls through to the fwmark table.
 * NOTE(review): the (*pos)++ update, the bucket-reset lines and the
 * final return NULL are elided from this view.
 */
1832 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1834 struct list_head *e;
1835 struct ip_vs_iter *iter;
1836 struct ip_vs_service *svc;
1839 if (v == SEQ_START_TOKEN)
1840 return ip_vs_info_array(seq,0);
1843 iter = seq->private;
1845 if (iter->table == ip_vs_svc_table) {
1846 /* next service in table hashed by protocol */
1847 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1848 return list_entry(e, struct ip_vs_service, s_list);
1851 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1852 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
/* protocol table exhausted — switch the iterator to the fwmark table */
1858 iter->table = ip_vs_svc_fwm_table;
1863 /* next service in hashed by fwmark */
1864 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1865 return list_entry(e, struct ip_vs_service, f_list);
1868 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1869 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file .stop: release the read lock taken in ip_vs_info_seq_start(). */
1877 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1878 __releases(__ip_vs_svc_lock)
1880 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * seq_file .show: print the legacy /proc/net/ip_vs table.  The start
 * token prints the version banner and column headers; otherwise one
 * service line (address form for protocol-hashed services, "FWM" form
 * for fwmark services) followed by one line per destination.
 * NOTE(review): several seq_printf argument lines and the IPv6 service/
 * dest branches are partially elided from this view.
 */
1884 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1886 if (v == SEQ_START_TOKEN) {
1888 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1889 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1891 "Prot LocalAddress:Port Scheduler Flags\n");
1893 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1895 const struct ip_vs_service *svc = v;
1896 const struct ip_vs_iter *iter = seq->private;
1897 const struct ip_vs_dest *dest;
1899 if (iter->table == ip_vs_svc_table) {
1900 #ifdef CONFIG_IP_VS_IPV6
1901 if (svc->af == AF_INET6)
1902 seq_printf(seq, "%s [%pI6]:%04X %s ",
1903 ip_vs_proto_name(svc->protocol),
1906 svc->scheduler->name);
1909 seq_printf(seq, "%s %08X:%04X %s %s ",
1910 ip_vs_proto_name(svc->protocol),
1911 ntohl(svc->addr.ip),
1913 svc->scheduler->name,
1914 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1916 seq_printf(seq, "FWM %08X %s %s",
1917 svc->fwmark, svc->scheduler->name,
1918 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1921 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1922 seq_printf(seq, "persistent %d %08X\n",
1924 ntohl(svc->netmask));
1926 seq_putc(seq, '\n');
/* one "  -> addr:port method weight active inact" line per real server */
1928 list_for_each_entry(dest, &svc->destinations, n_list) {
1929 #ifdef CONFIG_IP_VS_IPV6
1930 if (dest->af == AF_INET6)
1933 " %-7s %-6d %-10d %-10d\n",
1936 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1937 atomic_read(&dest->weight),
1938 atomic_read(&dest->activeconns),
1939 atomic_read(&dest->inactconns));
1944 "%-7s %-6d %-10d %-10d\n",
1945 ntohl(dest->addr.ip),
1947 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1948 atomic_read(&dest->weight),
1949 atomic_read(&dest->activeconns),
1950 atomic_read(&dest->inactconns));
/* seq_file iterator callbacks for /proc/net/ip_vs */
1957 static const struct seq_operations ip_vs_info_seq_ops = {
1958 .start = ip_vs_info_seq_start,
1959 .next = ip_vs_info_seq_next,
1960 .stop = ip_vs_info_seq_stop,
1961 .show = ip_vs_info_seq_show,
/* open allocates a per-reader struct ip_vs_iter as seq->private */
1964 static int ip_vs_info_open(struct inode *inode, struct file *file)
1966 return seq_open_private(file, &ip_vs_info_seq_ops,
1967 sizeof(struct ip_vs_iter));
1970 static const struct file_operations ip_vs_info_fops = {
1971 .owner = THIS_MODULE,
1972 .open = ip_vs_info_open,
1974 .llseek = seq_lseek,
1975 .release = seq_release_private,
/* global IPVS statistics; .lock guards the ustats counters */
1980 struct ip_vs_stats ip_vs_stats = {
1981 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1984 #ifdef CONFIG_PROC_FS
/*
 * single-shot seq_file show for /proc/net/ip_vs_stats: totals block
 * followed by a rates block, both read under the stats spinlock.
 * NOTE(review): the seq_puts() calls emitting the header strings are
 * partially elided from this view.
 */
1985 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1988 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1990 " Total Incoming Outgoing Incoming Outgoing\n");
1992 " Conns Packets Packets Bytes Bytes\n");
1994 spin_lock_bh(&ip_vs_stats.lock);
/* counters printed in hex; byte counters are 64-bit hence the cast */
1995 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1996 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1997 (unsigned long long) ip_vs_stats.ustats.inbytes,
1998 (unsigned long long) ip_vs_stats.ustats.outbytes);
2000 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2002 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2003 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
2004 ip_vs_stats.ustats.cps,
2005 ip_vs_stats.ustats.inpps,
2006 ip_vs_stats.ustats.outpps,
2007 ip_vs_stats.ustats.inbps,
2008 ip_vs_stats.ustats.outbps);
2009 spin_unlock_bh(&ip_vs_stats.lock);
2014 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2016 return single_open(file, ip_vs_stats_show, NULL);
2019 static const struct file_operations ip_vs_stats_fops = {
2020 .owner = THIS_MODULE,
2021 .open = ip_vs_stats_seq_open,
2023 .llseek = seq_lseek,
2024 .release = single_release,
2030 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * Apply user-supplied timeouts (seconds) to the protocol timeout tables;
 * a zero field means "leave unchanged".  Values are converted to jiffies
 * with * HZ.  NOTE(review): the return statement is elided from this view.
 */
2032 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2034 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2039 #ifdef CONFIG_IP_VS_PROTO_TCP
2040 if (u->tcp_timeout) {
2041 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2042 = u->tcp_timeout * HZ;
2045 if (u->tcp_fin_timeout) {
2046 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2047 = u->tcp_fin_timeout * HZ;
2051 #ifdef CONFIG_IP_VS_PROTO_UDP
2052 if (u->udp_timeout) {
2053 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2054 = u->udp_timeout * HZ;
/* expected setsockopt argument length per IP_VS_SO_SET_* command,
 * indexed by command offset from IP_VS_BASE_CTL; used by
 * do_ip_vs_set_ctl() to reject malformed requests */
2061 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2062 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2063 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2064 sizeof(struct ip_vs_dest_user))
2065 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2066 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2067 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2069 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2070 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2071 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2072 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2073 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2074 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2075 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2076 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2077 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2078 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2079 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2080 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * Convert the legacy IPv4-only sockopt service struct into the extended
 * kernel-internal representation.  Only shallow fields are copied; the
 * sched_name pointer aliases the compat struct's buffer (caller keeps it
 * alive for the duration of the call).
 */
2083 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2084 struct ip_vs_service_user *usvc_compat)
2086 memset(usvc, 0, sizeof(*usvc));
2089 usvc->protocol = usvc_compat->protocol;
2090 usvc->addr.ip = usvc_compat->addr;
2091 usvc->port = usvc_compat->port;
2092 usvc->fwmark = usvc_compat->fwmark;
2094 /* Deep copy of sched_name is not needed here */
2095 usvc->sched_name = usvc_compat->sched_name;
2097 usvc->flags = usvc_compat->flags;
2098 usvc->timeout = usvc_compat->timeout;
2099 usvc->netmask = usvc_compat->netmask;
/*
 * Same conversion for the legacy destination struct: zero the kernel
 * form and copy each field across (address is IPv4-only in compat).
 */
2102 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2103 struct ip_vs_dest_user *udest_compat)
2105 memset(udest, 0, sizeof(*udest));
2107 udest->addr.ip = udest_compat->addr;
2108 udest->port = udest_compat->port;
2109 udest->conn_flags = udest_compat->conn_flags;
2110 udest->weight = udest_compat->weight;
2111 udest->u_threshold = udest_compat->u_threshold;
2112 udest->l_threshold = udest_compat->l_threshold;
/*
 * setsockopt entry point for all IP_VS_SO_SET_* commands.
 * Flow: capability check -> command/length validation -> copy argument
 * from userspace -> bump module refcount -> take __ip_vs_mutex ->
 * dispatch (flush/timeout/daemon handled early; the rest operate on a
 * service looked up by <proto,addr,port> or fwmark) -> unlock and drop
 * the refcount.
 * NOTE(review): several error-return lines, 'goto out' targets and the
 * switch braces are elided from this view.
 */
2116 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2119 unsigned char arg[MAX_ARG_LEN];
2120 struct ip_vs_service_user *usvc_compat;
2121 struct ip_vs_service_user_kern usvc;
2122 struct ip_vs_service *svc;
2123 struct ip_vs_dest_user *udest_compat;
2124 struct ip_vs_dest_user_kern udest;
/* configuration changes require CAP_NET_ADMIN */
2126 if (!capable(CAP_NET_ADMIN))
2129 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2131 if (len < 0 || len > MAX_ARG_LEN)
2133 if (len != set_arglen[SET_CMDID(cmd)]) {
2134 pr_err("set_ctl: len %u != %u\n",
2135 len, set_arglen[SET_CMDID(cmd)]);
2139 if (copy_from_user(arg, user, len) != 0)
2142 /* increase the module use count */
2143 ip_vs_use_count_inc();
2145 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2150 if (cmd == IP_VS_SO_SET_FLUSH) {
2151 /* Flush the virtual service */
2152 ret = ip_vs_flush();
2154 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2155 /* Set timeout values for (tcp tcpfin udp) */
2156 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2158 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2159 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2160 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2162 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2163 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2164 ret = stop_sync_thread(dm->state);
/* remaining commands carry a service (and possibly a dest) argument */
2168 usvc_compat = (struct ip_vs_service_user *)arg;
2169 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2171 /* We only use the new structs internally, so copy userspace compat
2172 * structs to extended internal versions */
2173 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2174 ip_vs_copy_udest_compat(&udest, udest_compat);
2176 if (cmd == IP_VS_SO_SET_ZERO) {
2177 /* if no service address is set, zero counters in all */
2178 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2179 ret = ip_vs_zero_all();
2184 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2185 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2186 usvc.protocol != IPPROTO_SCTP) {
2187 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2188 usvc.protocol, &usvc.addr.ip,
2189 ntohs(usvc.port), usvc.sched_name);
2194 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2195 if (usvc.fwmark == 0)
2196 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2197 &usvc.addr, usvc.port);
2199 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
/* every command except ADD requires an existing, protocol-matching service */
2201 if (cmd != IP_VS_SO_SET_ADD
2202 && (svc == NULL || svc->protocol != usvc.protocol)) {
2208 case IP_VS_SO_SET_ADD:
2212 ret = ip_vs_add_service(&usvc, &svc);
2214 case IP_VS_SO_SET_EDIT:
2215 ret = ip_vs_edit_service(svc, &usvc);
2217 case IP_VS_SO_SET_DEL:
2218 ret = ip_vs_del_service(svc);
2222 case IP_VS_SO_SET_ZERO:
2223 ret = ip_vs_zero_service(svc);
2225 case IP_VS_SO_SET_ADDDEST:
2226 ret = ip_vs_add_dest(svc, &udest);
2228 case IP_VS_SO_SET_EDITDEST:
2229 ret = ip_vs_edit_dest(svc, &udest);
2231 case IP_VS_SO_SET_DELDEST:
2232 ret = ip_vs_del_dest(svc, &udest);
2239 mutex_unlock(&__ip_vs_mutex);
2241 /* decrease the module use count */
2242 ip_vs_use_count_dec();
/* snapshot the user-visible counters under the stats spinlock */
2249 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2251 spin_lock_bh(&src->lock);
2252 memcpy(dst, &src->ustats, sizeof(*dst));
2253 spin_unlock_bh(&src->lock);
/*
 * Fill the legacy (IPv4-only) sockopt service entry from a kernel
 * service: identity fields, scheduler name, flags, timeout converted
 * back to seconds, netmask, dest count and a stats snapshot.
 */
2257 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2259 dst->protocol = src->protocol;
2260 dst->addr = src->addr.ip;
2261 dst->port = src->port;
2262 dst->fwmark = src->fwmark;
2263 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2264 dst->flags = src->flags;
2265 dst->timeout = src->timeout / HZ;
2266 dst->netmask = src->netmask;
2267 dst->num_dests = src->num_dests;
2268 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * Copy up to get->num_services service entries (AF_INET only — this is
 * the legacy getsockopt interface) from both hash tables into the
 * user-supplied array.  NOTE(review): the 'count'/'ret' declarations,
 * the EFAULT assignment in the copy-failure branch and the final return
 * are elided from this view.
 */
2272 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2273 struct ip_vs_get_services __user *uptr)
2276 struct ip_vs_service *svc;
2277 struct ip_vs_service_entry entry;
2280 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2281 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2282 /* Only expose IPv4 entries to old interface */
2283 if (svc->af != AF_INET)
2286 if (count >= get->num_services)
2288 memset(&entry, 0, sizeof(entry));
2289 ip_vs_copy_service(&entry, svc);
2290 if (copy_to_user(&uptr->entrytable[count],
2291 &entry, sizeof(entry))) {
/* repeat the scan for fwmark-hashed services */
2299 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2300 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2301 /* Only expose IPv4 entries to old interface */
2302 if (svc->af != AF_INET)
2305 if (count >= get->num_services)
2307 memset(&entry, 0, sizeof(entry));
2308 ip_vs_copy_service(&entry, svc);
2309 if (copy_to_user(&uptr->entrytable[count],
2310 &entry, sizeof(entry))) {
/*
 * Legacy getsockopt: locate the AF_INET service named by fwmark or by
 * <protocol,addr,port> and copy up to get->num_dests destination entries
 * to userspace.  NOTE(review): the ENOENT/EFAULT handling, the entry
 * memset and the return are elided from this view.
 */
2322 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2323 struct ip_vs_get_dests __user *uptr)
2325 struct ip_vs_service *svc;
2326 union nf_inet_addr addr = { .ip = get->addr };
2330 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2332 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2337 struct ip_vs_dest *dest;
2338 struct ip_vs_dest_entry entry;
2340 list_for_each_entry(dest, &svc->destinations, n_list) {
2341 if (count >= get->num_dests)
2344 entry.addr = dest->addr.ip;
2345 entry.port = dest->port;
2346 entry.conn_flags = atomic_read(&dest->conn_flags);
2347 entry.weight = atomic_read(&dest->weight);
2348 entry.u_threshold = dest->u_threshold;
2349 entry.l_threshold = dest->l_threshold;
2350 entry.activeconns = atomic_read(&dest->activeconns);
2351 entry.inactconns = atomic_read(&dest->inactconns);
2352 entry.persistconns = atomic_read(&dest->persistconns);
2353 ip_vs_copy_stats(&entry.stats, &dest->stats);
2354 if (copy_to_user(&uptr->entrytable[count],
2355 &entry, sizeof(entry))) {
/* report the current protocol timeouts, converted from jiffies to seconds */
2367 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2369 #ifdef CONFIG_IP_VS_PROTO_TCP
2371 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2372 u->tcp_fin_timeout =
2373 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2375 #ifdef CONFIG_IP_VS_PROTO_UDP
2377 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/* minimum getsockopt argument length per IP_VS_SO_GET_* command,
 * indexed by command offset; do_ip_vs_get_ctl() rejects shorter buffers */
2382 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2383 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2384 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2385 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2386 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2387 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2388 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2390 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2391 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2392 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2393 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2394 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2395 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2396 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2397 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * getsockopt entry point for all IP_VS_SO_GET_* commands.
 * Flow: capability check -> command/length validation -> copy the fixed
 * header from userspace -> take __ip_vs_mutex -> per-command switch
 * that copies results back -> unlock.
 * NOTE(review): error-return lines, some size checks and the switch
 * braces are elided from this view.
 */
2401 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2403 unsigned char arg[128];
2405 unsigned int copylen;
2407 if (!capable(CAP_NET_ADMIN))
2410 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2413 if (*len < get_arglen[GET_CMDID(cmd)]) {
2414 pr_err("get_ctl: len %u < %u\n",
2415 *len, get_arglen[GET_CMDID(cmd)]);
2419 copylen = get_arglen[GET_CMDID(cmd)];
2423 if (copy_from_user(arg, user, copylen) != 0)
2426 if (mutex_lock_interruptible(&__ip_vs_mutex))
2427 return -ERESTARTSYS;
2430 case IP_VS_SO_GET_VERSION:
2434 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2435 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2436 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2440 *len = strlen(buf)+1;
2444 case IP_VS_SO_GET_INFO:
2446 struct ip_vs_getinfo info;
2447 info.version = IP_VS_VERSION_CODE;
2448 info.size = ip_vs_conn_tab_size;
2449 info.num_services = ip_vs_num_services;
2450 if (copy_to_user(user, &info, sizeof(info)) != 0)
2455 case IP_VS_SO_GET_SERVICES:
2457 struct ip_vs_get_services *get;
2460 get = (struct ip_vs_get_services *)arg;
/* expected total length: header plus num_services entries */
2461 size = sizeof(*get) +
2462 sizeof(struct ip_vs_service_entry) * get->num_services;
2464 pr_err("length: %u != %u\n", *len, size);
2468 ret = __ip_vs_get_service_entries(get, user);
2472 case IP_VS_SO_GET_SERVICE:
2474 struct ip_vs_service_entry *entry;
2475 struct ip_vs_service *svc;
2476 union nf_inet_addr addr;
2478 entry = (struct ip_vs_service_entry *)arg;
2479 addr.ip = entry->addr;
2481 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2483 svc = __ip_vs_service_find(AF_INET, entry->protocol,
2484 &addr, entry->port);
2486 ip_vs_copy_service(entry, svc);
2487 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2494 case IP_VS_SO_GET_DESTS:
2496 struct ip_vs_get_dests *get;
2499 get = (struct ip_vs_get_dests *)arg;
2500 size = sizeof(*get) +
2501 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2503 pr_err("length: %u != %u\n", *len, size);
2507 ret = __ip_vs_get_dest_entries(get, user);
2511 case IP_VS_SO_GET_TIMEOUT:
2513 struct ip_vs_timeout_user t;
2515 __ip_vs_get_timeouts(&t);
2516 if (copy_to_user(user, &t, sizeof(t)) != 0)
2521 case IP_VS_SO_GET_DAEMON:
2523 struct ip_vs_daemon_user d[2];
/* d[0] describes the master daemon, d[1] the backup daemon */
2525 memset(&d, 0, sizeof(d));
2526 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2527 d[0].state = IP_VS_STATE_MASTER;
2528 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2529 d[0].syncid = ip_vs_master_syncid;
2531 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2532 d[1].state = IP_VS_STATE_BACKUP;
2533 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2534 d[1].syncid = ip_vs_backup_syncid;
2536 if (copy_to_user(user, &d, sizeof(d)) != 0)
2546 mutex_unlock(&__ip_vs_mutex);
/* netfilter sockopt registration: routes the IP_VS_SO_* ranges to the
 * set/get handlers above (optmax is exclusive, hence the +1) */
2551 static struct nf_sockopt_ops ip_vs_sockopts = {
2553 .set_optmin = IP_VS_BASE_CTL,
2554 .set_optmax = IP_VS_SO_SET_MAX+1,
2555 .set = do_ip_vs_set_ctl,
2556 .get_optmin = IP_VS_BASE_CTL,
2557 .get_optmax = IP_VS_SO_GET_MAX+1,
2558 .get = do_ip_vs_get_ctl,
2559 .owner = THIS_MODULE,
2563 * Generic Netlink interface
2566 /* IPVS genetlink family */
2567 static struct genl_family ip_vs_genl_family = {
2568 .id = GENL_ID_GENERATE,
2570 .name = IPVS_GENL_NAME,
2571 .version = IPVS_GENL_VERSION,
2572 .maxattr = IPVS_CMD_MAX,
2575 /* Policy used for first-level command attributes */
2576 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2577 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2578 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2579 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2580 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2581 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2582 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2585 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2586 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2587 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2588 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2589 .len = IP_VS_IFNAME_MAXLEN },
2590 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2593 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2594 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2595 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2596 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2597 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2598 .len = sizeof(union nf_inet_addr) },
2599 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2600 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2601 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2602 .len = IP_VS_SCHEDNAME_MAXLEN },
2603 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2604 .len = IP_VS_PENAME_MAXLEN },
2605 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2606 .len = sizeof(struct ip_vs_flags) },
2607 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2608 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2609 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2612 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2613 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2614 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2615 .len = sizeof(union nf_inet_addr) },
2616 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2617 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2618 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2619 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2620 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2621 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2622 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2623 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2624 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * Emit a nested stats attribute (container_type) holding all counters,
 * snapshotted under the stats spinlock.  The NLA_PUT_* macros jump to
 * nla_put_failure on a full skb, where the lock is dropped and the
 * partial nest cancelled.
 */
2627 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2628 struct ip_vs_stats *stats)
2630 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2634 spin_lock_bh(&stats->lock);
2636 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2637 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2638 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2639 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2640 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2641 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2642 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2643 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2644 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2645 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2647 spin_unlock_bh(&stats->lock);
2649 nla_nest_end(skb, nl_stats);
/* error path: release the lock before cancelling the nest */
2654 spin_unlock_bh(&stats->lock);
2655 nla_nest_cancel(skb, nl_stats);
/*
 * Emit one nested IPVS_CMD_ATTR_SERVICE attribute describing 'svc':
 * identity (fwmark OR protocol/addr/port), scheduler, optional pe name,
 * flags, timeout in seconds, netmask, plus a nested stats block.
 * NOTE(review): the fwmark-vs-address branch lines and the return
 * statements are elided from this view.
 */
2659 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2660 struct ip_vs_service *svc)
2662 struct nlattr *nl_service;
2663 struct ip_vs_flags flags = { .flags = svc->flags,
2666 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2670 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2673 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2675 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2676 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2677 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2680 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2682 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2683 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2684 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2685 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2687 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2688 goto nla_put_failure;
2690 nla_nest_end(skb, nl_service);
2695 nla_nest_cancel(skb, nl_service);
/*
 * Emit one NLM_F_MULTI genetlink message (IPVS_CMD_NEW_SERVICE) for a
 * service during a dump; cancels the message header if filling fails.
 */
2699 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2700 struct ip_vs_service *svc,
2701 struct netlink_callback *cb)
2705 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2706 &ip_vs_genl_family, NLM_F_MULTI,
2707 IPVS_CMD_NEW_SERVICE);
2711 if (ip_vs_genl_fill_service(skb, svc) < 0)
2712 goto nla_put_failure;
2714 return genlmsg_end(skb, hdr);
2717 genlmsg_cancel(skb, hdr);
/*
 * genetlink dumpit callback: walk both service tables under
 * __ip_vs_mutex, skipping the first 'start' entries (resume point from
 * cb->args[0]) and stopping when the skb fills up.
 * NOTE(review): the idx counter, skip test and the cb->args[0] save are
 * elided from this view.
 */
2721 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2722 struct netlink_callback *cb)
2725 int start = cb->args[0];
2726 struct ip_vs_service *svc;
2728 mutex_lock(&__ip_vs_mutex);
2729 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2730 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2733 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2735 goto nla_put_failure;
2740 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2741 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2744 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2746 goto nla_put_failure;
2752 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_SERVICE attribute into 'usvc' and look up
 * any matching existing service into *ret_svc.  Mandatory identity is
 * AF plus either fwmark or the protocol/addr/port triple.  When
 * full_entry is set, also requires scheduler/flags/timeout/netmask and
 * merges the userland flag mask over the existing service's flags.
 * Returned sched_name/pe_name pointers alias the nlattr data.
 * NOTE(review): several error returns and the final return 0 are elided
 * from this view.
 */
2758 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2759 struct nlattr *nla, int full_entry,
2760 struct ip_vs_service **ret_svc)
2762 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2763 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2764 struct ip_vs_service *svc;
2766 /* Parse mandatory identifying service fields first */
2768 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2771 nla_af = attrs[IPVS_SVC_ATTR_AF];
2772 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2773 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2774 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2775 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2777 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2780 memset(usvc, 0, sizeof(*usvc));
2782 usvc->af = nla_get_u16(nla_af);
2783 #ifdef CONFIG_IP_VS_IPV6
2784 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2786 if (usvc->af != AF_INET)
2788 return -EAFNOSUPPORT;
/* fwmark services get a nominal TCP protocol; otherwise copy the triple */
2791 usvc->protocol = IPPROTO_TCP;
2792 usvc->fwmark = nla_get_u32(nla_fwmark);
2794 usvc->protocol = nla_get_u16(nla_protocol);
2795 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2796 usvc->port = nla_get_u16(nla_port);
2801 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2803 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2804 &usvc->addr, usvc->port);
2807 /* If a full entry was requested, check for the additional fields */
2809 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2811 struct ip_vs_flags flags;
2813 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2814 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2815 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2816 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2817 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2819 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2822 nla_memcpy(&flags, nla_flags, sizeof(flags));
2824 /* prefill flags from service if it already exists */
2826 usvc->flags = svc->flags;
2828 /* set new flags from userland */
2829 usvc->flags = (usvc->flags & ~flags.mask) |
2830 (flags.flags & flags.mask);
2831 usvc->sched_name = nla_data(nla_sched);
2832 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2833 usvc->timeout = nla_get_u32(nla_timeout);
2834 usvc->netmask = nla_get_u32(nla_netmask);
/*
 * Convenience wrapper: parse only the identity fields of a nested
 * service attribute and return the matching service, an ERR_PTR on
 * parse failure, or NULL when no service matches.
 */
2840 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2842 struct ip_vs_service_user_kern usvc;
2843 struct ip_vs_service *svc;
2846 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2847 return ret ? ERR_PTR(ret) : svc;
/*
 * Emit one nested IPVS_CMD_ATTR_DEST attribute for a real server:
 * address/port, forwarding method (masked out of conn_flags), weight,
 * thresholds, connection counters and a nested stats block.  On skb
 * overflow the NLA_PUT_* macros jump to nla_put_failure and the nest
 * is cancelled.
 */
2850 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2852 struct nlattr *nl_dest;
2854 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2858 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2859 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2861 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2862 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2863 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2864 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2865 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2866 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2867 atomic_read(&dest->activeconns));
2868 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2869 atomic_read(&dest->inactconns));
2870 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2871 atomic_read(&dest->persistconns));
2873 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2874 goto nla_put_failure;
2876 nla_nest_end(skb, nl_dest);
2881 nla_nest_cancel(skb, nl_dest);
/*
 * Emit one NLM_F_MULTI genetlink message for a destination during a
 * dump; cancels the header if filling fails.
 * NOTE(review): the command argument to genlmsg_put (presumably
 * IPVS_CMD_NEW_DEST) is on an elided line.
 */
2885 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2886 struct netlink_callback *cb)
2890 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2891 &ip_vs_genl_family, NLM_F_MULTI,
2896 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2897 goto nla_put_failure;
2899 return genlmsg_end(skb, hdr);
2902 genlmsg_cancel(skb, hdr);
/* Netlink dump callback for IPVS_CMD_GET_DEST: walk all destinations of
 * the service named in the request and emit each one.  cb->args[0]
 * records how many destinations were already sent, so a resumed dump
 * skips them.  __ip_vs_mutex is held across the parse and the list
 * walk, which keeps svc and its destination list stable.
 */
2906 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2907 				 struct netlink_callback *cb)
2910 	int start = cb->args[0];
2911 	struct ip_vs_service *svc;
2912 	struct ip_vs_dest *dest;
2913 	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2915 	mutex_lock(&__ip_vs_mutex);
2917 	/* Try to find the service for which to dump destinations */
2918 	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2919 			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
	/* May return NULL (no such service) or ERR_PTR; both bail out. */
2922 	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2923 	if (IS_ERR(svc) || svc == NULL)
2926 	/* Dump the destinations */
2927 	list_for_each_entry(dest, &svc->destinations, n_list) {
		/* Stop on a full skb; the dump resumes from cb->args[0]
		 * on the next invocation. */
2930 		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2932 			goto nla_put_failure;
2940 	mutex_unlock(&__ip_vs_mutex);
/* Parse a nested IPVS_CMD_ATTR_DEST attribute into @udest.
 * Address and port are always mandatory (they identify the
 * destination).  When @full_entry is set — i.e. the caller is adding or
 * editing a destination — forwarding method, weight and both thresholds
 * must also be present.  Returns 0 on success, negative error on
 * missing/invalid attributes (error returns elided from this excerpt).
 */
2945 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2946 				 struct nlattr *nla, int full_entry)
2948 	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2949 	struct nlattr *nla_addr, *nla_port;
2951 	/* Parse mandatory identifying destination fields first */
2953 	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2956 	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
2957 	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
2959 	if (!(nla_addr && nla_port))
	/* Zero the whole struct so optional fields default to 0. */
2962 	memset(udest, 0, sizeof(*udest));
2964 	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2965 	udest->port = nla_get_u16(nla_port);
2967 	/* If a full entry was requested, check for the additional fields */
2969 		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2972 		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
2973 		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
2974 		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
2975 		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
2977 		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
		/* Keep only the forwarding-method bits from userspace. */
2980 		udest->conn_flags = nla_get_u32(nla_fwd)
2981 				    & IP_VS_CONN_F_FWD_MASK;
2982 		udest->weight = nla_get_u32(nla_weight);
2983 		udest->u_threshold = nla_get_u32(nla_u_thresh);
2984 		udest->l_threshold = nla_get_u32(nla_l_thresh);
/* Serialize one sync-daemon descriptor (state, multicast interface,
 * sync ID) as a nested IPVS_CMD_ATTR_DAEMON attribute.
 * NOTE(review): @state and @syncid are declared __be32 but are emitted
 * with NLA_PUT_U32, and callers pass host-order constants such as
 * IP_VS_STATE_MASTER — the __be32 annotation looks wrong rather than
 * the values actually being big-endian; verify intent (later kernels
 * use plain u32 here).
 */
2990 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2991 				  const char *mcast_ifn, __be32 syncid)
2993 	struct nlattr *nl_daemon;
2995 	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2999 	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3000 	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3001 	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3003 	nla_nest_end(skb, nl_daemon);
	/* nla_put_failure: cancel the nest on attribute overflow. */
3008 	nla_nest_cancel(skb, nl_daemon);
/* Emit one sync-daemon record as a standalone NLM_F_MULTI message in a
 * dump, reported under command IPVS_CMD_NEW_DAEMON.  Returns the
 * message length from genlmsg_end(), or a negative value after
 * cancelling the header when the record did not fit.
 */
3012 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3013 				  const char *mcast_ifn, __be32 syncid,
3014 				  struct netlink_callback *cb)
3017 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3018 			  &ip_vs_genl_family, NLM_F_MULTI,
3019 			  IPVS_CMD_NEW_DAEMON);
3023 	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3024 		goto nla_put_failure;
3026 	return genlmsg_end(skb, hdr);
	/* nla_put_failure: roll back the partially written message. */
3029 	genlmsg_cancel(skb, hdr);
/* Netlink dump callback for IPVS_CMD_GET_DAEMON: report the master
 * and/or backup sync daemons, if running.  cb->args[0] and cb->args[1]
 * mark the master and backup records respectively as already dumped, so
 * a resumed dump does not repeat them.  The global sync state is read
 * under __ip_vs_mutex.
 */
3033 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3034 				   struct netlink_callback *cb)
3036 	mutex_lock(&__ip_vs_mutex);
3037 	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3038 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3039 					   ip_vs_master_mcast_ifn,
3040 					   ip_vs_master_syncid, cb) < 0)
3041 			goto nla_put_failure;
3046 	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3047 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3048 					   ip_vs_backup_mcast_ifn,
3049 					   ip_vs_backup_syncid, cb) < 0)
3050 			goto nla_put_failure;
3056 	mutex_unlock(&__ip_vs_mutex);
/* Handle IPVS_CMD_NEW_DAEMON: all three daemon attributes (state,
 * multicast interface name, sync ID) are mandatory; start the
 * corresponding sync thread.  Returns start_sync_thread()'s result
 * (the missing-attribute error return is elided from this excerpt).
 */
3061 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3063 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3064 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3065 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3068 	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3069 				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3070 				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
/* Handle IPVS_CMD_DEL_DAEMON: only the state attribute is required;
 * stop the sync thread for that state (master or backup).
 */
3073 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3075 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
3078 	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
/* Handle IPVS_CMD_SET_CONFIG: read-modify-write of the global protocol
 * timeouts.  Current values are fetched first so that any attribute the
 * caller omitted keeps its existing value.
 */
3081 static int ip_vs_genl_set_config(struct nlattr **attrs)
3083 	struct ip_vs_timeout_user t;
3085 	__ip_vs_get_timeouts(&t);
3087 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3088 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3090 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3092 			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3094 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3095 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3097 	return ip_vs_set_timeout(&t);
/* Generic netlink .doit handler for every IPVS "set" command (service,
 * destination, daemon, config, zero, flush).  Dispatches on the command
 * in the genl header.  The whole operation runs under __ip_vs_mutex, so
 * configuration changes are serialized.  Commands that need no service
 * argument (flush, set-config, daemon start/stop, global zero) are
 * handled first and exit early; all remaining commands parse and, where
 * required, look up a service before the final switch.
 */
3100 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3102 	struct ip_vs_service *svc = NULL;
3103 	struct ip_vs_service_user_kern usvc;
3104 	struct ip_vs_dest_user_kern udest;
3106 	int need_full_svc = 0, need_full_dest = 0;
3108 	cmd = info->genlhdr->cmd;
3110 	mutex_lock(&__ip_vs_mutex);
3112 	if (cmd == IPVS_CMD_FLUSH) {
3113 		ret = ip_vs_flush();
3115 	} else if (cmd == IPVS_CMD_SET_CONFIG) {
3116 		ret = ip_vs_genl_set_config(info->attrs);
3118 	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
3119 		   cmd == IPVS_CMD_DEL_DAEMON) {
		/* Daemon commands carry their own nested attribute set,
		 * re-parsed here with the daemon policy. */
3121 		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3123 		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3124 		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3125 				     info->attrs[IPVS_CMD_ATTR_DAEMON],
3126 				     ip_vs_daemon_policy)) {
3131 		if (cmd == IPVS_CMD_NEW_DAEMON)
3132 			ret = ip_vs_genl_new_daemon(daemon_attrs);
3134 			ret = ip_vs_genl_del_daemon(daemon_attrs);
3136 	} else if (cmd == IPVS_CMD_ZERO &&
3137 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* ZERO without a service attribute means "zero all". */
3138 		ret = ip_vs_zero_all();
3142 	/* All following commands require a service argument, so check if we
3143 	 * received a valid one. We need a full service specification when
3144 	 * adding / editing a service. Only identifying members otherwise. */
3145 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3148 	ret = ip_vs_genl_parse_service(&usvc,
3149 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
3150 				       need_full_svc, &svc);
3154 	/* Unless we're adding a new service, the service must already exist */
3155 	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3160 	/* Destination commands require a valid destination argument. For
3161 	 * adding / editing a destination, we need a full destination
3163 	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3164 	    cmd == IPVS_CMD_DEL_DEST) {
3165 		if (cmd != IPVS_CMD_DEL_DEST)
3168 		ret = ip_vs_genl_parse_dest(&udest,
3169 					    info->attrs[IPVS_CMD_ATTR_DEST],
	/* Final dispatch on the parsed service / destination. */
3176 	case IPVS_CMD_NEW_SERVICE:
3178 			ret = ip_vs_add_service(&usvc, &svc);
3182 	case IPVS_CMD_SET_SERVICE:
3183 		ret = ip_vs_edit_service(svc, &usvc);
3185 	case IPVS_CMD_DEL_SERVICE:
3186 		ret = ip_vs_del_service(svc);
3187 		/* do not use svc, it can be freed */
3189 	case IPVS_CMD_NEW_DEST:
3190 		ret = ip_vs_add_dest(svc, &udest);
3192 	case IPVS_CMD_SET_DEST:
3193 		ret = ip_vs_edit_dest(svc, &udest);
3195 	case IPVS_CMD_DEL_DEST:
3196 		ret = ip_vs_del_dest(svc, &udest);
	/* default reached here is IPVS_CMD_ZERO with a service attribute:
	 * zero that single service's counters. */
3199 		ret = ip_vs_zero_service(svc);
3206 	mutex_unlock(&__ip_vs_mutex);
/* Generic netlink .doit handler for the non-dump "get" commands:
 * GET_SERVICE (single service), GET_CONFIG (global timeouts) and
 * GET_INFO (version + connection-table size).  Builds a unicast reply
 * whose command is the matching "set/new" counterpart so userspace can
 * reuse its attribute parsers.  Runs under __ip_vs_mutex.
 */
3211 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3213 	struct sk_buff *msg;
3215 	int ret, cmd, reply_cmd;
3217 	cmd = info->genlhdr->cmd;
3219 	if (cmd == IPVS_CMD_GET_SERVICE)
3220 		reply_cmd = IPVS_CMD_NEW_SERVICE;
3221 	else if (cmd == IPVS_CMD_GET_INFO)
3222 		reply_cmd = IPVS_CMD_SET_INFO;
3223 	else if (cmd == IPVS_CMD_GET_CONFIG)
3224 		reply_cmd = IPVS_CMD_SET_CONFIG;
	/* Unknown command: reject (error return elided from excerpt). */
3226 		pr_err("unknown Generic Netlink command\n");
3230 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3234 	mutex_lock(&__ip_vs_mutex);
3236 	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3238 		goto nla_put_failure;
3241 	case IPVS_CMD_GET_SERVICE:
3243 		struct ip_vs_service *svc;
3245 		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3250 		ret = ip_vs_genl_fill_service(msg, svc);
3252 			goto nla_put_failure;
3261 	case IPVS_CMD_GET_CONFIG:
3263 		struct ip_vs_timeout_user t;
3265 		__ip_vs_get_timeouts(&t);
		/* Only report timeouts for protocols compiled in. */
3266 #ifdef CONFIG_IP_VS_PROTO_TCP
3267 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3268 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3271 #ifdef CONFIG_IP_VS_PROTO_UDP
3272 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3278 	case IPVS_CMD_GET_INFO:
3279 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3280 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3281 			    ip_vs_conn_tab_size);
3285 	genlmsg_end(msg, reply);
3286 	ret = genlmsg_reply(msg, info);
	/* nla_put_failure: the reply outgrew the preallocated skb. */
3290 	pr_err("not enough space in Netlink message\n");
3296 	mutex_unlock(&__ip_vs_mutex);
/* Generic netlink operation table for the IPVS family.  Every op
 * requires CAP_NET_ADMIN (GENL_ADMIN_PERM).  All modifying commands
 * share the ip_vs_genl_set_cmd dispatcher; single-object "get"s use
 * ip_vs_genl_get_cmd; multi-object "get"s are dump-only (.dumpit).
 * Ops without a .policy (GET_DAEMON, GET_CONFIG, GET_INFO, FLUSH) take
 * no validated attributes.
 */
3302 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3304 		.cmd	= IPVS_CMD_NEW_SERVICE,
3305 		.flags	= GENL_ADMIN_PERM,
3306 		.policy	= ip_vs_cmd_policy,
3307 		.doit	= ip_vs_genl_set_cmd,
3310 		.cmd	= IPVS_CMD_SET_SERVICE,
3311 		.flags	= GENL_ADMIN_PERM,
3312 		.policy	= ip_vs_cmd_policy,
3313 		.doit	= ip_vs_genl_set_cmd,
3316 		.cmd	= IPVS_CMD_DEL_SERVICE,
3317 		.flags	= GENL_ADMIN_PERM,
3318 		.policy	= ip_vs_cmd_policy,
3319 		.doit	= ip_vs_genl_set_cmd,
		/* GET_SERVICE supports both a single lookup (.doit) and a
		 * full listing (.dumpit). */
3322 		.cmd	= IPVS_CMD_GET_SERVICE,
3323 		.flags	= GENL_ADMIN_PERM,
3324 		.doit	= ip_vs_genl_get_cmd,
3325 		.dumpit	= ip_vs_genl_dump_services,
3326 		.policy	= ip_vs_cmd_policy,
3329 		.cmd	= IPVS_CMD_NEW_DEST,
3330 		.flags	= GENL_ADMIN_PERM,
3331 		.policy	= ip_vs_cmd_policy,
3332 		.doit	= ip_vs_genl_set_cmd,
3335 		.cmd	= IPVS_CMD_SET_DEST,
3336 		.flags	= GENL_ADMIN_PERM,
3337 		.policy	= ip_vs_cmd_policy,
3338 		.doit	= ip_vs_genl_set_cmd,
3341 		.cmd	= IPVS_CMD_DEL_DEST,
3342 		.flags	= GENL_ADMIN_PERM,
3343 		.policy	= ip_vs_cmd_policy,
3344 		.doit	= ip_vs_genl_set_cmd,
3347 		.cmd	= IPVS_CMD_GET_DEST,
3348 		.flags	= GENL_ADMIN_PERM,
3349 		.policy	= ip_vs_cmd_policy,
3350 		.dumpit	= ip_vs_genl_dump_dests,
3353 		.cmd	= IPVS_CMD_NEW_DAEMON,
3354 		.flags	= GENL_ADMIN_PERM,
3355 		.policy	= ip_vs_cmd_policy,
3356 		.doit	= ip_vs_genl_set_cmd,
3359 		.cmd	= IPVS_CMD_DEL_DAEMON,
3360 		.flags	= GENL_ADMIN_PERM,
3361 		.policy	= ip_vs_cmd_policy,
3362 		.doit	= ip_vs_genl_set_cmd,
3365 		.cmd	= IPVS_CMD_GET_DAEMON,
3366 		.flags	= GENL_ADMIN_PERM,
3367 		.dumpit	= ip_vs_genl_dump_daemons,
3370 		.cmd	= IPVS_CMD_SET_CONFIG,
3371 		.flags	= GENL_ADMIN_PERM,
3372 		.policy	= ip_vs_cmd_policy,
3373 		.doit	= ip_vs_genl_set_cmd,
3376 		.cmd	= IPVS_CMD_GET_CONFIG,
3377 		.flags	= GENL_ADMIN_PERM,
3378 		.doit	= ip_vs_genl_get_cmd,
3381 		.cmd	= IPVS_CMD_GET_INFO,
3382 		.flags	= GENL_ADMIN_PERM,
3383 		.doit	= ip_vs_genl_get_cmd,
3386 		.cmd	= IPVS_CMD_ZERO,
3387 		.flags	= GENL_ADMIN_PERM,
3388 		.policy	= ip_vs_cmd_policy,
3389 		.doit	= ip_vs_genl_set_cmd,
3392 		.cmd	= IPVS_CMD_FLUSH,
3393 		.flags	= GENL_ADMIN_PERM,
3394 		.doit	= ip_vs_genl_set_cmd,
/* Register the IPVS generic netlink family together with all ops in
 * ip_vs_genl_ops[].  Returns 0 on success or a negative errno.
 */
3398 static int __init ip_vs_genl_register(void)
3400 	return genl_register_family_with_ops(&ip_vs_genl_family,
3401 		ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
/* Unregister the IPVS generic netlink family (ops go with it). */
3404 static void ip_vs_genl_unregister(void)
3406 	genl_unregister_family(&ip_vs_genl_family);
3409 /* End of Generic Netlink interface definitions */
/* Module init for the IPVS control plane: initialize the service and
 * real-server hash tables, register the sockopt and generic-netlink
 * interfaces, create the /proc entries and sysctl tree, attach the
 * global stats estimator and arm the periodic defense timer.
 * On genetlink-registration failure the sockopt registration is rolled
 * back; later steps' error handling is elided from this excerpt.
 */
3412 int __init ip_vs_control_init(void)
3419 	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3420 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3421 		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3422 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3424 	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3425 		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3429 	ret = nf_register_sockopt(&ip_vs_sockopts);
3431 		pr_err("cannot register sockopt.\n");
3435 	ret = ip_vs_genl_register();
3437 		pr_err("cannot register Generic Netlink interface.\n");
		/* Undo the earlier sockopt registration before bailing. */
3438 		nf_unregister_sockopt(&ip_vs_sockopts);
3442 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3443 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3445 	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3447 	ip_vs_new_estimator(&ip_vs_stats);
3449 	/* Hook the defense timer */
3450 	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
/* Module teardown for the IPVS control plane: undo ip_vs_control_init()
 * in reverse order — stop the defense timer, detach the stats
 * estimator, remove sysctl and /proc entries, then unregister the
 * netlink and sockopt interfaces.  (Function end lies past this
 * excerpt.)
 */
3457 void ip_vs_control_cleanup(void)
3460 	ip_vs_trash_cleanup();
	/* Cancel the self-rearming defense work and wait for a possibly
	 * still-running instance before freeing anything it touches. */
3461 	cancel_rearming_delayed_work(&defense_work);
3462 	cancel_work_sync(&defense_work.work);
3463 	ip_vs_kill_estimator(&ip_vs_stats);
3464 	unregister_sysctl_table(sysctl_header);
3465 	proc_net_remove(&init_net, "ip_vs_stats");
3466 	proc_net_remove(&init_net, "ip_vs");
3467 	ip_vs_genl_unregister();
3468 	nf_unregister_sockopt(&ip_vs_sockopts);