2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
42 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ip6_route.h>
46 #include <net/route.h>
48 #include <net/genetlink.h>
50 #include <asm/uaccess.h>
52 #include <net/ip_vs.h>
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72 /* 1/rate drop and drop-entry variables */
/* non-static: read by the packet-drop defense path in other IPVS files */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
/* set by update_defense_level(), polled by defense_work_handler() */
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
77 /* number of virtual services */
/* counts IPv4 services only; used by the legacy get/setsockopt interface */
78 static int ip_vs_num_services = 0;
80 /* sysctl variables */
/* defense-mode knobs below (drop_entry/drop_packet/secure_tcp) may be
 * rewritten by update_defense_level() itself, not only by sysctl writes */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
/* available-memory threshold (pages) that triggers the defense strategies */
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
/* NOTE(review): matching #endif not visible in this dump — lines elided */
94 int sysctl_ip_vs_snat_reroute = 1;
97 #ifdef CONFIG_IP_VS_DEBUG
98 static int sysctl_ip_vs_debug_level = 0;
/* Accessor used by the IP_VS_DBG* macros elsewhere in IPVS. */
100 int ip_vs_get_debug_level(void)
102 return sysctl_ip_vs_debug_level;
106 #ifdef CONFIG_IP_VS_IPV6
107 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/* Returns non-zero when @addr routes to a loopback device in init_net,
 * i.e. the address is local to this host. */
108 static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
/* route lookup; result checked for the loopback flag on the egress device */
117 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
118 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
125 * update_defense_level is called from keventd and from sysctl,
126 * so it needs to protect itself from softirqs
/*
 * Re-evaluates the three memory-pressure defense strategies (drop_entry,
 * drop_packet, secure_tcp) against the amount of free+buffered memory.
 * NOTE(review): several case labels and branch bodies appear elided in
 * this dump; the per-mode comments below are hedged accordingly.
 */
128 static void update_defense_level(void)
/* remembers the secure_tcp mode chosen on the previous invocation */
131 static int old_secure_tcp = 0;
136 /* we only count free and buffered memory (in pages) */
138 availmem = i.freeram + i.bufferram;
139 /* however in linux 2.5 the i.bufferram is total page cache size,
141 /* si_swapinfo(&i); */
142 /* availmem = availmem - (i.totalswap - i.freeswap); */
/* nomem == true when available memory drops below the sysctl threshold */
144 nomem = (availmem < sysctl_ip_vs_amemthresh);
/* drop_entry strategy: modes auto-promote/demote between 1 and 2 under
 * the dropentry lock; mode 3 presumably forces dropping — confirm */
149 spin_lock(&__ip_vs_dropentry_lock);
150 switch (sysctl_ip_vs_drop_entry) {
152 atomic_set(&ip_vs_dropentry, 0);
156 atomic_set(&ip_vs_dropentry, 1);
157 sysctl_ip_vs_drop_entry = 2;
159 atomic_set(&ip_vs_dropentry, 0);
164 atomic_set(&ip_vs_dropentry, 1);
166 atomic_set(&ip_vs_dropentry, 0);
167 sysctl_ip_vs_drop_entry = 1;
171 atomic_set(&ip_vs_dropentry, 1);
174 spin_unlock(&__ip_vs_dropentry_lock);
/* drop_packet strategy: derive a 1/rate drop rate from how far below
 * the threshold available memory has fallen */
177 spin_lock(&__ip_vs_droppacket_lock);
178 switch (sysctl_ip_vs_drop_packet) {
184 ip_vs_drop_rate = ip_vs_drop_counter
185 = sysctl_ip_vs_amemthresh /
186 (sysctl_ip_vs_amemthresh-availmem);
187 sysctl_ip_vs_drop_packet = 2;
194 ip_vs_drop_rate = ip_vs_drop_counter
195 = sysctl_ip_vs_amemthresh /
196 (sysctl_ip_vs_amemthresh-availmem);
199 sysctl_ip_vs_drop_packet = 1;
/* always-on mode: fixed administrative drop rate */
203 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
206 spin_unlock(&__ip_vs_droppacket_lock);
/* secure_tcp strategy: state >1 means strict TCP timeouts are active;
 * transitions compare against the previously applied mode */
209 spin_lock(&ip_vs_securetcp_lock);
210 switch (sysctl_ip_vs_secure_tcp) {
212 if (old_secure_tcp >= 2)
217 if (old_secure_tcp < 2)
219 sysctl_ip_vs_secure_tcp = 2;
221 if (old_secure_tcp >= 2)
227 if (old_secure_tcp < 2)
230 if (old_secure_tcp >= 2)
232 sysctl_ip_vs_secure_tcp = 1;
236 if (old_secure_tcp < 2)
240 old_secure_tcp = sysctl_ip_vs_secure_tcp;
/* propagate strict/normal timeout tables to the protocol handlers */
242 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
243 spin_unlock(&ip_vs_securetcp_lock);
250 * Timer for checking the defense
252 #define DEFENSE_TIMER_PERIOD 1*HZ
253 static void defense_work_handler(struct work_struct *work);
254 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
/*
 * Periodic worker: re-evaluates defense levels once per second and, when
 * the drop_entry defense is active, randomly drops connection entries.
 * Re-arms itself unconditionally.
 */
256 static void defense_work_handler(struct work_struct *work)
258 update_defense_level();
259 if (atomic_read(&ip_vs_dropentry))
260 ip_vs_random_dropentry();
262 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
/* Pin the module while user-space holds IPVS state; returns 0 on failure. */
266 ip_vs_use_count_inc(void)
268 return try_module_get(THIS_MODULE);
/* Release a reference taken by ip_vs_use_count_inc(). */
272 ip_vs_use_count_dec(void)
274 module_put(THIS_MODULE);
279 * Hash table: for virtual service lookups
281 #define IP_VS_SVC_TAB_BITS 8
282 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
283 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
285 /* the service table hashed by <protocol, addr, port> */
286 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
287 /* the service table hashed by fwmark */
288 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
291 * Hash table: for real service lookups
/* 16 buckets for real servers (destinations), hashed by <addr, port> */
293 #define IP_VS_RTAB_BITS 4
294 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
295 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
297 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
300 * Trash for destinations
/* removed dests still referenced by connections park here (see below) */
302 static LIST_HEAD(ip_vs_dest_trash);
305 * FTP & NULL virtual service counters
/* fast-path hints for ip_vs_service_get()'s FTP-data and port-0 fallbacks */
307 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
308 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
312 * Returns hash value for virtual service
/* Folds protocol, address and port into an index for ip_vs_svc_table. */
314 static __inline__ unsigned
315 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
318 register unsigned porth = ntohs(port);
319 __be32 addr_fold = addr->ip;
321 #ifdef CONFIG_IP_VS_IPV6
/* for IPv6, XOR-fold all four 32-bit words of the address */
323 addr_fold = addr->ip6[0]^addr->ip6[1]^
324 addr->ip6[2]^addr->ip6[3];
327 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
328 & IP_VS_SVC_TAB_MASK;
332 * Returns hash value of fwmark for virtual service lookup
/* Trivial hash: low bits of the firewall mark index ip_vs_svc_fwm_table. */
334 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
336 return fwmark & IP_VS_SVC_TAB_MASK;
340 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
341 * or in the ip_vs_svc_fwm_table by fwmark.
342 * Should be called with locked tables.
/* Caller must hold __ip_vs_svc_lock for writing. */
344 static int ip_vs_svc_hash(struct ip_vs_service *svc)
/* double-hash is a programming error; report the caller and bail out */
348 if (svc->flags & IP_VS_SVC_F_HASHED) {
349 pr_err("%s(): request for already hashed, called from %pF\n",
350 __func__, __builtin_return_address(0));
/* fwmark==0 selects the <proto,addr,port> table, else the fwmark table */
354 if (svc->fwmark == 0) {
356 * Hash it by <protocol,addr,port> in ip_vs_svc_table
358 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
360 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
363 * Hash it by fwmark in ip_vs_svc_fwm_table
365 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
366 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
369 svc->flags |= IP_VS_SVC_F_HASHED;
370 /* increase its refcnt because it is referenced by the svc table */
371 atomic_inc(&svc->refcnt);
377 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
378 * Should be called with locked tables.
/* Mirror of ip_vs_svc_hash(): drops the table's reference on success. */
380 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
/* unhashing something that was never hashed is a programming error */
382 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
383 pr_err("%s(): request for unhash flagged, called from %pF\n",
384 __func__, __builtin_return_address(0));
388 if (svc->fwmark == 0) {
389 /* Remove it from the ip_vs_svc_table table */
390 list_del(&svc->s_list);
392 /* Remove it from the ip_vs_svc_fwm_table table */
393 list_del(&svc->f_list);
396 svc->flags &= ~IP_VS_SVC_F_HASHED;
/* drop the reference the table held since ip_vs_svc_hash() */
397 atomic_dec(&svc->refcnt);
403 * Get service by {proto,addr,port} in the service table.
/* Lockless walk of one hash bucket; caller must hold __ip_vs_svc_lock.
 * Returns the matching service or (presumably, via elided lines) NULL. */
405 static inline struct ip_vs_service *
406 __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
410 struct ip_vs_service *svc;
412 /* Check for "full" addressed entries */
413 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
415 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
417 && ip_vs_addr_equal(af, &svc->addr, vaddr)
418 && (svc->port == vport)
419 && (svc->protocol == protocol)) {
430 * Get service by {fwmark} in the service table.
/* Caller must hold __ip_vs_svc_lock; matches on fwmark and address family. */
432 static inline struct ip_vs_service *
433 __ip_vs_svc_fwm_find(int af, __u32 fwmark)
436 struct ip_vs_service *svc;
438 /* Check for fwmark addressed entries */
439 hash = ip_vs_svc_fwm_hashkey(fwmark);
441 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
442 if (svc->fwmark == fwmark && svc->af == af) {
/*
 * Public service lookup: tries fwmark first, then <proto,addr,port>,
 * then the FTP-data and catch-all (port 0) fallbacks.  On a hit, takes
 * a usecnt reference that the caller must release (ip_vs_service_put).
 */
451 struct ip_vs_service *
452 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
453 const union nf_inet_addr *vaddr, __be16 vport)
455 struct ip_vs_service *svc;
457 read_lock(&__ip_vs_svc_lock);
460 * Check the table hashed by fwmark first
462 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
466 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries
469 svc = __ip_vs_service_find(af, protocol, vaddr, vport);
/* FTP fallback only when an FTP service exists and the port is plausible */
472 && protocol == IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter)
474 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
476 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections.
479 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
483 && atomic_read(&ip_vs_nullsvc_counter)) {
485 * Check if the catch-all port (port zero) exists
487 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
/* usecnt pins the service against concurrent edit/delete (IP_VS_WAIT_WHILE) */
492 atomic_inc(&svc->usecnt);
493 read_unlock(&__ip_vs_svc_lock);
495 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
496 fwmark, ip_vs_proto_name(protocol),
497 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
498 svc ? "hit" : "not hit");
/* Bind a destination to its service, taking a service reference. */
505 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
507 atomic_inc(&svc->refcnt);
/* Unbind a destination; frees the service when the last ref drops. */
512 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
514 struct ip_vs_service *svc = dest->svc;
517 if (atomic_dec_and_test(&svc->refcnt)) {
518 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
520 IP_VS_DBG_ADDR(svc->af, &svc->addr),
521 ntohs(svc->port), atomic_read(&svc->usecnt));
528 * Returns hash value for real service
/* Same folding scheme as ip_vs_svc_hashkey but without the protocol,
 * masked to the (smaller) real-service table size. */
530 static inline unsigned ip_vs_rs_hashkey(int af,
531 const union nf_inet_addr *addr,
534 register unsigned porth = ntohs(port);
535 __be32 addr_fold = addr->ip;
537 #ifdef CONFIG_IP_VS_IPV6
539 addr_fold = addr->ip6[0]^addr->ip6[1]^
540 addr->ip6[2]^addr->ip6[3];
543 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
548 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
549 * should be called with locked tables.
/* Caller must hold __ip_vs_rs_lock for writing.  A non-empty d_list
 * means the dest is already hashed, so it is left alone. */
551 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
555 if (!list_empty(&dest->d_list)) {
560 * Hash by proto,addr,port,
561 * which are the parameters of the real service.
563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
565 list_add(&dest->d_list, &ip_vs_rtable[hash]);
571 * UNhashes ip_vs_dest from ip_vs_rtable.
572 * should be called with locked tables.
/* Idempotent: re-initializes d_list so a later unhash/hash is safe. */
574 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
577 * Remove it from the ip_vs_rtable table.
579 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list);
581 INIT_LIST_HEAD(&dest->d_list);
588 * Lookup real service by <proto,addr,port> in the real service table.
/* Takes __ip_vs_rs_lock for reading around the bucket walk and returns
 * the first matching destination (no reference is taken here). */
591 ip_vs_lookup_real_service(int af, __u16 protocol,
592 const union nf_inet_addr *daddr,
596 struct ip_vs_dest *dest;
599 * Check for "full" addressed entries
600 * Return the first found entry
602 hash = ip_vs_rs_hashkey(af, daddr, dport);
604 read_lock(&__ip_vs_rs_lock);
605 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
607 && ip_vs_addr_equal(af, &dest->addr, daddr)
608 && (dest->port == dport)
609 && ((dest->protocol == protocol) ||
612 read_unlock(&__ip_vs_rs_lock);
616 read_unlock(&__ip_vs_rs_lock);
622 * Lookup destination by {addr,port} in the given service
/* Linear scan of svc->destinations; no locking or refcounting here —
 * the caller is responsible for holding the service stable. */
624 static struct ip_vs_dest *
625 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
628 struct ip_vs_dest *dest;
631 * Find the destination for the given service
633 list_for_each_entry(dest, &svc->destinations, n_list) {
634 if ((dest->af == svc->af)
635 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
636 && (dest->port == dport)) {
646 * Find destination by {daddr,dport,vaddr,protocol}
647 * Cretaed to be used in ip_vs_process_message() in
648 * the backup synchronization daemon. It finds the
649 * destination to be bound to the received connection
652 * ip_vs_lookup_real_service() looked promissing, but
653 * seems not working as expected.
/* On success the returned dest carries an extra refcnt the caller must
 * drop; the temporary service reference is released before returning. */
655 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
657 const union nf_inet_addr *vaddr,
658 __be16 vport, __u16 protocol)
660 struct ip_vs_dest *dest;
661 struct ip_vs_service *svc;
/* fwmark 0: look the virtual service up by <proto,vaddr,vport> only */
663 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
666 dest = ip_vs_lookup_dest(svc, daddr, dport);
668 atomic_inc(&dest->refcnt);
669 ip_vs_service_put(svc);
674 * Lookup dest by {svc,addr,port} in the destination trash.
675 * The destination trash is used to hold the destinations that are removed
676 * from the service table but are still referenced by some conn entries.
677 * The reason to add the destination trash is when the dest is temporary
678 * down (either by administrator or by monitor program), the dest can be
679 * picked back from the trash, the remaining connections to the dest can
680 * continue, and the counting information of the dest is also useful for
683 static struct ip_vs_dest *
684 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687 struct ip_vs_dest *dest, *nxt;
690 * Find the destination in trash
/* _safe variant because non-matching entries may be purged mid-walk */
692 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
693 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696 IP_VS_DBG_ADDR(svc->af, &dest->addr),
698 atomic_read(&dest->refcnt));
/* a trash entry matches when both the real-server tuple and the
 * originating virtual service (fwmark or vaddr:vport) agree */
699 if (dest->af == svc->af &&
700 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
701 dest->port == dport &&
702 dest->vfwmark == svc->fwmark &&
703 dest->protocol == svc->protocol &&
705 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
706 dest->vport == svc->port))) {
712 * Try to purge the destination from trash if not referenced
/* refcnt==1 means only the trash holds it; safe to free */
714 if (atomic_read(&dest->refcnt) == 1) {
715 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718 IP_VS_DBG_ADDR(svc->af, &dest->addr),
720 list_del(&dest->n_list);
721 ip_vs_dst_reset(dest);
722 __ip_vs_unbind_svc(dest);
732 * Clean up all the destinations in the trash
733 * Called by the ip_vs_control_cleanup()
735 * When the ip_vs_control_clearup is activated by ipvs module exit,
736 * the service tables must have been flushed and all the connections
737 * are expired, and the refcnt of each destination in the trash must
738 * be 1, so we simply release them here.
740 static void ip_vs_trash_cleanup(void)
742 struct ip_vs_dest *dest, *nxt;
/* unconditionally drain the trash: unlink, drop cached route, unbind */
744 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
745 list_del(&dest->n_list);
746 ip_vs_dst_reset(dest);
747 __ip_vs_unbind_svc(dest);
/* Reset a stats block (and its rate estimator) under its own lock. */
754 ip_vs_zero_stats(struct ip_vs_stats *stats)
756 spin_lock_bh(&stats->lock);
758 memset(&stats->ustats, 0, sizeof(stats->ustats));
759 ip_vs_zero_estimator(stats);
761 spin_unlock_bh(&stats->lock);
765 * Update a destination in the given service
/*
 * Applies @udest's settings to @dest and (re)links it into @svc.
 * @add distinguishes add (link into svc->destinations) from edit.
 * Acquires __ip_vs_rs_lock and __ip_vs_svc_lock internally.
 */
768 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769 struct ip_vs_dest_user_kern *udest, int add)
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
776 conn_flags |= IP_VS_CONN_F_INACTIVE;
778 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
779 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
780 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
783 * Put the real service in ip_vs_rtable if not present.
784 * For now only for NAT!
786 write_lock_bh(&__ip_vs_rs_lock);
788 write_unlock_bh(&__ip_vs_rs_lock);
790 atomic_set(&dest->conn_flags, conn_flags);
792 /* bind the service */
794 __ip_vs_bind_svc(dest, svc);
/* rebinding to a different service also resets the dest statistics */
796 if (dest->svc != svc) {
797 __ip_vs_unbind_svc(dest);
798 ip_vs_zero_stats(&dest->stats);
799 __ip_vs_bind_svc(dest, svc);
803 /* set the dest status flags */
804 dest->flags |= IP_VS_DEST_F_AVAILABLE;
/* raising (or clearing) the upper threshold lifts the overload state */
806 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
807 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
808 dest->u_threshold = udest->u_threshold;
809 dest->l_threshold = udest->l_threshold;
/* invalidate any cached route to the real server */
811 spin_lock(&dest->dst_lock);
812 ip_vs_dst_reset(dest);
813 spin_unlock(&dest->dst_lock);
816 ip_vs_new_estimator(&dest->stats);
818 write_lock_bh(&__ip_vs_svc_lock);
820 /* Wait until all other svc users go away */
821 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
824 list_add(&dest->n_list, &svc->destinations);
828 /* call the update_service, because server weight may be changed */
829 if (svc->scheduler->update_service)
830 svc->scheduler->update_service(svc);
832 write_unlock_bh(&__ip_vs_svc_lock);
837 * Create a destination for the given service
/*
 * Validates the address type, allocates and initializes a new dest,
 * then delegates linking/configuration to __ip_vs_update_dest().
 * On success *dest_p points at the new destination.
 */
840 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
841 struct ip_vs_dest **dest_p)
843 struct ip_vs_dest *dest;
848 #ifdef CONFIG_IP_VS_IPV6
/* IPv6: reject non-unicast / link-local addresses unless local to host */
849 if (svc->af == AF_INET6) {
850 atype = ipv6_addr_type(&udest->addr.in6);
851 if ((!(atype & IPV6_ADDR_UNICAST) ||
852 atype & IPV6_ADDR_LINKLOCAL) &&
853 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
/* IPv4: only local or unicast routable addresses are acceptable */
858 atype = inet_addr_type(&init_net, udest->addr.ip);
859 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
863 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
865 pr_err("%s(): no memory.\n", __func__);
/* copy the virtual-service identity so trash matching works later */
870 dest->protocol = svc->protocol;
871 dest->vaddr = svc->addr;
872 dest->vport = svc->port;
873 dest->vfwmark = svc->fwmark;
874 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
875 dest->port = udest->port;
877 atomic_set(&dest->activeconns, 0);
878 atomic_set(&dest->inactconns, 0);
879 atomic_set(&dest->persistconns, 0);
/* initial ref belongs to the caller/service linkage */
880 atomic_set(&dest->refcnt, 1);
882 INIT_LIST_HEAD(&dest->d_list);
883 spin_lock_init(&dest->dst_lock);
884 spin_lock_init(&dest->stats.lock);
885 __ip_vs_update_dest(svc, dest, udest, 1);
895 * Add a destination into an existing service
/*
 * User-request entry point: validates @udest, refuses duplicates,
 * resurrects a matching destination from the trash when possible,
 * otherwise allocates a fresh one via ip_vs_new_dest().
 */
898 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
900 struct ip_vs_dest *dest;
901 union nf_inet_addr daddr;
902 __be16 dport = udest->port;
907 if (udest->weight < 0) {
908 pr_err("%s(): server weight less than zero\n", __func__);
912 if (udest->l_threshold > udest->u_threshold) {
913 pr_err("%s(): lower threshold is higher than upper threshold\n",
918 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
921 * Check if the dest already exists in the list
923 dest = ip_vs_lookup_dest(svc, &daddr, dport);
926 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
931 * Check if the dest already exists in the trash and
932 * is from the same service
934 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
937 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
938 "dest->refcnt=%d, service %u/%s:%u\n",
939 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
940 atomic_read(&dest->refcnt),
942 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
946 * Get the destination from the trash
/* reuse the trashed dest: unlink from trash, then re-link and update */
948 list_del(&dest->n_list);
950 __ip_vs_update_dest(svc, dest, udest, 1);
954 * Allocate and initialize the dest structure
956 ret = ip_vs_new_dest(svc, udest, &dest);
965 * Edit a destination in the given service
/* Same validation as ip_vs_add_dest(), but the destination must already
 * exist; its settings are updated in place (add == 0). */
968 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
970 struct ip_vs_dest *dest;
971 union nf_inet_addr daddr;
972 __be16 dport = udest->port;
976 if (udest->weight < 0) {
977 pr_err("%s(): server weight less than zero\n", __func__);
981 if (udest->l_threshold > udest->u_threshold) {
982 pr_err("%s(): lower threshold is higher than upper threshold\n",
987 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
990 * Lookup the destination list
992 dest = ip_vs_lookup_dest(svc, &daddr, dport);
995 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
999 __ip_vs_update_dest(svc, dest, udest, 0);
1007 * Delete a destination (must be already unlinked from the service)
/* Final teardown: stop its estimator, unhash from the real-server table,
 * then either free the dest or park it in the trash while connections
 * still reference it. */
1009 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1011 ip_vs_kill_estimator(&dest->stats);
1014 * Remove it from the d-linked list with the real services.
1016 write_lock_bh(&__ip_vs_rs_lock);
1017 ip_vs_rs_unhash(dest);
1018 write_unlock_bh(&__ip_vs_rs_lock);
1021 * Decrease the refcnt of the dest, and free the dest
1022 * if nobody refers to it (refcnt=0). Otherwise, throw
1023 * the destination into the trash.
1025 if (atomic_dec_and_test(&dest->refcnt)) {
1026 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1028 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1030 ip_vs_dst_reset(dest);
1031 /* simply decrease svc->refcnt here, let the caller check
1032 and release the service if nobody refers to it.
1033 Only user context can release destination and service,
1034 and only one user context can update virtual service at a
1035 time, so the operation here is OK */
1036 atomic_dec(&dest->svc->refcnt);
/* still referenced: keep it alive in the trash with a trash-owned ref */
1039 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1040 "dest->refcnt=%d\n",
1041 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1043 atomic_read(&dest->refcnt));
1044 list_add(&dest->n_list, &ip_vs_dest_trash);
1045 atomic_inc(&dest->refcnt);
1051 * Unlink a destination from the given service
/* Marks the dest unavailable, removes it from svc->destinations and —
 * when @svcupd is set — notifies the scheduler of the topology change. */
1053 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1054 struct ip_vs_dest *dest,
1057 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1060 * Remove it from the d-linked destination list.
1062 list_del(&dest->n_list);
1066 * Call the update_service function of its scheduler
1068 if (svcupd && svc->scheduler->update_service)
1069 svc->scheduler->update_service(svc);
1074 * Delete a destination server in the given service
/* User-request entry point: find the dest, unlink it under the service
 * write lock (after draining concurrent users), then tear it down. */
1077 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1079 struct ip_vs_dest *dest;
1080 __be16 dport = udest->port;
1084 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1087 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1091 write_lock_bh(&__ip_vs_svc_lock);
1094 * Wait until all other svc users go away.
1096 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1099 * Unlink dest from the service
1101 __ip_vs_unlink_dest(svc, dest, 1);
1103 write_unlock_bh(&__ip_vs_svc_lock);
1106 * Delete the destination
1108 __ip_vs_del_dest(dest);
1117 * Add a service into the service hash table
/*
 * Creates a new virtual service from the user request: resolves the
 * scheduler and optional persistence engine by name, validates the
 * IPv6 netmask, allocates and fills the service, binds scheduler/PE,
 * and hashes it into the tables.  Error paths (partially elided in this
 * dump) unwind the bindings and module/scheduler references.
 */
1120 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1121 struct ip_vs_service **svc_p)
1124 struct ip_vs_scheduler *sched = NULL;
1125 struct ip_vs_pe *pe = NULL;
1126 struct ip_vs_service *svc = NULL;
1128 /* increase the module use count */
1129 ip_vs_use_count_inc();
1131 /* Lookup the scheduler by 'u->sched_name' */
1132 sched = ip_vs_scheduler_get(u->sched_name);
1133 if (sched == NULL) {
1134 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1139 if (u->pe_name && *u->pe_name) {
1140 pe = ip_vs_pe_get(u->pe_name);
1142 pr_info("persistence engine module ip_vs_pe_%s "
1143 "not found\n", u->pe_name);
1149 #ifdef CONFIG_IP_VS_IPV6
/* IPv6 prefix length must be 1..128 */
1150 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1156 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1158 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1163 /* I'm the first user of the service */
1164 atomic_set(&svc->usecnt, 0);
1165 atomic_set(&svc->refcnt, 0);
1168 svc->protocol = u->protocol;
1169 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1170 svc->port = u->port;
1171 svc->fwmark = u->fwmark;
1172 svc->flags = u->flags;
1173 svc->timeout = u->timeout * HZ;
1174 svc->netmask = u->netmask;
1176 INIT_LIST_HEAD(&svc->destinations);
1177 rwlock_init(&svc->sched_lock);
1178 spin_lock_init(&svc->stats.lock);
1180 /* Bind the scheduler */
1181 ret = ip_vs_bind_scheduler(svc, sched);
1186 /* Bind the ct retriever */
1187 ip_vs_bind_pe(svc, pe);
1190 /* Update the virtual service counters */
/* keep the fast-path hints for FTP and catch-all lookups in sync */
1191 if (svc->port == FTPPORT)
1192 atomic_inc(&ip_vs_ftpsvc_counter);
1193 else if (svc->port == 0)
1194 atomic_inc(&ip_vs_nullsvc_counter);
1196 ip_vs_new_estimator(&svc->stats);
1198 /* Count only IPv4 services for old get/setsockopt interface */
1199 if (svc->af == AF_INET)
1200 ip_vs_num_services++;
1202 /* Hash the service into the service table */
1203 write_lock_bh(&__ip_vs_svc_lock);
1204 ip_vs_svc_hash(svc);
1205 write_unlock_bh(&__ip_vs_svc_lock);
/* error unwind labels (bodies partially elided in this dump) */
1212 ip_vs_unbind_scheduler(svc);
1215 ip_vs_app_inc_put(svc->inc);
1220 ip_vs_scheduler_put(sched);
1223 /* decrease the module use count */
1224 ip_vs_use_count_dec();
1231 * Edit a service and bind it with a new scheduler
/*
 * Updates flags/timeout/netmask of an existing service and, when the
 * requested scheduler or persistence engine differs, swaps them under
 * the service write lock after draining concurrent users.
 */
1234 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1236 struct ip_vs_scheduler *sched, *old_sched;
1237 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1241 * Lookup the scheduler, by 'u->sched_name'
1243 sched = ip_vs_scheduler_get(u->sched_name);
1244 if (sched == NULL) {
1245 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1250 if (u->pe_name && *u->pe_name) {
1251 pe = ip_vs_pe_get(u->pe_name);
1253 pr_info("persistence engine module ip_vs_pe_%s "
1254 "not found\n", u->pe_name);
1261 #ifdef CONFIG_IP_VS_IPV6
1262 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1268 write_lock_bh(&__ip_vs_svc_lock);
1271 * Wait until all other svc users go away.
1273 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1276 * Set the flags and timeout value
/* HASHED must stay set — the service remains in the tables while edited */
1278 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1279 svc->timeout = u->timeout * HZ;
1280 svc->netmask = u->netmask;
1282 old_sched = svc->scheduler;
1283 if (sched != old_sched) {
1285 * Unbind the old scheduler
1287 if ((ret = ip_vs_unbind_scheduler(svc))) {
1293 * Bind the new scheduler
1295 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1297 * If ip_vs_bind_scheduler fails, restore the old
1299 * The main reason of failure is out of memory.
1301 * The question is if the old scheduler can be
1302 * restored all the time. TODO: if it cannot be
1303 * restored some time, we must delete the service,
1304 * otherwise the system may crash.
1306 ip_vs_bind_scheduler(svc, old_sched);
1314 ip_vs_unbind_pe(svc);
1315 ip_vs_bind_pe(svc, pe);
1319 write_unlock_bh(&__ip_vs_svc_lock);
/* release the replaced scheduler/PE references outside the lock */
1321 ip_vs_scheduler_put(old_sched);
1322 ip_vs_pe_put(old_pe);
1328 * Delete a service from the service list
1329 * - The service must be unlinked, unlocked and not referenced!
1330 * - We are called under _bh lock
1332 static void __ip_vs_del_service(struct ip_vs_service *svc)
1334 struct ip_vs_dest *dest, *nxt;
1335 struct ip_vs_scheduler *old_sched;
1336 struct ip_vs_pe *old_pe;
/* NOTE(review): debug leftover below — unconditional pr_info on every
 * service delete should be removed or demoted to IP_VS_DBG */
1338 pr_info("%s: enter\n", __func__);
1340 /* Count only IPv4 services for old get/setsockopt interface */
1341 if (svc->af == AF_INET)
1342 ip_vs_num_services--;
1344 ip_vs_kill_estimator(&svc->stats);
1346 /* Unbind scheduler */
1347 old_sched = svc->scheduler;
1348 ip_vs_unbind_scheduler(svc);
1349 ip_vs_scheduler_put(old_sched);
1351 /* Unbind persistence engine */
1353 ip_vs_unbind_pe(svc);
1354 ip_vs_pe_put(old_pe);
1356 /* Unbind app inc */
1358 ip_vs_app_inc_put(svc->inc);
1363 * Unlink the whole destination list
1365 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1366 __ip_vs_unlink_dest(svc, dest, 0);
1367 __ip_vs_del_dest(dest);
1371 * Update the virtual service counters
1373 if (svc->port == FTPPORT)
1374 atomic_dec(&ip_vs_ftpsvc_counter);
1375 else if (svc->port == 0)
1376 atomic_dec(&ip_vs_nullsvc_counter);
1379 * Free the service if nobody refers to it
1381 if (atomic_read(&svc->refcnt) == 0) {
1382 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1384 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1385 ntohs(svc->port), atomic_read(&svc->usecnt));
1389 /* decrease the module use count */
1390 ip_vs_use_count_dec();
1394 * Unlink a service from list and try to delete it if its refcnt reached 0
/* Unhashes under the write lock, drains concurrent users, then deletes. */
1396 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1399 * Unhash it from the service table
1401 write_lock_bh(&__ip_vs_svc_lock);
1403 ip_vs_svc_unhash(svc);
1406 * Wait until all the svc users go away.
1408 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1410 __ip_vs_del_service(svc);
1412 write_unlock_bh(&__ip_vs_svc_lock);
1416 * Delete a service from the service list
/* Thin user-request wrapper around ip_vs_unlink_service(). */
1418 static int ip_vs_del_service(struct ip_vs_service *svc)
1422 ip_vs_unlink_service(svc);
1429 * Flush all the virtual services
/* Walks both hash tables (by tuple and by fwmark) and unlinks every
 * service; _safe iteration because entries are removed mid-walk. */
1431 static int ip_vs_flush(void)
1434 struct ip_vs_service *svc, *nxt;
1437 * Flush the service table hashed by <protocol,addr,port>
1439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1441 ip_vs_unlink_service(svc);
1446 * Flush the service table hashed by fwmark
1448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1449 list_for_each_entry_safe(svc, nxt,
1450 &ip_vs_svc_fwm_table[idx], f_list) {
1451 ip_vs_unlink_service(svc);
1460 * Zero counters in a service or all services
/* Zeroes the stats of one service and all of its destinations under
 * the service write lock. */
1462 static int ip_vs_zero_service(struct ip_vs_service *svc)
1464 struct ip_vs_dest *dest;
1466 write_lock_bh(&__ip_vs_svc_lock);
1467 list_for_each_entry(dest, &svc->destinations, n_list) {
1468 ip_vs_zero_stats(&dest->stats);
1470 ip_vs_zero_stats(&svc->stats);
1471 write_unlock_bh(&__ip_vs_svc_lock);
/* Zeroes every service in both tables plus the global ip_vs_stats. */
1475 static int ip_vs_zero_all(void)
1478 struct ip_vs_service *svc;
1480 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1481 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1482 ip_vs_zero_service(svc);
1486 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1488 ip_vs_zero_service(svc);
1492 ip_vs_zero_stats(&ip_vs_stats);
/*
 * sysctl handler for the defense-mode knobs (drop_entry, drop_packet,
 * secure_tcp): accepts only 0..3, restoring the old value otherwise,
 * and re-runs update_defense_level() on every valid write.
 */
1498 proc_do_defense_mode(ctl_table *table, int write,
1499 void __user *buffer, size_t *lenp, loff_t *ppos)
1501 int *valp = table->data;
1505 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1506 if (write && (*valp != val)) {
1507 if ((*valp < 0) || (*valp > 3)) {
1508 /* Restore the correct value */
1511 update_defense_level();
/*
 * sysctl handler for sync_threshold ([threshold, period] pair): rejects
 * negative values and requires threshold < period, restoring the backed
 * up pair on invalid input.
 */
1519 proc_do_sync_threshold(ctl_table *table, int write,
1520 void __user *buffer, size_t *lenp, loff_t *ppos)
1522 int *valp = table->data;
1526 /* backup the value first */
1527 memcpy(val, valp, sizeof(val));
1529 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1530 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1531 /* Restore the correct value */
1532 memcpy(valp, val, sizeof(val));
1539 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
/*
 * vs_vars[] - the IPVS sysctl table, registered under
 * /proc/sys/net/ipv4/vs/.  Plain integer knobs use proc_dointvec,
 * the defense-mode knobs use proc_do_defense_mode (range 0..3),
 * the DoS timeout_* entries point into vs_timeout_table_dos and use
 * proc_dointvec_jiffies, and sync_threshold uses its dedicated
 * validating handler.
 * Fix: two entries (conntrack, snat_reroute) wrote the handler as
 * "&proc_dointvec" while every other entry uses the plain function
 * designator; normalized for consistency (semantically identical).
 * NOTE(review): some intermediate lines are elided in this excerpt.
 */
1542 static struct ctl_table vs_vars[] = {
1544 .procname = "amemthresh",
1545 .data = &sysctl_ip_vs_amemthresh,
1546 .maxlen = sizeof(int),
1548 .proc_handler = proc_dointvec,
1550 #ifdef CONFIG_IP_VS_DEBUG
1552 .procname = "debug_level",
1553 .data = &sysctl_ip_vs_debug_level,
1554 .maxlen = sizeof(int),
1556 .proc_handler = proc_dointvec,
1560 .procname = "am_droprate",
1561 .data = &sysctl_ip_vs_am_droprate,
1562 .maxlen = sizeof(int),
1564 .proc_handler = proc_dointvec,
1567 .procname = "drop_entry",
1568 .data = &sysctl_ip_vs_drop_entry,
1569 .maxlen = sizeof(int),
1571 .proc_handler = proc_do_defense_mode,
1574 .procname = "drop_packet",
1575 .data = &sysctl_ip_vs_drop_packet,
1576 .maxlen = sizeof(int),
1578 .proc_handler = proc_do_defense_mode,
1580 #ifdef CONFIG_IP_VS_NFCT
1582 .procname = "conntrack",
1583 .data = &sysctl_ip_vs_conntrack,
1584 .maxlen = sizeof(int),
1586 .proc_handler = proc_dointvec,
1590 .procname = "secure_tcp",
1591 .data = &sysctl_ip_vs_secure_tcp,
1592 .maxlen = sizeof(int),
1594 .proc_handler = proc_do_defense_mode,
1597 .procname = "snat_reroute",
1598 .data = &sysctl_ip_vs_snat_reroute,
1599 .maxlen = sizeof(int),
1601 .proc_handler = proc_dointvec,
/* DoS-protection state timeouts, stored in jiffies */
1605 .procname = "timeout_established",
1606 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1607 .maxlen = sizeof(int),
1609 .proc_handler = proc_dointvec_jiffies,
1612 .procname = "timeout_synsent",
1613 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1614 .maxlen = sizeof(int),
1616 .proc_handler = proc_dointvec_jiffies,
1619 .procname = "timeout_synrecv",
1620 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1621 .maxlen = sizeof(int),
1623 .proc_handler = proc_dointvec_jiffies,
1626 .procname = "timeout_finwait",
1627 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1628 .maxlen = sizeof(int),
1630 .proc_handler = proc_dointvec_jiffies,
1633 .procname = "timeout_timewait",
1634 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1635 .maxlen = sizeof(int),
1637 .proc_handler = proc_dointvec_jiffies,
1640 .procname = "timeout_close",
1641 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1642 .maxlen = sizeof(int),
1644 .proc_handler = proc_dointvec_jiffies,
1647 .procname = "timeout_closewait",
1648 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1649 .maxlen = sizeof(int),
1651 .proc_handler = proc_dointvec_jiffies,
1654 .procname = "timeout_lastack",
1655 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1656 .maxlen = sizeof(int),
1658 .proc_handler = proc_dointvec_jiffies,
1661 .procname = "timeout_listen",
1662 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1663 .maxlen = sizeof(int),
1665 .proc_handler = proc_dointvec_jiffies,
1668 .procname = "timeout_synack",
1669 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1670 .maxlen = sizeof(int),
1672 .proc_handler = proc_dointvec_jiffies,
1675 .procname = "timeout_udp",
1676 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1677 .maxlen = sizeof(int),
1679 .proc_handler = proc_dointvec_jiffies,
1682 .procname = "timeout_icmp",
1683 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1684 .maxlen = sizeof(int),
1686 .proc_handler = proc_dointvec_jiffies,
1690 .procname = "cache_bypass",
1691 .data = &sysctl_ip_vs_cache_bypass,
1692 .maxlen = sizeof(int),
1694 .proc_handler = proc_dointvec,
1697 .procname = "expire_nodest_conn",
1698 .data = &sysctl_ip_vs_expire_nodest_conn,
1699 .maxlen = sizeof(int),
1701 .proc_handler = proc_dointvec,
1704 .procname = "expire_quiescent_template",
1705 .data = &sysctl_ip_vs_expire_quiescent_template,
1706 .maxlen = sizeof(int),
1708 .proc_handler = proc_dointvec,
1711 .procname = "sync_threshold",
1712 .data = &sysctl_ip_vs_sync_threshold,
1713 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1715 .proc_handler = proc_do_sync_threshold,
1718 .procname = "nat_icmp_send",
1719 .data = &sysctl_ip_vs_nat_icmp_send,
1720 .maxlen = sizeof(int),
1722 .proc_handler = proc_dointvec,
/*
 * /proc/sys path components ("net/ipv4/vs") under which vs_vars is
 * registered; exported for use by other IPVS modules.
 */
1727 const struct ctl_path net_vs_ctl_path[] = {
1728 { .procname = "net", },
1729 { .procname = "ipv4", },
1730 { .procname = "vs", },
1733 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
/* Handle returned by sysctl registration, needed for later unregister */
1735 static struct ctl_table_header * sysctl_header;
1737 #ifdef CONFIG_PROC_FS
/* NOTE(review): this appears to be part of the seq_file iterator state
 * (table currently being walked); surrounding lines are elided. */
1740 struct list_head *table;
1745 * Write the contents of the VS rule table to a PROCfs file.
1746 * (It is kept just for backward compatibility)
/*
 * Map a connection's forwarding method bits to a short name for the
 * /proc listing.  Only the FWD_MASK bits of @flags are examined.
 * NOTE(review): the return statements are elided in this excerpt.
 */
1748 static inline const char *ip_vs_fwd_name(unsigned flags)
1750 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1751 case IP_VS_CONN_F_LOCALNODE:
1753 case IP_VS_CONN_F_TUNNEL:
1755 case IP_VS_CONN_F_DROUTE:
1763 /* Get the Nth entry in the two lists */
/*
 * Position the seq_file iterator at entry @pos, scanning first the
 * <proto,addr,port> hash (s_list) and then the fwmark hash (f_list).
 * Records which table the hit came from in iter->table so _next()
 * knows where to resume.  Caller holds __ip_vs_svc_lock.
 * NOTE(review): the position-counting lines are elided in this excerpt.
 */
1764 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1766 struct ip_vs_iter *iter = seq->private;
1768 struct ip_vs_service *svc;
1770 /* look in hash by protocol */
1771 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1772 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1774 iter->table = ip_vs_svc_table;
1781 /* keep looking in fwmark */
1782 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1783 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1785 iter->table = ip_vs_svc_fwm_table;
/*
 * seq_file start: take the service table read lock (held across the
 * whole iteration, released in _stop) and return either the header
 * token or the (*pos - 1)-th service.
 */
1795 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1796 __acquires(__ip_vs_svc_lock)
1799 read_lock_bh(&__ip_vs_svc_lock);
1800 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file next: advance within the current hash bucket, then across
 * buckets, then fall over from the protocol-hashed table to the
 * fwmark-hashed table.  iter->table/iter->bucket track the position.
 * NOTE(review): several lines are elided in this excerpt.
 */
1804 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1806 struct list_head *e;
1807 struct ip_vs_iter *iter;
1808 struct ip_vs_service *svc;
1811 if (v == SEQ_START_TOKEN)
1812 return ip_vs_info_array(seq,0);
1815 iter = seq->private;
1817 if (iter->table == ip_vs_svc_table) {
1818 /* next service in table hashed by protocol */
1819 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1820 return list_entry(e, struct ip_vs_service, s_list);
1823 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1824 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1830 iter->table = ip_vs_svc_fwm_table;
1835 /* next service in hashed by fwmark */
1836 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1837 return list_entry(e, struct ip_vs_service, f_list);
1840 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1841 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file stop: drop the lock taken in _start */
1849 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1850 __releases(__ip_vs_svc_lock)
1852 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * seq_file show: print the header for the START token, otherwise one
 * virtual service line (address or fwmark form, IPv6-aware) followed
 * by one line per real-server destination with forwarding method,
 * weight and connection counters.
 * NOTE(review): several lines are elided in this excerpt.
 */
1856 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1858 if (v == SEQ_START_TOKEN) {
1860 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1861 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1863 "Prot LocalAddress:Port Scheduler Flags\n");
1865 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1867 const struct ip_vs_service *svc = v;
1868 const struct ip_vs_iter *iter = seq->private;
1869 const struct ip_vs_dest *dest;
/* address-hashed service: print proto + vaddr:port */
1871 if (iter->table == ip_vs_svc_table) {
1872 #ifdef CONFIG_IP_VS_IPV6
1873 if (svc->af == AF_INET6)
1874 seq_printf(seq, "%s [%pI6]:%04X %s ",
1875 ip_vs_proto_name(svc->protocol),
1878 svc->scheduler->name);
1881 seq_printf(seq, "%s %08X:%04X %s %s ",
1882 ip_vs_proto_name(svc->protocol),
1883 ntohl(svc->addr.ip),
1885 svc->scheduler->name,
1886 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
/* fwmark-hashed service: print the mark instead of an address */
1888 seq_printf(seq, "FWM %08X %s %s",
1889 svc->fwmark, svc->scheduler->name,
1890 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1893 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1894 seq_printf(seq, "persistent %d %08X\n",
1896 ntohl(svc->netmask));
1898 seq_putc(seq, '\n');
/* one line per destination (real server) */
1900 list_for_each_entry(dest, &svc->destinations, n_list) {
1901 #ifdef CONFIG_IP_VS_IPV6
1902 if (dest->af == AF_INET6)
1905 " %-7s %-6d %-10d %-10d\n",
1908 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1909 atomic_read(&dest->weight),
1910 atomic_read(&dest->activeconns),
1911 atomic_read(&dest->inactconns));
1916 "%-7s %-6d %-10d %-10d\n",
1917 ntohl(dest->addr.ip),
1919 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1920 atomic_read(&dest->weight),
1921 atomic_read(&dest->activeconns),
1922 atomic_read(&dest->inactconns));
/* seq_file operations for /proc/net/ip_vs */
1929 static const struct seq_operations ip_vs_info_seq_ops = {
1930 .start = ip_vs_info_seq_start,
1931 .next = ip_vs_info_seq_next,
1932 .stop = ip_vs_info_seq_stop,
1933 .show = ip_vs_info_seq_show,
/* open: allocate per-reader iterator state (struct ip_vs_iter) */
1936 static int ip_vs_info_open(struct inode *inode, struct file *file)
1938 return seq_open_private(file, &ip_vs_info_seq_ops,
1939 sizeof(struct ip_vs_iter));
1942 static const struct file_operations ip_vs_info_fops = {
1943 .owner = THIS_MODULE,
1944 .open = ip_vs_info_open,
1946 .llseek = seq_lseek,
1947 .release = seq_release_private,
/* Global IPVS statistics, guarded by its embedded spinlock */
1952 struct ip_vs_stats ip_vs_stats = {
1953 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1956 #ifdef CONFIG_PROC_FS
/*
 * Render /proc/net/ip_vs_stats: absolute counters then per-second
 * rates, snapshotted under ip_vs_stats.lock.
 * NOTE(review): some header-printing lines are elided in this excerpt.
 */
1957 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1960 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1962 " Total Incoming Outgoing Incoming Outgoing\n");
1964 " Conns Packets Packets Bytes Bytes\n");
1966 spin_lock_bh(&ip_vs_stats.lock);
1967 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1968 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1969 (unsigned long long) ip_vs_stats.ustats.inbytes,
1970 (unsigned long long) ip_vs_stats.ustats.outbytes);
1972 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1975 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1976 ip_vs_stats.ustats.cps,
1977 ip_vs_stats.ustats.inpps,
1978 ip_vs_stats.ustats.outpps,
1979 ip_vs_stats.ustats.inbps,
1980 ip_vs_stats.ustats.outbps);
1981 spin_unlock_bh(&ip_vs_stats.lock);
/* single_open: whole file produced by one show() call */
1986 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1988 return single_open(file, ip_vs_stats_show, NULL);
1991 static const struct file_operations ip_vs_stats_fops = {
1992 .owner = THIS_MODULE,
1993 .open = ip_vs_stats_seq_open,
1995 .llseek = seq_lseek,
1996 .release = single_release,
2002 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * Apply user-supplied protocol timeouts (seconds; 0 means "leave
 * unchanged") to the TCP/UDP protocol timeout tables, converting to
 * jiffies.
 * NOTE(review): some lines are elided in this excerpt.
 */
2004 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2006 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2011 #ifdef CONFIG_IP_VS_PROTO_TCP
2012 if (u->tcp_timeout) {
2013 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2014 = u->tcp_timeout * HZ;
2017 if (u->tcp_fin_timeout) {
2018 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2019 = u->tcp_fin_timeout * HZ;
2023 #ifdef CONFIG_IP_VS_PROTO_UDP
2024 if (u->udp_timeout) {
2025 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2026 = u->udp_timeout * HZ;
/* Map a sockopt SET command to its index / expected argument length,
 * used by do_ip_vs_set_ctl() to validate user-supplied lengths. */
2033 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2034 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2035 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2036 sizeof(struct ip_vs_dest_user))
2037 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2038 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2039 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2041 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2042 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2043 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2044 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2045 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2046 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2047 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2048 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2049 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2052 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * Convert the legacy IPv4-only sockopt service struct into the kernel
 * internal (address-family aware) representation.  The target is
 * zeroed first, so unset fields default to 0.
 */
2055 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2056 struct ip_vs_service_user *usvc_compat)
2058 memset(usvc, 0, sizeof(*usvc));
2061 usvc->protocol = usvc_compat->protocol;
2062 usvc->addr.ip = usvc_compat->addr;
2063 usvc->port = usvc_compat->port;
2064 usvc->fwmark = usvc_compat->fwmark;
2066 /* Deep copy of sched_name is not needed here */
/* shallow pointer copy: usvc must not outlive usvc_compat */
2067 usvc->sched_name = usvc_compat->sched_name;
2069 usvc->flags = usvc_compat->flags;
2070 usvc->timeout = usvc_compat->timeout;
2071 usvc->netmask = usvc_compat->netmask;
/*
 * Convert the legacy IPv4-only sockopt destination struct into the
 * kernel internal representation (zero-initialized first).
 */
2074 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2075 struct ip_vs_dest_user *udest_compat)
2077 memset(udest, 0, sizeof(*udest));
2079 udest->addr.ip = udest_compat->addr;
2080 udest->port = udest_compat->port;
2081 udest->conn_flags = udest_compat->conn_flags;
2082 udest->weight = udest_compat->weight;
2083 udest->u_threshold = udest_compat->u_threshold;
2084 udest->l_threshold = udest_compat->l_threshold;
/*
 * do_ip_vs_set_ctl - setsockopt entry point for the legacy IPVS
 * control interface.  Requires CAP_NET_ADMIN; validates cmd and the
 * user buffer length against set_arglen[], copies the argument in,
 * then dispatches under __ip_vs_mutex: global commands (FLUSH,
 * TIMEOUT, START/STOPDAEMON) first, then service/destination commands
 * after converting the compat structs and looking up the service.
 * The module use count is held across the operation.
 * NOTE(review): several lines (returns, labels, closing braces) are
 * elided in this excerpt.
 */
2088 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2091 unsigned char arg[MAX_ARG_LEN];
2092 struct ip_vs_service_user *usvc_compat;
2093 struct ip_vs_service_user_kern usvc;
2094 struct ip_vs_service *svc;
2095 struct ip_vs_dest_user *udest_compat;
2096 struct ip_vs_dest_user_kern udest;
2098 if (!capable(CAP_NET_ADMIN))
2101 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2103 if (len < 0 || len > MAX_ARG_LEN)
2105 if (len != set_arglen[SET_CMDID(cmd)]) {
2106 pr_err("set_ctl: len %u != %u\n",
2107 len, set_arglen[SET_CMDID(cmd)]);
2111 if (copy_from_user(arg, user, len) != 0)
2114 /* increase the module use count */
2115 ip_vs_use_count_inc();
2117 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2122 if (cmd == IP_VS_SO_SET_FLUSH) {
2123 /* Flush the virtual service */
2124 ret = ip_vs_flush();
2126 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2127 /* Set timeout values for (tcp tcpfin udp) */
2128 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2130 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2131 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2132 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2134 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2135 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2136 ret = stop_sync_thread(dm->state);
/* remaining commands carry a service (and maybe a dest) argument */
2140 usvc_compat = (struct ip_vs_service_user *)arg;
2141 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2143 /* We only use the new structs internally, so copy userspace compat
2144 * structs to extended internal versions */
2145 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2146 ip_vs_copy_udest_compat(&udest, udest_compat);
2148 if (cmd == IP_VS_SO_SET_ZERO) {
2149 /* if no service address is set, zero counters in all */
2150 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2151 ret = ip_vs_zero_all();
2156 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2157 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2158 usvc.protocol != IPPROTO_SCTP) {
2159 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2160 usvc.protocol, &usvc.addr.ip,
2161 ntohs(usvc.port), usvc.sched_name);
2166 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2167 if (usvc.fwmark == 0)
2168 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2169 &usvc.addr, usvc.port);
2171 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
/* all commands except ADD require an existing, matching service */
2173 if (cmd != IP_VS_SO_SET_ADD
2174 && (svc == NULL || svc->protocol != usvc.protocol)) {
2180 case IP_VS_SO_SET_ADD:
2184 ret = ip_vs_add_service(&usvc, &svc);
2186 case IP_VS_SO_SET_EDIT:
2187 ret = ip_vs_edit_service(svc, &usvc);
2189 case IP_VS_SO_SET_DEL:
2190 ret = ip_vs_del_service(svc);
2194 case IP_VS_SO_SET_ZERO:
2195 ret = ip_vs_zero_service(svc);
2197 case IP_VS_SO_SET_ADDDEST:
2198 ret = ip_vs_add_dest(svc, &udest);
2200 case IP_VS_SO_SET_EDITDEST:
2201 ret = ip_vs_edit_dest(svc, &udest);
2203 case IP_VS_SO_SET_DELDEST:
2204 ret = ip_vs_del_dest(svc, &udest);
2211 mutex_unlock(&__ip_vs_mutex);
2213 /* decrease the module use count */
2214 ip_vs_use_count_dec();
/* Snapshot a stats block into the userspace layout under its lock */
2221 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2223 spin_lock_bh(&src->lock);
2224 memcpy(dst, &src->ustats, sizeof(*dst));
2225 spin_unlock_bh(&src->lock);
/*
 * Fill a userspace service entry (legacy IPv4 layout) from a kernel
 * service: identity, scheduler name, flags, timeout (converted back
 * to seconds), netmask, dest count and a stats snapshot.
 */
2229 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2231 dst->protocol = src->protocol;
2232 dst->addr = src->addr.ip;
2233 dst->port = src->port;
2234 dst->fwmark = src->fwmark;
2235 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2236 dst->flags = src->flags;
2237 dst->timeout = src->timeout / HZ;
2238 dst->netmask = src->netmask;
2239 dst->num_dests = src->num_dests;
2240 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * Copy up to get->num_services service entries to userspace, walking
 * both hash tables.  Only AF_INET services are exposed through this
 * legacy interface.
 * NOTE(review): the count increment and error-path lines are elided
 * in this excerpt.
 */
2244 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2245 struct ip_vs_get_services __user *uptr)
2248 struct ip_vs_service *svc;
2249 struct ip_vs_service_entry entry;
2252 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2253 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2254 /* Only expose IPv4 entries to old interface */
2255 if (svc->af != AF_INET)
2258 if (count >= get->num_services)
2260 memset(&entry, 0, sizeof(entry));
2261 ip_vs_copy_service(&entry, svc);
2262 if (copy_to_user(&uptr->entrytable[count],
2263 &entry, sizeof(entry))) {
2271 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2272 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2273 /* Only expose IPv4 entries to old interface */
2274 if (svc->af != AF_INET)
2277 if (count >= get->num_services)
2279 memset(&entry, 0, sizeof(entry));
2280 ip_vs_copy_service(&entry, svc);
2281 if (copy_to_user(&uptr->entrytable[count],
2282 &entry, sizeof(entry))) {
/*
 * Copy up to get->num_dests destination entries of one service to
 * userspace.  The service is looked up by fwmark if set, otherwise by
 * <protocol, addr, port>; only AF_INET is supported here.
 * NOTE(review): some lines are elided in this excerpt.
 */
2294 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2295 struct ip_vs_get_dests __user *uptr)
2297 struct ip_vs_service *svc;
2298 union nf_inet_addr addr = { .ip = get->addr };
2302 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2304 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2309 struct ip_vs_dest *dest;
2310 struct ip_vs_dest_entry entry;
2312 list_for_each_entry(dest, &svc->destinations, n_list) {
2313 if (count >= get->num_dests)
2316 entry.addr = dest->addr.ip;
2317 entry.port = dest->port;
2318 entry.conn_flags = atomic_read(&dest->conn_flags);
2319 entry.weight = atomic_read(&dest->weight);
2320 entry.u_threshold = dest->u_threshold;
2321 entry.l_threshold = dest->l_threshold;
2322 entry.activeconns = atomic_read(&dest->activeconns);
2323 entry.inactconns = atomic_read(&dest->inactconns);
2324 entry.persistconns = atomic_read(&dest->persistconns);
2325 ip_vs_copy_stats(&entry.stats, &dest->stats);
2326 if (copy_to_user(&uptr->entrytable[count],
2327 &entry, sizeof(entry))) {
/* Report current TCP/TCP-FIN/UDP timeouts to userspace, in seconds */
2339 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2341 #ifdef CONFIG_IP_VS_PROTO_TCP
2343 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2344 u->tcp_fin_timeout =
2345 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2347 #ifdef CONFIG_IP_VS_PROTO_UDP
2349 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/* Map a sockopt GET command to its index / minimum argument length,
 * used by do_ip_vs_get_ctl() to validate user-supplied lengths. */
2354 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2355 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2356 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2357 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2358 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2359 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2360 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2362 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2363 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2364 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2365 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2366 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2367 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2368 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2369 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * do_ip_vs_get_ctl - getsockopt entry point for the legacy IPVS
 * control interface.  Requires CAP_NET_ADMIN; validates cmd and
 * *len against get_arglen[], copies the request header in, then
 * serves the query under __ip_vs_mutex.
 * NOTE(review): several lines (returns, braces, some size checks)
 * are elided in this excerpt.
 */
2373 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2375 unsigned char arg[128];
2377 unsigned int copylen;
2379 if (!capable(CAP_NET_ADMIN))
2382 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2385 if (*len < get_arglen[GET_CMDID(cmd)]) {
2386 pr_err("get_ctl: len %u < %u\n",
2387 *len, get_arglen[GET_CMDID(cmd)]);
2391 copylen = get_arglen[GET_CMDID(cmd)];
2395 if (copy_from_user(arg, user, copylen) != 0)
2398 if (mutex_lock_interruptible(&__ip_vs_mutex))
2399 return -ERESTARTSYS;
2402 case IP_VS_SO_GET_VERSION:
2406 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2407 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2408 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2412 *len = strlen(buf)+1;
2416 case IP_VS_SO_GET_INFO:
2418 struct ip_vs_getinfo info;
2419 info.version = IP_VS_VERSION_CODE;
2420 info.size = ip_vs_conn_tab_size;
2421 info.num_services = ip_vs_num_services;
2422 if (copy_to_user(user, &info, sizeof(info)) != 0)
2427 case IP_VS_SO_GET_SERVICES:
2429 struct ip_vs_get_services *get;
2432 get = (struct ip_vs_get_services *)arg;
2433 size = sizeof(*get) +
2434 sizeof(struct ip_vs_service_entry) * get->num_services;
2436 pr_err("length: %u != %u\n", *len, size);
2440 ret = __ip_vs_get_service_entries(get, user);
2444 case IP_VS_SO_GET_SERVICE:
2446 struct ip_vs_service_entry *entry;
2447 struct ip_vs_service *svc;
2448 union nf_inet_addr addr;
2450 entry = (struct ip_vs_service_entry *)arg;
2451 addr.ip = entry->addr;
2453 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2455 svc = __ip_vs_service_find(AF_INET, entry->protocol,
2456 &addr, entry->port);
2458 ip_vs_copy_service(entry, svc);
2459 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2466 case IP_VS_SO_GET_DESTS:
2468 struct ip_vs_get_dests *get;
2471 get = (struct ip_vs_get_dests *)arg;
2472 size = sizeof(*get) +
2473 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2475 pr_err("length: %u != %u\n", *len, size);
2479 ret = __ip_vs_get_dest_entries(get, user);
2483 case IP_VS_SO_GET_TIMEOUT:
2485 struct ip_vs_timeout_user t;
2487 __ip_vs_get_timeouts(&t);
2488 if (copy_to_user(user, &t, sizeof(t)) != 0)
2493 case IP_VS_SO_GET_DAEMON:
2495 struct ip_vs_daemon_user d[2];
/* d[0] = master daemon state, d[1] = backup daemon state */
2497 memset(&d, 0, sizeof(d));
2498 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2499 d[0].state = IP_VS_STATE_MASTER;
2500 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2501 d[0].syncid = ip_vs_master_syncid;
2503 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2504 d[1].state = IP_VS_STATE_BACKUP;
2505 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2506 d[1].syncid = ip_vs_backup_syncid;
2508 if (copy_to_user(user, &d, sizeof(d)) != 0)
2518 mutex_unlock(&__ip_vs_mutex);
/* Netfilter sockopt registration tying the IPVS SET/GET command
 * ranges to the handlers above */
2523 static struct nf_sockopt_ops ip_vs_sockopts = {
2525 .set_optmin = IP_VS_BASE_CTL,
2526 .set_optmax = IP_VS_SO_SET_MAX+1,
2527 .set = do_ip_vs_set_ctl,
2528 .get_optmin = IP_VS_BASE_CTL,
2529 .get_optmax = IP_VS_SO_GET_MAX+1,
2530 .get = do_ip_vs_get_ctl,
2531 .owner = THIS_MODULE,
2535 * Generic Netlink interface
2538 /* IPVS genetlink family */
2539 static struct genl_family ip_vs_genl_family = {
2540 .id = GENL_ID_GENERATE,
2542 .name = IPVS_GENL_NAME,
2543 .version = IPVS_GENL_VERSION,
2544 .maxattr = IPVS_CMD_MAX,
2547 /* Policy used for first-level command attributes */
2548 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2549 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2550 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2551 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2552 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2553 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2554 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2557 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2558 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2559 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2560 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2561 .len = IP_VS_IFNAME_MAXLEN },
2562 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2565 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2566 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2567 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2568 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2569 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2570 .len = sizeof(union nf_inet_addr) },
2571 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2572 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2573 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2574 .len = IP_VS_SCHEDNAME_MAXLEN },
2575 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2576 .len = IP_VS_PENAME_MAXLEN },
2577 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2578 .len = sizeof(struct ip_vs_flags) },
2579 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2580 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2581 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2584 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2585 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2586 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2587 .len = sizeof(union nf_inet_addr) },
2588 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2589 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2590 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2591 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2592 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2593 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2594 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2595 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2596 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * Emit one nested stats attribute (@container_type) holding a
 * consistent snapshot of @stats, taken under stats->lock.  On NLA_PUT
 * failure the nest is cancelled and the lock released.
 * NOTE(review): some lines are elided in this excerpt.
 */
2599 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2600 struct ip_vs_stats *stats)
2602 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2606 spin_lock_bh(&stats->lock);
2608 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2609 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2610 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2611 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2612 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2613 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2614 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2615 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2616 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2617 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2619 spin_unlock_bh(&stats->lock);
2621 nla_nest_end(skb, nl_stats);
/* nla_put_failure error path */
2626 spin_unlock_bh(&stats->lock);
2627 nla_nest_cancel(skb, nl_stats);
/*
 * Emit one nested IPVS_CMD_ATTR_SERVICE attribute describing @svc:
 * identity (fwmark, or proto/addr/port), scheduler and optional pe
 * names, flags, timeout in seconds, netmask and a stats nest.
 * NOTE(review): some lines are elided in this excerpt.
 */
2631 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2632 struct ip_vs_service *svc)
2634 struct nlattr *nl_service;
2635 struct ip_vs_flags flags = { .flags = svc->flags,
2638 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2642 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2645 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2647 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2648 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2649 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2652 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2654 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2655 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2656 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2657 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2659 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2660 goto nla_put_failure;
2662 nla_nest_end(skb, nl_service);
/* nla_put_failure error path */
2667 nla_nest_cancel(skb, nl_service);
/*
 * Emit one NLM_F_MULTI dump message (IPVS_CMD_NEW_SERVICE) for @svc;
 * cancels the partially-built message on failure.
 */
2671 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2672 struct ip_vs_service *svc,
2673 struct netlink_callback *cb)
2677 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2678 &ip_vs_genl_family, NLM_F_MULTI,
2679 IPVS_CMD_NEW_SERVICE);
2683 if (ip_vs_genl_fill_service(skb, svc) < 0)
2684 goto nla_put_failure;
2686 return genlmsg_end(skb, hdr);
2689 genlmsg_cancel(skb, hdr);
/*
 * Netlink dump callback: walk both service hash tables under
 * __ip_vs_mutex, resuming from cb->args[0] and stopping when the skb
 * fills up.
 * NOTE(review): the skip/resume counting lines are elided in this
 * excerpt.
 */
2693 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2694 struct netlink_callback *cb)
2697 int start = cb->args[0];
2698 struct ip_vs_service *svc;
2700 mutex_lock(&__ip_vs_mutex);
2701 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2702 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2705 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2707 goto nla_put_failure;
2712 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2713 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2716 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2718 goto nla_put_failure;
2724 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_SERVICE attribute into @usvc.
 * Mandatory: AF plus either FWMARK or the <protocol, addr, port>
 * triple.  Also looks up any existing matching service (*ret_svc).
 * When @full_entry is set, the scheduler/flags/timeout/netmask
 * attributes are required too; flags are merged into the existing
 * service's flags using the userland-provided mask.
 * NOTE(review): some lines are elided in this excerpt.
 */
2730 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2731 struct nlattr *nla, int full_entry,
2732 struct ip_vs_service **ret_svc)
2734 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2735 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2736 struct ip_vs_service *svc;
2738 /* Parse mandatory identifying service fields first */
2740 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2743 nla_af = attrs[IPVS_SVC_ATTR_AF];
2744 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2745 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2746 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2747 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2749 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2752 memset(usvc, 0, sizeof(*usvc));
2754 usvc->af = nla_get_u16(nla_af);
2755 #ifdef CONFIG_IP_VS_IPV6
2756 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2758 if (usvc->af != AF_INET)
2760 return -EAFNOSUPPORT;
/* fwmark services carry a dummy TCP protocol */
2763 usvc->protocol = IPPROTO_TCP;
2764 usvc->fwmark = nla_get_u32(nla_fwmark);
2766 usvc->protocol = nla_get_u16(nla_protocol);
2767 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2768 usvc->port = nla_get_u16(nla_port);
2773 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2775 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2776 &usvc->addr, usvc->port);
2779 /* If a full entry was requested, check for the additional fields */
2781 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2783 struct ip_vs_flags flags;
2785 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2786 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2787 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2788 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2789 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2791 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2794 nla_memcpy(&flags, nla_flags, sizeof(flags));
2796 /* prefill flags from service if it already exists */
2798 usvc->flags = svc->flags;
2800 /* set new flags from userland */
2801 usvc->flags = (usvc->flags & ~flags.mask) |
2802 (flags.flags & flags.mask);
/* shallow pointers into the nlattr data; valid for this request only */
2803 usvc->sched_name = nla_data(nla_sched);
2804 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2805 usvc->timeout = nla_get_u32(nla_timeout);
2806 usvc->netmask = nla_get_u32(nla_netmask);
/* Resolve a nested service attribute to the existing service, or an
 * ERR_PTR on parse failure (identity fields only, full_entry=0) */
2812 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2814 struct ip_vs_service_user_kern usvc;
2815 struct ip_vs_service *svc;
2818 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2819 return ret ? ERR_PTR(ret) : svc;
/*
 * Emit one nested IPVS_CMD_ATTR_DEST attribute for @dest: address,
 * port, forwarding method (FWD_MASK bits of conn_flags), weight,
 * thresholds, connection counters and a stats nest.
 * NOTE(review): some lines are elided in this excerpt.
 */
2822 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2824 struct nlattr *nl_dest;
2826 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2830 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2831 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2833 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2834 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2835 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2836 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2837 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2838 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2839 atomic_read(&dest->activeconns));
2840 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2841 atomic_read(&dest->inactconns));
2842 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2843 atomic_read(&dest->persistconns));
2845 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2846 goto nla_put_failure;
2848 nla_nest_end(skb, nl_dest);
/* nla_put_failure error path */
2853 nla_nest_cancel(skb, nl_dest);
/*
 * Emit one NLM_F_MULTI dump message for @dest; cancels the
 * partially-built message on failure.
 */
2857 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2858 struct netlink_callback *cb)
2862 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2863 &ip_vs_genl_family, NLM_F_MULTI,
2868 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2869 goto nla_put_failure;
2871 return genlmsg_end(skb, hdr);
2874 genlmsg_cancel(skb, hdr);
/*
 * Netlink dump callback for destinations: re-parse the service from
 * the original request, then dump its destination list under
 * __ip_vs_mutex, resuming from cb->args[0].
 * NOTE(review): the skip/resume counting lines are elided in this
 * excerpt.
 */
2878 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2879 struct netlink_callback *cb)
2882 int start = cb->args[0];
2883 struct ip_vs_service *svc;
2884 struct ip_vs_dest *dest;
2885 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2887 mutex_lock(&__ip_vs_mutex);
2889 /* Try to find the service for which to dump destinations */
2890 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2891 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2894 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2895 if (IS_ERR(svc) || svc == NULL)
2898 /* Dump the destinations */
2899 list_for_each_entry(dest, &svc->destinations, n_list) {
2902 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2904 goto nla_put_failure;
2912 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested destination attribute into a kernel-side udest struct.
 * Address and port are always mandatory.  When full_entry is set
 * (add/edit), forwarding method, weight and both thresholds must also
 * be present; for identify-only use (delete) they are skipped.
 * Returns 0 on success (error returns are in the elided lines).
 */
2917 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2918 				 struct nlattr *nla, int full_entry)
2920 	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2921 	struct nlattr *nla_addr, *nla_port;
2923 	/* Parse mandatory identifying destination fields first */
2925 	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2928 	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
2929 	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
2931 	if (!(nla_addr && nla_port))
	/* Zero first so optional fields default to 0 when absent. */
2934 	memset(udest, 0, sizeof(*udest));
2936 	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2937 	udest->port = nla_get_u16(nla_port);
2939 	/* If a full entry was requested, check for the additional fields */
2941 		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2944 		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
2945 		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
2946 		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
2947 		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
2949 		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
		/* Only forwarding-method bits are taken from userspace. */
2952 		udest->conn_flags = nla_get_u32(nla_fwd)
2953 				    & IP_VS_CONN_F_FWD_MASK;
2954 		udest->weight = nla_get_u32(nla_weight);
2955 		udest->u_threshold = nla_get_u32(nla_u_thresh);
2956 		udest->l_threshold = nla_get_u32(nla_l_thresh);
/*
 * Emit one sync-daemon description as a nested IPVS_CMD_ATTR_DAEMON
 * attribute (state, multicast interface name, sync id).
 *
 * NOTE(review): state and syncid are typed __be32 here but written with
 * NLA_PUT_U32, and callers pass host-order IP_VS_STATE_* constants —
 * the __be32 annotation looks wrong; confirm against the rest of the
 * file / sparse output.
 */
2962 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2963 				  const char *mcast_ifn, __be32 syncid)
2965 	struct nlattr *nl_daemon;
2967 	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2971 	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2972 	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2973 	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2975 	nla_nest_end(skb, nl_daemon);
	/* Error path: cancel the nest on out-of-room. */
2980 	nla_nest_cancel(skb, nl_daemon);
/*
 * Wrap one sync-daemon entry in its own NLM_F_MULTI message
 * (IPVS_CMD_NEW_DAEMON) for the GET_DAEMON dump; cancelled as a whole
 * if the attributes do not fit.
 */
2984 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2985 				  const char *mcast_ifn, __be32 syncid,
2986 				  struct netlink_callback *cb)
2989 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2990 			  &ip_vs_genl_family, NLM_F_MULTI,
2991 			  IPVS_CMD_NEW_DAEMON);
2995 	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2996 		goto nla_put_failure;
2998 	return genlmsg_end(skb, hdr);
3001 	genlmsg_cancel(skb, hdr);
/*
 * Dump the running sync daemons: master first, then backup.
 * cb->args[0] / cb->args[1] mark the master/backup entry as already
 * dumped (set in elided lines after each successful emit), so a resumed
 * dump does not repeat them.  Global sync state is read under
 * __ip_vs_mutex.
 */
3005 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3006 				   struct netlink_callback *cb)
3008 	mutex_lock(&__ip_vs_mutex);
3009 	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3010 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3011 					   ip_vs_master_mcast_ifn,
3012 					   ip_vs_master_syncid, cb) < 0)
3013 			goto nla_put_failure;
3018 	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3019 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3020 					   ip_vs_backup_mcast_ifn,
3021 					   ip_vs_backup_syncid, cb) < 0)
3022 			goto nla_put_failure;
3028 	mutex_unlock(&__ip_vs_mutex);
/*
 * IPVS_CMD_NEW_DAEMON: start a sync thread.  State, multicast interface
 * and sync id are all mandatory; the error return for missing
 * attributes is in the elided line.
 */
3033 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3035 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3036 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3037 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3040 	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3041 				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3042 				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
/*
 * IPVS_CMD_DEL_DAEMON: stop the sync thread for the given state
 * (master or backup).  Only the state attribute is required.
 */
3045 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3047 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
3050 	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
/*
 * IPVS_CMD_SET_CONFIG: update protocol timeouts.  Current values are
 * read first so that attributes absent from the request leave the
 * corresponding timeout unchanged.
 */
3053 static int ip_vs_genl_set_config(struct nlattr **attrs)
3055 	struct ip_vs_timeout_user t;
3057 	__ip_vs_get_timeouts(&t);
3059 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3060 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3062 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3064 			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3066 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3067 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3069 	return ip_vs_set_timeout(&t);
/*
 * Generic Netlink "set" dispatcher for all state-changing IPVS
 * commands.  Commands that need no service argument (flush, set-config,
 * daemon start/stop, global zero) are handled up front; everything
 * else first resolves/parses the service (and, for destination
 * commands, the destination) and then dispatches on the command in the
 * final switch.  The whole operation runs under __ip_vs_mutex.
 */
3072 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3074 	struct ip_vs_service *svc = NULL;
3075 	struct ip_vs_service_user_kern usvc;
3076 	struct ip_vs_dest_user_kern udest;
3078 	int need_full_svc = 0, need_full_dest = 0;
3080 	cmd = info->genlhdr->cmd;
3082 	mutex_lock(&__ip_vs_mutex);
	/* Commands that take no service argument. */
3084 	if (cmd == IPVS_CMD_FLUSH) {
3085 		ret = ip_vs_flush();
3087 	} else if (cmd == IPVS_CMD_SET_CONFIG) {
3088 		ret = ip_vs_genl_set_config(info->attrs);
3090 	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
3091 		   cmd == IPVS_CMD_DEL_DAEMON) {
3093 		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
		/* Daemon attributes arrive nested under CMD_ATTR_DAEMON. */
3095 		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3096 		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3097 				     info->attrs[IPVS_CMD_ATTR_DAEMON],
3098 				     ip_vs_daemon_policy)) {
3103 		if (cmd == IPVS_CMD_NEW_DAEMON)
3104 			ret = ip_vs_genl_new_daemon(daemon_attrs);
3106 			ret = ip_vs_genl_del_daemon(daemon_attrs);
3108 	} else if (cmd == IPVS_CMD_ZERO &&
3109 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* ZERO without a service attribute means "zero everything". */
3110 		ret = ip_vs_zero_all();
3114 	/* All following commands require a service argument, so check if we
3115 	 * received a valid one. We need a full service specification when
3116 	 * adding / editing a service. Only identifying members otherwise. */
3117 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3120 	ret = ip_vs_genl_parse_service(&usvc,
3121 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
3122 				       need_full_svc, &svc);
3126 	/* Unless we're adding a new service, the service must already exist */
3127 	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3132 	/* Destination commands require a valid destination argument. For
3133 	 * adding / editing a destination, we need a full destination
3135 	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3136 	    cmd == IPVS_CMD_DEL_DEST) {
3137 		if (cmd != IPVS_CMD_DEL_DEST)
3140 		ret = ip_vs_genl_parse_dest(&udest,
3141 					    info->attrs[IPVS_CMD_ATTR_DEST],
	/* Final dispatch; svc/usvc/udest are validated above. */
3148 	case IPVS_CMD_NEW_SERVICE:
3150 			ret = ip_vs_add_service(&usvc, &svc);
3154 	case IPVS_CMD_SET_SERVICE:
3155 		ret = ip_vs_edit_service(svc, &usvc);
3157 	case IPVS_CMD_DEL_SERVICE:
3158 		ret = ip_vs_del_service(svc);
3159 		/* do not use svc, it can be freed */
3161 	case IPVS_CMD_NEW_DEST:
3162 		ret = ip_vs_add_dest(svc, &udest);
3164 	case IPVS_CMD_SET_DEST:
3165 		ret = ip_vs_edit_dest(svc, &udest);
3167 	case IPVS_CMD_DEL_DEST:
3168 		ret = ip_vs_del_dest(svc, &udest);
	/* IPVS_CMD_ZERO with a service attribute zeroes only that service. */
3171 		ret = ip_vs_zero_service(svc);
3178 	mutex_unlock(&__ip_vs_mutex);
/*
 * Generic Netlink "get" handler for non-dump queries: single service
 * lookup, current timeouts, and version/conn-table info.  Builds a
 * reply message with the matching SET_*/NEW_* command so userspace can
 * reuse its attribute parsers.  Runs under __ip_vs_mutex.
 */
3183 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3185 	struct sk_buff *msg;
3187 	int ret, cmd, reply_cmd;
3189 	cmd = info->genlhdr->cmd;
	/* Map each GET command to the command id used in the reply. */
3191 	if (cmd == IPVS_CMD_GET_SERVICE)
3192 		reply_cmd = IPVS_CMD_NEW_SERVICE;
3193 	else if (cmd == IPVS_CMD_GET_INFO)
3194 		reply_cmd = IPVS_CMD_SET_INFO;
3195 	else if (cmd == IPVS_CMD_GET_CONFIG)
3196 		reply_cmd = IPVS_CMD_SET_CONFIG;
3198 		pr_err("unknown Generic Netlink command\n");
3202 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3206 	mutex_lock(&__ip_vs_mutex);
3208 	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3210 		goto nla_put_failure;
3213 	case IPVS_CMD_GET_SERVICE:
3215 		struct ip_vs_service *svc;
3217 		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3222 		ret = ip_vs_genl_fill_service(msg, svc);
3224 			goto nla_put_failure;
3233 	case IPVS_CMD_GET_CONFIG:
3235 		struct ip_vs_timeout_user t;
		/* Report timeouts only for protocols compiled in. */
3237 		__ip_vs_get_timeouts(&t);
3238 #ifdef CONFIG_IP_VS_PROTO_TCP
3239 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3240 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3243 #ifdef CONFIG_IP_VS_PROTO_UDP
3244 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3250 	case IPVS_CMD_GET_INFO:
3251 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3252 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3253 			    ip_vs_conn_tab_size);
3257 	genlmsg_end(msg, reply);
3258 	ret = genlmsg_reply(msg, info);
3262 	pr_err("not enough space in Netlink message\n");
3268 	mutex_unlock(&__ip_vs_mutex);
/*
 * Generic Netlink operation table.  All state-changing commands route
 * through ip_vs_genl_set_cmd, simple queries through
 * ip_vs_genl_get_cmd, and multi-entry queries through the dump
 * callbacks.  Every op requires CAP_NET_ADMIN (GENL_ADMIN_PERM).
 */
3274 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3276 		.cmd	= IPVS_CMD_NEW_SERVICE,
3277 		.flags	= GENL_ADMIN_PERM,
3278 		.policy	= ip_vs_cmd_policy,
3279 		.doit	= ip_vs_genl_set_cmd,
3282 		.cmd	= IPVS_CMD_SET_SERVICE,
3283 		.flags	= GENL_ADMIN_PERM,
3284 		.policy	= ip_vs_cmd_policy,
3285 		.doit	= ip_vs_genl_set_cmd,
3288 		.cmd	= IPVS_CMD_DEL_SERVICE,
3289 		.flags	= GENL_ADMIN_PERM,
3290 		.policy	= ip_vs_cmd_policy,
3291 		.doit	= ip_vs_genl_set_cmd,
3294 		.cmd	= IPVS_CMD_GET_SERVICE,
3295 		.flags	= GENL_ADMIN_PERM,
3296 		.doit	= ip_vs_genl_get_cmd,
3297 		.dumpit	= ip_vs_genl_dump_services,
3298 		.policy	= ip_vs_cmd_policy,
3301 		.cmd	= IPVS_CMD_NEW_DEST,
3302 		.flags	= GENL_ADMIN_PERM,
3303 		.policy	= ip_vs_cmd_policy,
3304 		.doit	= ip_vs_genl_set_cmd,
3307 		.cmd	= IPVS_CMD_SET_DEST,
3308 		.flags	= GENL_ADMIN_PERM,
3309 		.policy	= ip_vs_cmd_policy,
3310 		.doit	= ip_vs_genl_set_cmd,
3313 		.cmd	= IPVS_CMD_DEL_DEST,
3314 		.flags	= GENL_ADMIN_PERM,
3315 		.policy	= ip_vs_cmd_policy,
3316 		.doit	= ip_vs_genl_set_cmd,
3319 		.cmd	= IPVS_CMD_GET_DEST,
3320 		.flags	= GENL_ADMIN_PERM,
3321 		.policy	= ip_vs_cmd_policy,
3322 		.dumpit	= ip_vs_genl_dump_dests,
3325 		.cmd	= IPVS_CMD_NEW_DAEMON,
3326 		.flags	= GENL_ADMIN_PERM,
3327 		.policy	= ip_vs_cmd_policy,
3328 		.doit	= ip_vs_genl_set_cmd,
3331 		.cmd	= IPVS_CMD_DEL_DAEMON,
3332 		.flags	= GENL_ADMIN_PERM,
3333 		.policy	= ip_vs_cmd_policy,
3334 		.doit	= ip_vs_genl_set_cmd,
	/* NOTE(review): dumpit-only op with no .policy — its dump path
	 * does not parse request attributes here, but confirm this is
	 * intentional and safe on this kernel version. */
3337 		.cmd	= IPVS_CMD_GET_DAEMON,
3338 		.flags	= GENL_ADMIN_PERM,
3339 		.dumpit	= ip_vs_genl_dump_daemons,
3342 		.cmd	= IPVS_CMD_SET_CONFIG,
3343 		.flags	= GENL_ADMIN_PERM,
3344 		.policy	= ip_vs_cmd_policy,
3345 		.doit	= ip_vs_genl_set_cmd,
3348 		.cmd	= IPVS_CMD_GET_CONFIG,
3349 		.flags	= GENL_ADMIN_PERM,
3350 		.doit	= ip_vs_genl_get_cmd,
3353 		.cmd	= IPVS_CMD_GET_INFO,
3354 		.flags	= GENL_ADMIN_PERM,
3355 		.doit	= ip_vs_genl_get_cmd,
3358 		.cmd	= IPVS_CMD_ZERO,
3359 		.flags	= GENL_ADMIN_PERM,
3360 		.policy	= ip_vs_cmd_policy,
3361 		.doit	= ip_vs_genl_set_cmd,
3364 		.cmd	= IPVS_CMD_FLUSH,
3365 		.flags	= GENL_ADMIN_PERM,
3366 		.doit	= ip_vs_genl_set_cmd,
/* Register the IPVS Generic Netlink family together with its ops. */
3370 static int __init ip_vs_genl_register(void)
3372 	return genl_register_family_with_ops(&ip_vs_genl_family,
3373 		ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
/* Unregister the IPVS Generic Netlink family (ops go with it). */
3376 static void ip_vs_genl_unregister(void)
3378 	genl_unregister_family(&ip_vs_genl_family);
3381 /* End of Generic Netlink interface definitions */
/*
 * Initialize the IPVS control interface: hash tables, sockopt and
 * Generic Netlink interfaces, /proc entries, sysctl table, global
 * stats estimator and the periodic defense timer.  Registration order
 * matters — the genl failure path unwinds the sockopt registration.
 *
 * NOTE(review): proc_net_fops_create() return values are not checked;
 * presumably acceptable at this kernel vintage, but worth confirming.
 */
3384 int __init ip_vs_control_init(void)
3391 	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3392 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3393 		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3394 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3396 	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3397 		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3401 	ret = nf_register_sockopt(&ip_vs_sockopts);
3403 		pr_err("cannot register sockopt.\n");
3407 	ret = ip_vs_genl_register();
3409 		pr_err("cannot register Generic Netlink interface.\n");
		/* Unwind the earlier sockopt registration on failure. */
3410 		nf_unregister_sockopt(&ip_vs_sockopts);
3414 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3415 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3417 	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3419 	ip_vs_new_estimator(&ip_vs_stats);
3421 	/* Hook the defense timer */
3422 	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
/*
 * Tear down the control interface in reverse order of
 * ip_vs_control_init(): defense timer, estimator, sysctl, /proc
 * entries, genl family, sockopt.
 *
 * NOTE(review): cancel_work_sync(&defense_work.work) right after
 * cancel_delayed_work_sync(&defense_work) looks redundant — the
 * latter already cancels the underlying work and waits for it.
 * Confirm against the workqueue API before removing.
 */
3429 void ip_vs_control_cleanup(void)
3432 	ip_vs_trash_cleanup();
3433 	cancel_delayed_work_sync(&defense_work);
3434 	cancel_work_sync(&defense_work.work);
3435 	ip_vs_kill_estimator(&ip_vs_stats);
3436 	unregister_sysctl_table(sysctl_header);
3437 	proc_net_remove(&init_net, "ip_vs_stats");
3438 	proc_net_remove(&init_net, "ip_vs");
3439 	ip_vs_genl_unregister();
3440 	nf_unregister_sockopt(&ip_vs_sockopts);