]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/netfilter/ipvs/ip_vs_ctl.c
d12a13c497ba2374f402e14e22c5671ccf5d1f51
[karo-tx-linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <net/ip.h>
42 #ifdef CONFIG_IP_VS_IPV6
43 #include <net/ipv6.h>
44 #include <net/ip6_route.h>
45 #endif
46 #include <net/route.h>
47 #include <net/sock.h>
48 #include <net/genetlink.h>
49
50 #include <asm/uaccess.h>
51
52 #include <net/ip_vs.h>
53
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
56
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72 /* 1/rate drop and drop-entry variables */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77 /* number of virtual services */
78 static int ip_vs_num_services = 0;
79
80 /* sysctl variables */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
93 #endif
94 int sysctl_ip_vs_snat_reroute = 1;
95 int sysctl_ip_vs_sync_ver = 1;          /* Default version of sync proto */
96
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; starts at 0. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif /* CONFIG_IP_VS_DEBUG */
105
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Tell whether an IPv6 address belongs to this host.
 *
 *	A route to @addr is looked up in init_net; the address is reported as
 *	local when the resulting route is bound to a device that has
 *	IFF_LOOPBACK set.  The approach is taken from rt6_fill_node() in
 *	net/ipv6/route.c.
 *
 *	Returns 1 when the address is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};
	struct dst_entry *dst = ip6_route_output(&init_net, NULL, &fl);
	struct rt6_info *rt = (struct rt6_info *)dst;
	int is_local;

	is_local = rt && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/*
	 * The lookup hands back a referenced route.  Drop that reference
	 * on every path, otherwise each call leaks one dst entry.
	 * dst_release() accepts NULL.
	 */
	dst_release(dst);

	return is_local;
}
#endif
126 /*
127  *      update_defense_level is called from keventd and from sysctl,
128  *      so it needs to protect itself from softirqs
129  */
130 static void update_defense_level(void)
131 {
132         struct sysinfo i;
133         static int old_secure_tcp = 0;
134         int availmem;
135         int nomem;
136         int to_change = -1;
137
138         /* we only count free and buffered memory (in pages) */
139         si_meminfo(&i);
140         availmem = i.freeram + i.bufferram;
141         /* however in linux 2.5 the i.bufferram is total page cache size,
142            we need adjust it */
143         /* si_swapinfo(&i); */
144         /* availmem = availmem - (i.totalswap - i.freeswap); */
145
146         nomem = (availmem < sysctl_ip_vs_amemthresh);
147
148         local_bh_disable();
149
150         /* drop_entry */
151         spin_lock(&__ip_vs_dropentry_lock);
152         switch (sysctl_ip_vs_drop_entry) {
153         case 0:
154                 atomic_set(&ip_vs_dropentry, 0);
155                 break;
156         case 1:
157                 if (nomem) {
158                         atomic_set(&ip_vs_dropentry, 1);
159                         sysctl_ip_vs_drop_entry = 2;
160                 } else {
161                         atomic_set(&ip_vs_dropentry, 0);
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         atomic_set(&ip_vs_dropentry, 1);
167                 } else {
168                         atomic_set(&ip_vs_dropentry, 0);
169                         sysctl_ip_vs_drop_entry = 1;
170                 };
171                 break;
172         case 3:
173                 atomic_set(&ip_vs_dropentry, 1);
174                 break;
175         }
176         spin_unlock(&__ip_vs_dropentry_lock);
177
178         /* drop_packet */
179         spin_lock(&__ip_vs_droppacket_lock);
180         switch (sysctl_ip_vs_drop_packet) {
181         case 0:
182                 ip_vs_drop_rate = 0;
183                 break;
184         case 1:
185                 if (nomem) {
186                         ip_vs_drop_rate = ip_vs_drop_counter
187                                 = sysctl_ip_vs_amemthresh /
188                                 (sysctl_ip_vs_amemthresh-availmem);
189                         sysctl_ip_vs_drop_packet = 2;
190                 } else {
191                         ip_vs_drop_rate = 0;
192                 }
193                 break;
194         case 2:
195                 if (nomem) {
196                         ip_vs_drop_rate = ip_vs_drop_counter
197                                 = sysctl_ip_vs_amemthresh /
198                                 (sysctl_ip_vs_amemthresh-availmem);
199                 } else {
200                         ip_vs_drop_rate = 0;
201                         sysctl_ip_vs_drop_packet = 1;
202                 }
203                 break;
204         case 3:
205                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
206                 break;
207         }
208         spin_unlock(&__ip_vs_droppacket_lock);
209
210         /* secure_tcp */
211         spin_lock(&ip_vs_securetcp_lock);
212         switch (sysctl_ip_vs_secure_tcp) {
213         case 0:
214                 if (old_secure_tcp >= 2)
215                         to_change = 0;
216                 break;
217         case 1:
218                 if (nomem) {
219                         if (old_secure_tcp < 2)
220                                 to_change = 1;
221                         sysctl_ip_vs_secure_tcp = 2;
222                 } else {
223                         if (old_secure_tcp >= 2)
224                                 to_change = 0;
225                 }
226                 break;
227         case 2:
228                 if (nomem) {
229                         if (old_secure_tcp < 2)
230                                 to_change = 1;
231                 } else {
232                         if (old_secure_tcp >= 2)
233                                 to_change = 0;
234                         sysctl_ip_vs_secure_tcp = 1;
235                 }
236                 break;
237         case 3:
238                 if (old_secure_tcp < 2)
239                         to_change = 1;
240                 break;
241         }
242         old_secure_tcp = sysctl_ip_vs_secure_tcp;
243         if (to_change >= 0)
244                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
245         spin_unlock(&ip_vs_securetcp_lock);
246
247         local_bh_enable();
248 }
249
250
251 /*
252  *      Timer for checking the defense
253  */
254 #define DEFENSE_TIMER_PERIOD    1*HZ
255 static void defense_work_handler(struct work_struct *work);
256 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
257
258 static void defense_work_handler(struct work_struct *work)
259 {
260         update_defense_level();
261         if (atomic_read(&ip_vs_dropentry))
262                 ip_vs_random_dropentry();
263
264         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
265 }
266
267 int
268 ip_vs_use_count_inc(void)
269 {
270         return try_module_get(THIS_MODULE);
271 }
272
273 void
274 ip_vs_use_count_dec(void)
275 {
276         module_put(THIS_MODULE);
277 }
278
279
280 /*
281  *      Hash table: for virtual service lookups
282  */
283 #define IP_VS_SVC_TAB_BITS 8
284 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
285 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
286
287 /* the service table hashed by <protocol, addr, port> */
288 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
289 /* the service table hashed by fwmark */
290 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
291
292 /*
293  *      Hash table: for real service lookups
294  */
295 #define IP_VS_RTAB_BITS 4
296 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
297 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
298
299 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
300
301 /*
302  *      Trash for destinations
303  */
304 static LIST_HEAD(ip_vs_dest_trash);
305
306 /*
307  *      FTP & NULL virtual service counters
308  */
309 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
310 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
311
312
313 /*
314  *      Returns hash value for virtual service
315  */
316 static __inline__ unsigned
317 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
318                   __be16 port)
319 {
320         register unsigned porth = ntohs(port);
321         __be32 addr_fold = addr->ip;
322
323 #ifdef CONFIG_IP_VS_IPV6
324         if (af == AF_INET6)
325                 addr_fold = addr->ip6[0]^addr->ip6[1]^
326                             addr->ip6[2]^addr->ip6[3];
327 #endif
328
329         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
330                 & IP_VS_SVC_TAB_MASK;
331 }
332
333 /*
334  *      Returns hash value of fwmark for virtual service lookup
335  */
336 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
337 {
338         return fwmark & IP_VS_SVC_TAB_MASK;
339 }
340
341 /*
342  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
343  *      or in the ip_vs_svc_fwm_table by fwmark.
344  *      Should be called with locked tables.
345  */
346 static int ip_vs_svc_hash(struct ip_vs_service *svc)
347 {
348         unsigned hash;
349
350         if (svc->flags & IP_VS_SVC_F_HASHED) {
351                 pr_err("%s(): request for already hashed, called from %pF\n",
352                        __func__, __builtin_return_address(0));
353                 return 0;
354         }
355
356         if (svc->fwmark == 0) {
357                 /*
358                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
359                  */
360                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
361                                          svc->port);
362                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
363         } else {
364                 /*
365                  *  Hash it by fwmark in ip_vs_svc_fwm_table
366                  */
367                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
368                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
369         }
370
371         svc->flags |= IP_VS_SVC_F_HASHED;
372         /* increase its refcnt because it is referenced by the svc table */
373         atomic_inc(&svc->refcnt);
374         return 1;
375 }
376
377
378 /*
379  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
380  *      Should be called with locked tables.
381  */
382 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
383 {
384         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
385                 pr_err("%s(): request for unhash flagged, called from %pF\n",
386                        __func__, __builtin_return_address(0));
387                 return 0;
388         }
389
390         if (svc->fwmark == 0) {
391                 /* Remove it from the ip_vs_svc_table table */
392                 list_del(&svc->s_list);
393         } else {
394                 /* Remove it from the ip_vs_svc_fwm_table table */
395                 list_del(&svc->f_list);
396         }
397
398         svc->flags &= ~IP_VS_SVC_F_HASHED;
399         atomic_dec(&svc->refcnt);
400         return 1;
401 }
402
403
404 /*
405  *      Get service by {proto,addr,port} in the service table.
406  */
407 static inline struct ip_vs_service *
408 __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
409                     __be16 vport)
410 {
411         unsigned hash;
412         struct ip_vs_service *svc;
413
414         /* Check for "full" addressed entries */
415         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
416
417         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
418                 if ((svc->af == af)
419                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
420                     && (svc->port == vport)
421                     && (svc->protocol == protocol)) {
422                         /* HIT */
423                         return svc;
424                 }
425         }
426
427         return NULL;
428 }
429
430
431 /*
432  *      Get service by {fwmark} in the service table.
433  */
434 static inline struct ip_vs_service *
435 __ip_vs_svc_fwm_find(int af, __u32 fwmark)
436 {
437         unsigned hash;
438         struct ip_vs_service *svc;
439
440         /* Check for fwmark addressed entries */
441         hash = ip_vs_svc_fwm_hashkey(fwmark);
442
443         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
444                 if (svc->fwmark == fwmark && svc->af == af) {
445                         /* HIT */
446                         return svc;
447                 }
448         }
449
450         return NULL;
451 }
452
453 struct ip_vs_service *
454 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
455                   const union nf_inet_addr *vaddr, __be16 vport)
456 {
457         struct ip_vs_service *svc;
458
459         read_lock(&__ip_vs_svc_lock);
460
461         /*
462          *      Check the table hashed by fwmark first
463          */
464         if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
465                 goto out;
466
467         /*
468          *      Check the table hashed by <protocol,addr,port>
469          *      for "full" addressed entries
470          */
471         svc = __ip_vs_service_find(af, protocol, vaddr, vport);
472
473         if (svc == NULL
474             && protocol == IPPROTO_TCP
475             && atomic_read(&ip_vs_ftpsvc_counter)
476             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
477                 /*
478                  * Check if ftp service entry exists, the packet
479                  * might belong to FTP data connections.
480                  */
481                 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
482         }
483
484         if (svc == NULL
485             && atomic_read(&ip_vs_nullsvc_counter)) {
486                 /*
487                  * Check if the catch-all port (port zero) exists
488                  */
489                 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
490         }
491
492   out:
493         if (svc)
494                 atomic_inc(&svc->usecnt);
495         read_unlock(&__ip_vs_svc_lock);
496
497         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
498                       fwmark, ip_vs_proto_name(protocol),
499                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
500                       svc ? "hit" : "not hit");
501
502         return svc;
503 }
504
505
506 static inline void
507 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
508 {
509         atomic_inc(&svc->refcnt);
510         dest->svc = svc;
511 }
512
513 static void
514 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
515 {
516         struct ip_vs_service *svc = dest->svc;
517
518         dest->svc = NULL;
519         if (atomic_dec_and_test(&svc->refcnt)) {
520                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
521                               svc->fwmark,
522                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
523                               ntohs(svc->port), atomic_read(&svc->usecnt));
524                 kfree(svc);
525         }
526 }
527
528
529 /*
530  *      Returns hash value for real service
531  */
532 static inline unsigned ip_vs_rs_hashkey(int af,
533                                             const union nf_inet_addr *addr,
534                                             __be16 port)
535 {
536         register unsigned porth = ntohs(port);
537         __be32 addr_fold = addr->ip;
538
539 #ifdef CONFIG_IP_VS_IPV6
540         if (af == AF_INET6)
541                 addr_fold = addr->ip6[0]^addr->ip6[1]^
542                             addr->ip6[2]^addr->ip6[3];
543 #endif
544
545         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
546                 & IP_VS_RTAB_MASK;
547 }
548
549 /*
550  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
551  *      should be called with locked tables.
552  */
553 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
554 {
555         unsigned hash;
556
557         if (!list_empty(&dest->d_list)) {
558                 return 0;
559         }
560
561         /*
562          *      Hash by proto,addr,port,
563          *      which are the parameters of the real service.
564          */
565         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
566
567         list_add(&dest->d_list, &ip_vs_rtable[hash]);
568
569         return 1;
570 }
571
572 /*
573  *      UNhashes ip_vs_dest from ip_vs_rtable.
574  *      should be called with locked tables.
575  */
576 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
577 {
578         /*
579          * Remove it from the ip_vs_rtable table.
580          */
581         if (!list_empty(&dest->d_list)) {
582                 list_del(&dest->d_list);
583                 INIT_LIST_HEAD(&dest->d_list);
584         }
585
586         return 1;
587 }
588
589 /*
590  *      Lookup real service by <proto,addr,port> in the real service table.
591  */
592 struct ip_vs_dest *
593 ip_vs_lookup_real_service(int af, __u16 protocol,
594                           const union nf_inet_addr *daddr,
595                           __be16 dport)
596 {
597         unsigned hash;
598         struct ip_vs_dest *dest;
599
600         /*
601          *      Check for "full" addressed entries
602          *      Return the first found entry
603          */
604         hash = ip_vs_rs_hashkey(af, daddr, dport);
605
606         read_lock(&__ip_vs_rs_lock);
607         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
608                 if ((dest->af == af)
609                     && ip_vs_addr_equal(af, &dest->addr, daddr)
610                     && (dest->port == dport)
611                     && ((dest->protocol == protocol) ||
612                         dest->vfwmark)) {
613                         /* HIT */
614                         read_unlock(&__ip_vs_rs_lock);
615                         return dest;
616                 }
617         }
618         read_unlock(&__ip_vs_rs_lock);
619
620         return NULL;
621 }
622
623 /*
624  *      Lookup destination by {addr,port} in the given service
625  */
626 static struct ip_vs_dest *
627 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
628                   __be16 dport)
629 {
630         struct ip_vs_dest *dest;
631
632         /*
633          * Find the destination for the given service
634          */
635         list_for_each_entry(dest, &svc->destinations, n_list) {
636                 if ((dest->af == svc->af)
637                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
638                     && (dest->port == dport)) {
639                         /* HIT */
640                         return dest;
641                 }
642         }
643
644         return NULL;
645 }
646
647 /*
648  * Find destination by {daddr,dport,vaddr,protocol}
649  * Cretaed to be used in ip_vs_process_message() in
650  * the backup synchronization daemon. It finds the
651  * destination to be bound to the received connection
652  * on the backup.
653  *
654  * ip_vs_lookup_real_service() looked promissing, but
655  * seems not working as expected.
656  */
657 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
658                                    __be16 dport,
659                                    const union nf_inet_addr *vaddr,
660                                    __be16 vport, __u16 protocol, __u32 fwmark)
661 {
662         struct ip_vs_dest *dest;
663         struct ip_vs_service *svc;
664
665         svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
666         if (!svc)
667                 return NULL;
668         dest = ip_vs_lookup_dest(svc, daddr, dport);
669         if (dest)
670                 atomic_inc(&dest->refcnt);
671         ip_vs_service_put(svc);
672         return dest;
673 }
674
675 /*
676  *  Lookup dest by {svc,addr,port} in the destination trash.
677  *  The destination trash is used to hold the destinations that are removed
678  *  from the service table but are still referenced by some conn entries.
679  *  The reason to add the destination trash is when the dest is temporary
680  *  down (either by administrator or by monitor program), the dest can be
681  *  picked back from the trash, the remaining connections to the dest can
682  *  continue, and the counting information of the dest is also useful for
683  *  scheduling.
684  */
685 static struct ip_vs_dest *
686 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687                      __be16 dport)
688 {
689         struct ip_vs_dest *dest, *nxt;
690
691         /*
692          * Find the destination in trash
693          */
694         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
695                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696                               "dest->refcnt=%d\n",
697                               dest->vfwmark,
698                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
699                               ntohs(dest->port),
700                               atomic_read(&dest->refcnt));
701                 if (dest->af == svc->af &&
702                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
703                     dest->port == dport &&
704                     dest->vfwmark == svc->fwmark &&
705                     dest->protocol == svc->protocol &&
706                     (svc->fwmark ||
707                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
708                       dest->vport == svc->port))) {
709                         /* HIT */
710                         return dest;
711                 }
712
713                 /*
714                  * Try to purge the destination from trash if not referenced
715                  */
716                 if (atomic_read(&dest->refcnt) == 1) {
717                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718                                       "from trash\n",
719                                       dest->vfwmark,
720                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
721                                       ntohs(dest->port));
722                         list_del(&dest->n_list);
723                         ip_vs_dst_reset(dest);
724                         __ip_vs_unbind_svc(dest);
725                         kfree(dest);
726                 }
727         }
728
729         return NULL;
730 }
731
732
733 /*
734  *  Clean up all the destinations in the trash
735  *  Called by the ip_vs_control_cleanup()
736  *
737  *  When the ip_vs_control_clearup is activated by ipvs module exit,
738  *  the service tables must have been flushed and all the connections
739  *  are expired, and the refcnt of each destination in the trash must
740  *  be 1, so we simply release them here.
741  */
742 static void ip_vs_trash_cleanup(void)
743 {
744         struct ip_vs_dest *dest, *nxt;
745
746         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747                 list_del(&dest->n_list);
748                 ip_vs_dst_reset(dest);
749                 __ip_vs_unbind_svc(dest);
750                 kfree(dest);
751         }
752 }
753
754
755 static void
756 ip_vs_zero_stats(struct ip_vs_stats *stats)
757 {
758         spin_lock_bh(&stats->lock);
759
760         memset(&stats->ustats, 0, sizeof(stats->ustats));
761         ip_vs_zero_estimator(stats);
762
763         spin_unlock_bh(&stats->lock);
764 }
765
766 /*
767  *      Update a destination in the given service
768  */
769 static void
770 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771                     struct ip_vs_dest_user_kern *udest, int add)
772 {
773         int conn_flags;
774
775         /* set the weight and the flags */
776         atomic_set(&dest->weight, udest->weight);
777         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
778         conn_flags |= IP_VS_CONN_F_INACTIVE;
779
780         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
781         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
782                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
783         } else {
784                 /*
785                  *    Put the real service in ip_vs_rtable if not present.
786                  *    For now only for NAT!
787                  */
788                 write_lock_bh(&__ip_vs_rs_lock);
789                 ip_vs_rs_hash(dest);
790                 write_unlock_bh(&__ip_vs_rs_lock);
791         }
792         atomic_set(&dest->conn_flags, conn_flags);
793
794         /* bind the service */
795         if (!dest->svc) {
796                 __ip_vs_bind_svc(dest, svc);
797         } else {
798                 if (dest->svc != svc) {
799                         __ip_vs_unbind_svc(dest);
800                         ip_vs_zero_stats(&dest->stats);
801                         __ip_vs_bind_svc(dest, svc);
802                 }
803         }
804
805         /* set the dest status flags */
806         dest->flags |= IP_VS_DEST_F_AVAILABLE;
807
808         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810         dest->u_threshold = udest->u_threshold;
811         dest->l_threshold = udest->l_threshold;
812
813         spin_lock(&dest->dst_lock);
814         ip_vs_dst_reset(dest);
815         spin_unlock(&dest->dst_lock);
816
817         if (add)
818                 ip_vs_new_estimator(&dest->stats);
819
820         write_lock_bh(&__ip_vs_svc_lock);
821
822         /* Wait until all other svc users go away */
823         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
824
825         if (add) {
826                 list_add(&dest->n_list, &svc->destinations);
827                 svc->num_dests++;
828         }
829
830         /* call the update_service, because server weight may be changed */
831         if (svc->scheduler->update_service)
832                 svc->scheduler->update_service(svc);
833
834         write_unlock_bh(&__ip_vs_svc_lock);
835 }
836
837
838 /*
839  *      Create a destination for the given service
840  */
841 static int
842 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843                struct ip_vs_dest **dest_p)
844 {
845         struct ip_vs_dest *dest;
846         unsigned atype;
847
848         EnterFunction(2);
849
850 #ifdef CONFIG_IP_VS_IPV6
851         if (svc->af == AF_INET6) {
852                 atype = ipv6_addr_type(&udest->addr.in6);
853                 if ((!(atype & IPV6_ADDR_UNICAST) ||
854                         atype & IPV6_ADDR_LINKLOCAL) &&
855                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
856                         return -EINVAL;
857         } else
858 #endif
859         {
860                 atype = inet_addr_type(&init_net, udest->addr.ip);
861                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
862                         return -EINVAL;
863         }
864
865         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
866         if (dest == NULL) {
867                 pr_err("%s(): no memory.\n", __func__);
868                 return -ENOMEM;
869         }
870
871         dest->af = svc->af;
872         dest->protocol = svc->protocol;
873         dest->vaddr = svc->addr;
874         dest->vport = svc->port;
875         dest->vfwmark = svc->fwmark;
876         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
877         dest->port = udest->port;
878
879         atomic_set(&dest->activeconns, 0);
880         atomic_set(&dest->inactconns, 0);
881         atomic_set(&dest->persistconns, 0);
882         atomic_set(&dest->refcnt, 1);
883
884         INIT_LIST_HEAD(&dest->d_list);
885         spin_lock_init(&dest->dst_lock);
886         spin_lock_init(&dest->stats.lock);
887         __ip_vs_update_dest(svc, dest, udest, 1);
888
889         *dest_p = dest;
890
891         LeaveFunction(2);
892         return 0;
893 }
894
895
896 /*
897  *      Add a destination into an existing service
898  */
899 static int
900 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
901 {
902         struct ip_vs_dest *dest;
903         union nf_inet_addr daddr;
904         __be16 dport = udest->port;
905         int ret;
906
907         EnterFunction(2);
908
909         if (udest->weight < 0) {
910                 pr_err("%s(): server weight less than zero\n", __func__);
911                 return -ERANGE;
912         }
913
914         if (udest->l_threshold > udest->u_threshold) {
915                 pr_err("%s(): lower threshold is higher than upper threshold\n",
916                         __func__);
917                 return -ERANGE;
918         }
919
920         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
921
922         /*
923          * Check if the dest already exists in the list
924          */
925         dest = ip_vs_lookup_dest(svc, &daddr, dport);
926
927         if (dest != NULL) {
928                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
929                 return -EEXIST;
930         }
931
932         /*
933          * Check if the dest already exists in the trash and
934          * is from the same service
935          */
936         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
937
938         if (dest != NULL) {
939                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
940                               "dest->refcnt=%d, service %u/%s:%u\n",
941                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
942                               atomic_read(&dest->refcnt),
943                               dest->vfwmark,
944                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
945                               ntohs(dest->vport));
946
947                 /*
948                  * Get the destination from the trash
949                  */
950                 list_del(&dest->n_list);
951
952                 __ip_vs_update_dest(svc, dest, udest, 1);
953                 ret = 0;
954         } else {
955                 /*
956                  * Allocate and initialize the dest structure
957                  */
958                 ret = ip_vs_new_dest(svc, udest, &dest);
959         }
960         LeaveFunction(2);
961
962         return ret;
963 }
964
965
966 /*
967  *      Edit a destination in the given service
968  */
969 static int
970 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
971 {
972         struct ip_vs_dest *dest;
973         union nf_inet_addr daddr;
974         __be16 dport = udest->port;
975
976         EnterFunction(2);
977
978         if (udest->weight < 0) {
979                 pr_err("%s(): server weight less than zero\n", __func__);
980                 return -ERANGE;
981         }
982
983         if (udest->l_threshold > udest->u_threshold) {
984                 pr_err("%s(): lower threshold is higher than upper threshold\n",
985                         __func__);
986                 return -ERANGE;
987         }
988
989         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
990
991         /*
992          *  Lookup the destination list
993          */
994         dest = ip_vs_lookup_dest(svc, &daddr, dport);
995
996         if (dest == NULL) {
997                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
998                 return -ENOENT;
999         }
1000
1001         __ip_vs_update_dest(svc, dest, udest, 0);
1002         LeaveFunction(2);
1003
1004         return 0;
1005 }
1006
1007
1008 /*
1009  *      Delete a destination (must be already unlinked from the service)
1010  */
1011 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1012 {
1013         ip_vs_kill_estimator(&dest->stats);
1014
1015         /*
1016          *  Remove it from the d-linked list with the real services.
1017          */
1018         write_lock_bh(&__ip_vs_rs_lock);
1019         ip_vs_rs_unhash(dest);
1020         write_unlock_bh(&__ip_vs_rs_lock);
1021
1022         /*
1023          *  Decrease the refcnt of the dest, and free the dest
1024          *  if nobody refers to it (refcnt=0). Otherwise, throw
1025          *  the destination into the trash.
1026          */
1027         if (atomic_dec_and_test(&dest->refcnt)) {
1028                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1029                               dest->vfwmark,
1030                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1031                               ntohs(dest->port));
1032                 ip_vs_dst_reset(dest);
1033                 /* simply decrease svc->refcnt here, let the caller check
1034                    and release the service if nobody refers to it.
1035                    Only user context can release destination and service,
1036                    and only one user context can update virtual service at a
1037                    time, so the operation here is OK */
1038                 atomic_dec(&dest->svc->refcnt);
1039                 kfree(dest);
1040         } else {
1041                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042                               "dest->refcnt=%d\n",
1043                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1044                               ntohs(dest->port),
1045                               atomic_read(&dest->refcnt));
1046                 list_add(&dest->n_list, &ip_vs_dest_trash);
1047                 atomic_inc(&dest->refcnt);
1048         }
1049 }
1050
1051
1052 /*
1053  *      Unlink a destination from the given service
1054  */
1055 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056                                 struct ip_vs_dest *dest,
1057                                 int svcupd)
1058 {
1059         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1060
1061         /*
1062          *  Remove it from the d-linked destination list.
1063          */
1064         list_del(&dest->n_list);
1065         svc->num_dests--;
1066
1067         /*
1068          *  Call the update_service function of its scheduler
1069          */
1070         if (svcupd && svc->scheduler->update_service)
1071                         svc->scheduler->update_service(svc);
1072 }
1073
1074
1075 /*
1076  *      Delete a destination server in the given service
1077  */
1078 static int
1079 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1080 {
1081         struct ip_vs_dest *dest;
1082         __be16 dport = udest->port;
1083
1084         EnterFunction(2);
1085
1086         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1087
1088         if (dest == NULL) {
1089                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1090                 return -ENOENT;
1091         }
1092
1093         write_lock_bh(&__ip_vs_svc_lock);
1094
1095         /*
1096          *      Wait until all other svc users go away.
1097          */
1098         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1099
1100         /*
1101          *      Unlink dest from the service
1102          */
1103         __ip_vs_unlink_dest(svc, dest, 1);
1104
1105         write_unlock_bh(&__ip_vs_svc_lock);
1106
1107         /*
1108          *      Delete the destination
1109          */
1110         __ip_vs_del_dest(dest);
1111
1112         LeaveFunction(2);
1113
1114         return 0;
1115 }
1116
1117
1118 /*
1119  *      Add a service into the service hash table
1120  */
1121 static int
1122 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1123                   struct ip_vs_service **svc_p)
1124 {
1125         int ret = 0;
1126         struct ip_vs_scheduler *sched = NULL;
1127         struct ip_vs_pe *pe = NULL;
1128         struct ip_vs_service *svc = NULL;
1129
1130         /* increase the module use count */
1131         ip_vs_use_count_inc();
1132
1133         /* Lookup the scheduler by 'u->sched_name' */
1134         sched = ip_vs_scheduler_get(u->sched_name);
1135         if (sched == NULL) {
1136                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1137                 ret = -ENOENT;
1138                 goto out_err;
1139         }
1140
1141         if (u->pe_name && *u->pe_name) {
1142                 pe = ip_vs_pe_getbyname(u->pe_name);
1143                 if (pe == NULL) {
1144                         pr_info("persistence engine module ip_vs_pe_%s "
1145                                 "not found\n", u->pe_name);
1146                         ret = -ENOENT;
1147                         goto out_err;
1148                 }
1149         }
1150
1151 #ifdef CONFIG_IP_VS_IPV6
1152         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1153                 ret = -EINVAL;
1154                 goto out_err;
1155         }
1156 #endif
1157
1158         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1159         if (svc == NULL) {
1160                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1161                 ret = -ENOMEM;
1162                 goto out_err;
1163         }
1164
1165         /* I'm the first user of the service */
1166         atomic_set(&svc->usecnt, 0);
1167         atomic_set(&svc->refcnt, 0);
1168
1169         svc->af = u->af;
1170         svc->protocol = u->protocol;
1171         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1172         svc->port = u->port;
1173         svc->fwmark = u->fwmark;
1174         svc->flags = u->flags;
1175         svc->timeout = u->timeout * HZ;
1176         svc->netmask = u->netmask;
1177
1178         INIT_LIST_HEAD(&svc->destinations);
1179         rwlock_init(&svc->sched_lock);
1180         spin_lock_init(&svc->stats.lock);
1181
1182         /* Bind the scheduler */
1183         ret = ip_vs_bind_scheduler(svc, sched);
1184         if (ret)
1185                 goto out_err;
1186         sched = NULL;
1187
1188         /* Bind the ct retriever */
1189         ip_vs_bind_pe(svc, pe);
1190         pe = NULL;
1191
1192         /* Update the virtual service counters */
1193         if (svc->port == FTPPORT)
1194                 atomic_inc(&ip_vs_ftpsvc_counter);
1195         else if (svc->port == 0)
1196                 atomic_inc(&ip_vs_nullsvc_counter);
1197
1198         ip_vs_new_estimator(&svc->stats);
1199
1200         /* Count only IPv4 services for old get/setsockopt interface */
1201         if (svc->af == AF_INET)
1202                 ip_vs_num_services++;
1203
1204         /* Hash the service into the service table */
1205         write_lock_bh(&__ip_vs_svc_lock);
1206         ip_vs_svc_hash(svc);
1207         write_unlock_bh(&__ip_vs_svc_lock);
1208
1209         *svc_p = svc;
1210         return 0;
1211
1212  out_err:
1213         if (svc != NULL) {
1214                 ip_vs_unbind_scheduler(svc);
1215                 if (svc->inc) {
1216                         local_bh_disable();
1217                         ip_vs_app_inc_put(svc->inc);
1218                         local_bh_enable();
1219                 }
1220                 kfree(svc);
1221         }
1222         ip_vs_scheduler_put(sched);
1223         ip_vs_pe_put(pe);
1224
1225         /* decrease the module use count */
1226         ip_vs_use_count_dec();
1227
1228         return ret;
1229 }
1230
1231
1232 /*
1233  *      Edit a service and bind it with a new scheduler
1234  */
1235 static int
1236 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1237 {
1238         struct ip_vs_scheduler *sched, *old_sched;
1239         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1240         int ret = 0;
1241
1242         /*
1243          * Lookup the scheduler, by 'u->sched_name'
1244          */
1245         sched = ip_vs_scheduler_get(u->sched_name);
1246         if (sched == NULL) {
1247                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1248                 return -ENOENT;
1249         }
1250         old_sched = sched;
1251
1252         if (u->pe_name && *u->pe_name) {
1253                 pe = ip_vs_pe_getbyname(u->pe_name);
1254                 if (pe == NULL) {
1255                         pr_info("persistence engine module ip_vs_pe_%s "
1256                                 "not found\n", u->pe_name);
1257                         ret = -ENOENT;
1258                         goto out;
1259                 }
1260                 old_pe = pe;
1261         }
1262
1263 #ifdef CONFIG_IP_VS_IPV6
1264         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1265                 ret = -EINVAL;
1266                 goto out;
1267         }
1268 #endif
1269
1270         write_lock_bh(&__ip_vs_svc_lock);
1271
1272         /*
1273          * Wait until all other svc users go away.
1274          */
1275         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1276
1277         /*
1278          * Set the flags and timeout value
1279          */
1280         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1281         svc->timeout = u->timeout * HZ;
1282         svc->netmask = u->netmask;
1283
1284         old_sched = svc->scheduler;
1285         if (sched != old_sched) {
1286                 /*
1287                  * Unbind the old scheduler
1288                  */
1289                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1290                         old_sched = sched;
1291                         goto out_unlock;
1292                 }
1293
1294                 /*
1295                  * Bind the new scheduler
1296                  */
1297                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1298                         /*
1299                          * If ip_vs_bind_scheduler fails, restore the old
1300                          * scheduler.
1301                          * The main reason of failure is out of memory.
1302                          *
1303                          * The question is if the old scheduler can be
1304                          * restored all the time. TODO: if it cannot be
1305                          * restored some time, we must delete the service,
1306                          * otherwise the system may crash.
1307                          */
1308                         ip_vs_bind_scheduler(svc, old_sched);
1309                         old_sched = sched;
1310                         goto out_unlock;
1311                 }
1312         }
1313
1314         old_pe = svc->pe;
1315         if (pe != old_pe) {
1316                 ip_vs_unbind_pe(svc);
1317                 ip_vs_bind_pe(svc, pe);
1318         }
1319
1320   out_unlock:
1321         write_unlock_bh(&__ip_vs_svc_lock);
1322   out:
1323         ip_vs_scheduler_put(old_sched);
1324         ip_vs_pe_put(old_pe);
1325         return ret;
1326 }
1327
1328
1329 /*
1330  *      Delete a service from the service list
1331  *      - The service must be unlinked, unlocked and not referenced!
1332  *      - We are called under _bh lock
1333  */
1334 static void __ip_vs_del_service(struct ip_vs_service *svc)
1335 {
1336         struct ip_vs_dest *dest, *nxt;
1337         struct ip_vs_scheduler *old_sched;
1338         struct ip_vs_pe *old_pe;
1339
1340         pr_info("%s: enter\n", __func__);
1341
1342         /* Count only IPv4 services for old get/setsockopt interface */
1343         if (svc->af == AF_INET)
1344                 ip_vs_num_services--;
1345
1346         ip_vs_kill_estimator(&svc->stats);
1347
1348         /* Unbind scheduler */
1349         old_sched = svc->scheduler;
1350         ip_vs_unbind_scheduler(svc);
1351         ip_vs_scheduler_put(old_sched);
1352
1353         /* Unbind persistence engine */
1354         old_pe = svc->pe;
1355         ip_vs_unbind_pe(svc);
1356         ip_vs_pe_put(old_pe);
1357
1358         /* Unbind app inc */
1359         if (svc->inc) {
1360                 ip_vs_app_inc_put(svc->inc);
1361                 svc->inc = NULL;
1362         }
1363
1364         /*
1365          *    Unlink the whole destination list
1366          */
1367         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1368                 __ip_vs_unlink_dest(svc, dest, 0);
1369                 __ip_vs_del_dest(dest);
1370         }
1371
1372         /*
1373          *    Update the virtual service counters
1374          */
1375         if (svc->port == FTPPORT)
1376                 atomic_dec(&ip_vs_ftpsvc_counter);
1377         else if (svc->port == 0)
1378                 atomic_dec(&ip_vs_nullsvc_counter);
1379
1380         /*
1381          *    Free the service if nobody refers to it
1382          */
1383         if (atomic_read(&svc->refcnt) == 0) {
1384                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1385                               svc->fwmark,
1386                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1387                               ntohs(svc->port), atomic_read(&svc->usecnt));
1388                 kfree(svc);
1389         }
1390
1391         /* decrease the module use count */
1392         ip_vs_use_count_dec();
1393 }
1394
1395 /*
1396  * Unlink a service from list and try to delete it if its refcnt reached 0
1397  */
1398 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1399 {
1400         /*
1401          * Unhash it from the service table
1402          */
1403         write_lock_bh(&__ip_vs_svc_lock);
1404
1405         ip_vs_svc_unhash(svc);
1406
1407         /*
1408          * Wait until all the svc users go away.
1409          */
1410         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1411
1412         __ip_vs_del_service(svc);
1413
1414         write_unlock_bh(&__ip_vs_svc_lock);
1415 }
1416
1417 /*
1418  *      Delete a service from the service list
1419  */
1420 static int ip_vs_del_service(struct ip_vs_service *svc)
1421 {
1422         if (svc == NULL)
1423                 return -EEXIST;
1424         ip_vs_unlink_service(svc);
1425
1426         return 0;
1427 }
1428
1429
1430 /*
1431  *      Flush all the virtual services
1432  */
1433 static int ip_vs_flush(void)
1434 {
1435         int idx;
1436         struct ip_vs_service *svc, *nxt;
1437
1438         /*
1439          * Flush the service table hashed by <protocol,addr,port>
1440          */
1441         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1442                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1443                         ip_vs_unlink_service(svc);
1444                 }
1445         }
1446
1447         /*
1448          * Flush the service table hashed by fwmark
1449          */
1450         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1451                 list_for_each_entry_safe(svc, nxt,
1452                                          &ip_vs_svc_fwm_table[idx], f_list) {
1453                         ip_vs_unlink_service(svc);
1454                 }
1455         }
1456
1457         return 0;
1458 }
1459
1460
1461 /*
1462  *      Zero counters in a service or all services
1463  */
1464 static int ip_vs_zero_service(struct ip_vs_service *svc)
1465 {
1466         struct ip_vs_dest *dest;
1467
1468         write_lock_bh(&__ip_vs_svc_lock);
1469         list_for_each_entry(dest, &svc->destinations, n_list) {
1470                 ip_vs_zero_stats(&dest->stats);
1471         }
1472         ip_vs_zero_stats(&svc->stats);
1473         write_unlock_bh(&__ip_vs_svc_lock);
1474         return 0;
1475 }
1476
1477 static int ip_vs_zero_all(void)
1478 {
1479         int idx;
1480         struct ip_vs_service *svc;
1481
1482         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1484                         ip_vs_zero_service(svc);
1485                 }
1486         }
1487
1488         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1489                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1490                         ip_vs_zero_service(svc);
1491                 }
1492         }
1493
1494         ip_vs_zero_stats(&ip_vs_stats);
1495         return 0;
1496 }
1497
1498
1499 static int
1500 proc_do_defense_mode(ctl_table *table, int write,
1501                      void __user *buffer, size_t *lenp, loff_t *ppos)
1502 {
1503         int *valp = table->data;
1504         int val = *valp;
1505         int rc;
1506
1507         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1508         if (write && (*valp != val)) {
1509                 if ((*valp < 0) || (*valp > 3)) {
1510                         /* Restore the correct value */
1511                         *valp = val;
1512                 } else {
1513                         update_defense_level();
1514                 }
1515         }
1516         return rc;
1517 }
1518
1519
1520 static int
1521 proc_do_sync_threshold(ctl_table *table, int write,
1522                        void __user *buffer, size_t *lenp, loff_t *ppos)
1523 {
1524         int *valp = table->data;
1525         int val[2];
1526         int rc;
1527
1528         /* backup the value first */
1529         memcpy(val, valp, sizeof(val));
1530
1531         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1532         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1533                 /* Restore the correct value */
1534                 memcpy(valp, val, sizeof(val));
1535         }
1536         return rc;
1537 }
1538
1539 static int
1540 proc_do_sync_mode(ctl_table *table, int write,
1541                      void __user *buffer, size_t *lenp, loff_t *ppos)
1542 {
1543         int *valp = table->data;
1544         int val = *valp;
1545         int rc;
1546
1547         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1548         if (write && (*valp != val)) {
1549                 if ((*valp < 0) || (*valp > 1)) {
1550                         /* Restore the correct value */
1551                         *valp = val;
1552                 } else {
1553                         ip_vs_sync_switch_mode(val);
1554                 }
1555         }
1556         return rc;
1557 }
1558
1559 /*
1560  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1561  */
1562
1563 static struct ctl_table vs_vars[] = {
1564         {
1565                 .procname       = "amemthresh",
1566                 .data           = &sysctl_ip_vs_amemthresh,
1567                 .maxlen         = sizeof(int),
1568                 .mode           = 0644,
1569                 .proc_handler   = proc_dointvec,
1570         },
1571 #ifdef CONFIG_IP_VS_DEBUG
1572         {
1573                 .procname       = "debug_level",
1574                 .data           = &sysctl_ip_vs_debug_level,
1575                 .maxlen         = sizeof(int),
1576                 .mode           = 0644,
1577                 .proc_handler   = proc_dointvec,
1578         },
1579 #endif
1580         {
1581                 .procname       = "am_droprate",
1582                 .data           = &sysctl_ip_vs_am_droprate,
1583                 .maxlen         = sizeof(int),
1584                 .mode           = 0644,
1585                 .proc_handler   = proc_dointvec,
1586         },
1587         {
1588                 .procname       = "drop_entry",
1589                 .data           = &sysctl_ip_vs_drop_entry,
1590                 .maxlen         = sizeof(int),
1591                 .mode           = 0644,
1592                 .proc_handler   = proc_do_defense_mode,
1593         },
1594         {
1595                 .procname       = "drop_packet",
1596                 .data           = &sysctl_ip_vs_drop_packet,
1597                 .maxlen         = sizeof(int),
1598                 .mode           = 0644,
1599                 .proc_handler   = proc_do_defense_mode,
1600         },
1601 #ifdef CONFIG_IP_VS_NFCT
1602         {
1603                 .procname       = "conntrack",
1604                 .data           = &sysctl_ip_vs_conntrack,
1605                 .maxlen         = sizeof(int),
1606                 .mode           = 0644,
1607                 .proc_handler   = &proc_dointvec,
1608         },
1609 #endif
1610         {
1611                 .procname       = "secure_tcp",
1612                 .data           = &sysctl_ip_vs_secure_tcp,
1613                 .maxlen         = sizeof(int),
1614                 .mode           = 0644,
1615                 .proc_handler   = proc_do_defense_mode,
1616         },
1617         {
1618                 .procname       = "snat_reroute",
1619                 .data           = &sysctl_ip_vs_snat_reroute,
1620                 .maxlen         = sizeof(int),
1621                 .mode           = 0644,
1622                 .proc_handler   = &proc_dointvec,
1623         },
1624         {
1625                 .procname       = "sync_version",
1626                 .data           = &sysctl_ip_vs_sync_ver,
1627                 .maxlen         = sizeof(int),
1628                 .mode           = 0644,
1629                 .proc_handler   = &proc_do_sync_mode,
1630         },
1631 #if 0
1632         {
1633                 .procname       = "timeout_established",
1634                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1635                 .maxlen         = sizeof(int),
1636                 .mode           = 0644,
1637                 .proc_handler   = proc_dointvec_jiffies,
1638         },
1639         {
1640                 .procname       = "timeout_synsent",
1641                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1642                 .maxlen         = sizeof(int),
1643                 .mode           = 0644,
1644                 .proc_handler   = proc_dointvec_jiffies,
1645         },
1646         {
1647                 .procname       = "timeout_synrecv",
1648                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1649                 .maxlen         = sizeof(int),
1650                 .mode           = 0644,
1651                 .proc_handler   = proc_dointvec_jiffies,
1652         },
1653         {
1654                 .procname       = "timeout_finwait",
1655                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1656                 .maxlen         = sizeof(int),
1657                 .mode           = 0644,
1658                 .proc_handler   = proc_dointvec_jiffies,
1659         },
1660         {
1661                 .procname       = "timeout_timewait",
1662                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1663                 .maxlen         = sizeof(int),
1664                 .mode           = 0644,
1665                 .proc_handler   = proc_dointvec_jiffies,
1666         },
1667         {
1668                 .procname       = "timeout_close",
1669                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1670                 .maxlen         = sizeof(int),
1671                 .mode           = 0644,
1672                 .proc_handler   = proc_dointvec_jiffies,
1673         },
1674         {
1675                 .procname       = "timeout_closewait",
1676                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1677                 .maxlen         = sizeof(int),
1678                 .mode           = 0644,
1679                 .proc_handler   = proc_dointvec_jiffies,
1680         },
1681         {
1682                 .procname       = "timeout_lastack",
1683                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1684                 .maxlen         = sizeof(int),
1685                 .mode           = 0644,
1686                 .proc_handler   = proc_dointvec_jiffies,
1687         },
1688         {
1689                 .procname       = "timeout_listen",
1690                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1691                 .maxlen         = sizeof(int),
1692                 .mode           = 0644,
1693                 .proc_handler   = proc_dointvec_jiffies,
1694         },
1695         {
1696                 .procname       = "timeout_synack",
1697                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1698                 .maxlen         = sizeof(int),
1699                 .mode           = 0644,
1700                 .proc_handler   = proc_dointvec_jiffies,
1701         },
1702         {
1703                 .procname       = "timeout_udp",
1704                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1705                 .maxlen         = sizeof(int),
1706                 .mode           = 0644,
1707                 .proc_handler   = proc_dointvec_jiffies,
1708         },
1709         {
1710                 .procname       = "timeout_icmp",
1711                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1712                 .maxlen         = sizeof(int),
1713                 .mode           = 0644,
1714                 .proc_handler   = proc_dointvec_jiffies,
1715         },
1716 #endif
1717         {
1718                 .procname       = "cache_bypass",
1719                 .data           = &sysctl_ip_vs_cache_bypass,
1720                 .maxlen         = sizeof(int),
1721                 .mode           = 0644,
1722                 .proc_handler   = proc_dointvec,
1723         },
1724         {
1725                 .procname       = "expire_nodest_conn",
1726                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1727                 .maxlen         = sizeof(int),
1728                 .mode           = 0644,
1729                 .proc_handler   = proc_dointvec,
1730         },
1731         {
1732                 .procname       = "expire_quiescent_template",
1733                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1734                 .maxlen         = sizeof(int),
1735                 .mode           = 0644,
1736                 .proc_handler   = proc_dointvec,
1737         },
1738         {
1739                 .procname       = "sync_threshold",
1740                 .data           = &sysctl_ip_vs_sync_threshold,
1741                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1742                 .mode           = 0644,
1743                 .proc_handler   = proc_do_sync_threshold,
1744         },
1745         {
1746                 .procname       = "nat_icmp_send",
1747                 .data           = &sysctl_ip_vs_nat_icmp_send,
1748                 .maxlen         = sizeof(int),
1749                 .mode           = 0644,
1750                 .proc_handler   = proc_dointvec,
1751         },
1752         { }
1753 };
1754
1755 const struct ctl_path net_vs_ctl_path[] = {
1756         { .procname = "net", },
1757         { .procname = "ipv4", },
1758         { .procname = "vs", },
1759         { }
1760 };
1761 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1762
1763 static struct ctl_table_header * sysctl_header;
1764
1765 #ifdef CONFIG_PROC_FS
1766
/*
 *	Cursor kept in seq->private while the service tables are dumped.
 */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* bucket index within that table */
};
1771
1772 /*
1773  *      Write the contents of the VS rule table to a PROCfs file.
1774  *      (It is kept just for backward compatibility)
1775  */
1776 static inline const char *ip_vs_fwd_name(unsigned flags)
1777 {
1778         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1779         case IP_VS_CONN_F_LOCALNODE:
1780                 return "Local";
1781         case IP_VS_CONN_F_TUNNEL:
1782                 return "Tunnel";
1783         case IP_VS_CONN_F_DROUTE:
1784                 return "Route";
1785         default:
1786                 return "Masq";
1787         }
1788 }
1789
1790
1791 /* Get the Nth entry in the two lists */
1792 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1793 {
1794         struct ip_vs_iter *iter = seq->private;
1795         int idx;
1796         struct ip_vs_service *svc;
1797
1798         /* look in hash by protocol */
1799         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1800                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1801                         if (pos-- == 0){
1802                                 iter->table = ip_vs_svc_table;
1803                                 iter->bucket = idx;
1804                                 return svc;
1805                         }
1806                 }
1807         }
1808
1809         /* keep looking in fwmark */
1810         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1811                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1812                         if (pos-- == 0) {
1813                                 iter->table = ip_vs_svc_fwm_table;
1814                                 iter->bucket = idx;
1815                                 return svc;
1816                         }
1817                 }
1818         }
1819
1820         return NULL;
1821 }
1822
1823 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1824 __acquires(__ip_vs_svc_lock)
1825 {
1826
1827         read_lock_bh(&__ip_vs_svc_lock);
1828         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1829 }
1830
1831
1832 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1833 {
1834         struct list_head *e;
1835         struct ip_vs_iter *iter;
1836         struct ip_vs_service *svc;
1837
1838         ++*pos;
1839         if (v == SEQ_START_TOKEN)
1840                 return ip_vs_info_array(seq,0);
1841
1842         svc = v;
1843         iter = seq->private;
1844
1845         if (iter->table == ip_vs_svc_table) {
1846                 /* next service in table hashed by protocol */
1847                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1848                         return list_entry(e, struct ip_vs_service, s_list);
1849
1850
1851                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1852                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1853                                             s_list) {
1854                                 return svc;
1855                         }
1856                 }
1857
1858                 iter->table = ip_vs_svc_fwm_table;
1859                 iter->bucket = -1;
1860                 goto scan_fwmark;
1861         }
1862
1863         /* next service in hashed by fwmark */
1864         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1865                 return list_entry(e, struct ip_vs_service, f_list);
1866
1867  scan_fwmark:
1868         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1869                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1870                                     f_list)
1871                         return svc;
1872         }
1873
1874         return NULL;
1875 }
1876
1877 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1878 __releases(__ip_vs_svc_lock)
1879 {
1880         read_unlock_bh(&__ip_vs_svc_lock);
1881 }
1882
1883
1884 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1885 {
1886         if (v == SEQ_START_TOKEN) {
1887                 seq_printf(seq,
1888                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1889                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1890                 seq_puts(seq,
1891                          "Prot LocalAddress:Port Scheduler Flags\n");
1892                 seq_puts(seq,
1893                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1894         } else {
1895                 const struct ip_vs_service *svc = v;
1896                 const struct ip_vs_iter *iter = seq->private;
1897                 const struct ip_vs_dest *dest;
1898
1899                 if (iter->table == ip_vs_svc_table) {
1900 #ifdef CONFIG_IP_VS_IPV6
1901                         if (svc->af == AF_INET6)
1902                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1903                                            ip_vs_proto_name(svc->protocol),
1904                                            &svc->addr.in6,
1905                                            ntohs(svc->port),
1906                                            svc->scheduler->name);
1907                         else
1908 #endif
1909                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1910                                            ip_vs_proto_name(svc->protocol),
1911                                            ntohl(svc->addr.ip),
1912                                            ntohs(svc->port),
1913                                            svc->scheduler->name,
1914                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1915                 } else {
1916                         seq_printf(seq, "FWM  %08X %s %s",
1917                                    svc->fwmark, svc->scheduler->name,
1918                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1919                 }
1920
1921                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1922                         seq_printf(seq, "persistent %d %08X\n",
1923                                 svc->timeout,
1924                                 ntohl(svc->netmask));
1925                 else
1926                         seq_putc(seq, '\n');
1927
1928                 list_for_each_entry(dest, &svc->destinations, n_list) {
1929 #ifdef CONFIG_IP_VS_IPV6
1930                         if (dest->af == AF_INET6)
1931                                 seq_printf(seq,
1932                                            "  -> [%pI6]:%04X"
1933                                            "      %-7s %-6d %-10d %-10d\n",
1934                                            &dest->addr.in6,
1935                                            ntohs(dest->port),
1936                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1937                                            atomic_read(&dest->weight),
1938                                            atomic_read(&dest->activeconns),
1939                                            atomic_read(&dest->inactconns));
1940                         else
1941 #endif
1942                                 seq_printf(seq,
1943                                            "  -> %08X:%04X      "
1944                                            "%-7s %-6d %-10d %-10d\n",
1945                                            ntohl(dest->addr.ip),
1946                                            ntohs(dest->port),
1947                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1948                                            atomic_read(&dest->weight),
1949                                            atomic_read(&dest->activeconns),
1950                                            atomic_read(&dest->inactconns));
1951
1952                 }
1953         }
1954         return 0;
1955 }
1956
1957 static const struct seq_operations ip_vs_info_seq_ops = {
1958         .start = ip_vs_info_seq_start,
1959         .next  = ip_vs_info_seq_next,
1960         .stop  = ip_vs_info_seq_stop,
1961         .show  = ip_vs_info_seq_show,
1962 };
1963
1964 static int ip_vs_info_open(struct inode *inode, struct file *file)
1965 {
1966         return seq_open_private(file, &ip_vs_info_seq_ops,
1967                         sizeof(struct ip_vs_iter));
1968 }
1969
1970 static const struct file_operations ip_vs_info_fops = {
1971         .owner   = THIS_MODULE,
1972         .open    = ip_vs_info_open,
1973         .read    = seq_read,
1974         .llseek  = seq_lseek,
1975         .release = seq_release_private,
1976 };
1977
1978 #endif
1979
1980 struct ip_vs_stats ip_vs_stats = {
1981         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1982 };
1983
1984 #ifdef CONFIG_PROC_FS
1985 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1986 {
1987
1988 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1989         seq_puts(seq,
1990                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1991         seq_printf(seq,
1992                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1993
1994         spin_lock_bh(&ip_vs_stats.lock);
1995         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1996                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1997                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1998                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1999
2000 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2001         seq_puts(seq,
2002                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2003         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
2004                         ip_vs_stats.ustats.cps,
2005                         ip_vs_stats.ustats.inpps,
2006                         ip_vs_stats.ustats.outpps,
2007                         ip_vs_stats.ustats.inbps,
2008                         ip_vs_stats.ustats.outbps);
2009         spin_unlock_bh(&ip_vs_stats.lock);
2010
2011         return 0;
2012 }
2013
2014 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2015 {
2016         return single_open(file, ip_vs_stats_show, NULL);
2017 }
2018
2019 static const struct file_operations ip_vs_stats_fops = {
2020         .owner = THIS_MODULE,
2021         .open = ip_vs_stats_seq_open,
2022         .read = seq_read,
2023         .llseek = seq_lseek,
2024         .release = single_release,
2025 };
2026
2027 #endif
2028
2029 /*
2030  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2031  */
2032 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2033 {
2034         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2035                   u->tcp_timeout,
2036                   u->tcp_fin_timeout,
2037                   u->udp_timeout);
2038
2039 #ifdef CONFIG_IP_VS_PROTO_TCP
2040         if (u->tcp_timeout) {
2041                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2042                         = u->tcp_timeout * HZ;
2043         }
2044
2045         if (u->tcp_fin_timeout) {
2046                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2047                         = u->tcp_fin_timeout * HZ;
2048         }
2049 #endif
2050
2051 #ifdef CONFIG_IP_VS_PROTO_UDP
2052         if (u->udp_timeout) {
2053                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2054                         = u->udp_timeout * HZ;
2055         }
2056 #endif
2057         return 0;
2058 }
2059
2060
/*
 * Helpers for the set-sockopt argument length table.
 *
 * SET_CMDID() turns a command number into an index relative to
 * IP_VS_BASE_CTL.  Its argument is parenthesized so that an expression
 * argument (e.g. a conditional) is subtracted from as a whole.
 * The *_ARG_LEN macros give the size of the user argument per command
 * class; MAX_ARG_LEN is the largest of them (service + destination).
 */
#define SET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
                                 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN             SVCDEST_ARG_LEN
2068
2069 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2070         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2071         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2072         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2073         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2074         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2075         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2076         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2077         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2078         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2079         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2080         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2081 };
2082
2083 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2084                                   struct ip_vs_service_user *usvc_compat)
2085 {
2086         memset(usvc, 0, sizeof(*usvc));
2087
2088         usvc->af                = AF_INET;
2089         usvc->protocol          = usvc_compat->protocol;
2090         usvc->addr.ip           = usvc_compat->addr;
2091         usvc->port              = usvc_compat->port;
2092         usvc->fwmark            = usvc_compat->fwmark;
2093
2094         /* Deep copy of sched_name is not needed here */
2095         usvc->sched_name        = usvc_compat->sched_name;
2096
2097         usvc->flags             = usvc_compat->flags;
2098         usvc->timeout           = usvc_compat->timeout;
2099         usvc->netmask           = usvc_compat->netmask;
2100 }
2101
2102 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2103                                    struct ip_vs_dest_user *udest_compat)
2104 {
2105         memset(udest, 0, sizeof(*udest));
2106
2107         udest->addr.ip          = udest_compat->addr;
2108         udest->port             = udest_compat->port;
2109         udest->conn_flags       = udest_compat->conn_flags;
2110         udest->weight           = udest_compat->weight;
2111         udest->u_threshold      = udest_compat->u_threshold;
2112         udest->l_threshold      = udest_compat->l_threshold;
2113 }
2114
2115 static int
2116 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2117 {
2118         int ret;
2119         unsigned char arg[MAX_ARG_LEN];
2120         struct ip_vs_service_user *usvc_compat;
2121         struct ip_vs_service_user_kern usvc;
2122         struct ip_vs_service *svc;
2123         struct ip_vs_dest_user *udest_compat;
2124         struct ip_vs_dest_user_kern udest;
2125
2126         if (!capable(CAP_NET_ADMIN))
2127                 return -EPERM;
2128
2129         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2130                 return -EINVAL;
2131         if (len < 0 || len >  MAX_ARG_LEN)
2132                 return -EINVAL;
2133         if (len != set_arglen[SET_CMDID(cmd)]) {
2134                 pr_err("set_ctl: len %u != %u\n",
2135                        len, set_arglen[SET_CMDID(cmd)]);
2136                 return -EINVAL;
2137         }
2138
2139         if (copy_from_user(arg, user, len) != 0)
2140                 return -EFAULT;
2141
2142         /* increase the module use count */
2143         ip_vs_use_count_inc();
2144
2145         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2146                 ret = -ERESTARTSYS;
2147                 goto out_dec;
2148         }
2149
2150         if (cmd == IP_VS_SO_SET_FLUSH) {
2151                 /* Flush the virtual service */
2152                 ret = ip_vs_flush();
2153                 goto out_unlock;
2154         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2155                 /* Set timeout values for (tcp tcpfin udp) */
2156                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2157                 goto out_unlock;
2158         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2159                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2160                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2161                 goto out_unlock;
2162         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2163                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2164                 ret = stop_sync_thread(dm->state);
2165                 goto out_unlock;
2166         }
2167
2168         usvc_compat = (struct ip_vs_service_user *)arg;
2169         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2170
2171         /* We only use the new structs internally, so copy userspace compat
2172          * structs to extended internal versions */
2173         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2174         ip_vs_copy_udest_compat(&udest, udest_compat);
2175
2176         if (cmd == IP_VS_SO_SET_ZERO) {
2177                 /* if no service address is set, zero counters in all */
2178                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2179                         ret = ip_vs_zero_all();
2180                         goto out_unlock;
2181                 }
2182         }
2183
2184         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2185         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2186             usvc.protocol != IPPROTO_SCTP) {
2187                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2188                        usvc.protocol, &usvc.addr.ip,
2189                        ntohs(usvc.port), usvc.sched_name);
2190                 ret = -EFAULT;
2191                 goto out_unlock;
2192         }
2193
2194         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2195         if (usvc.fwmark == 0)
2196                 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2197                                            &usvc.addr, usvc.port);
2198         else
2199                 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2200
2201         if (cmd != IP_VS_SO_SET_ADD
2202             && (svc == NULL || svc->protocol != usvc.protocol)) {
2203                 ret = -ESRCH;
2204                 goto out_unlock;
2205         }
2206
2207         switch (cmd) {
2208         case IP_VS_SO_SET_ADD:
2209                 if (svc != NULL)
2210                         ret = -EEXIST;
2211                 else
2212                         ret = ip_vs_add_service(&usvc, &svc);
2213                 break;
2214         case IP_VS_SO_SET_EDIT:
2215                 ret = ip_vs_edit_service(svc, &usvc);
2216                 break;
2217         case IP_VS_SO_SET_DEL:
2218                 ret = ip_vs_del_service(svc);
2219                 if (!ret)
2220                         goto out_unlock;
2221                 break;
2222         case IP_VS_SO_SET_ZERO:
2223                 ret = ip_vs_zero_service(svc);
2224                 break;
2225         case IP_VS_SO_SET_ADDDEST:
2226                 ret = ip_vs_add_dest(svc, &udest);
2227                 break;
2228         case IP_VS_SO_SET_EDITDEST:
2229                 ret = ip_vs_edit_dest(svc, &udest);
2230                 break;
2231         case IP_VS_SO_SET_DELDEST:
2232                 ret = ip_vs_del_dest(svc, &udest);
2233                 break;
2234         default:
2235                 ret = -EINVAL;
2236         }
2237
2238   out_unlock:
2239         mutex_unlock(&__ip_vs_mutex);
2240   out_dec:
2241         /* decrease the module use count */
2242         ip_vs_use_count_dec();
2243
2244         return ret;
2245 }
2246
2247
2248 static void
2249 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2250 {
2251         spin_lock_bh(&src->lock);
2252         memcpy(dst, &src->ustats, sizeof(*dst));
2253         spin_unlock_bh(&src->lock);
2254 }
2255
2256 static void
2257 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2258 {
2259         dst->protocol = src->protocol;
2260         dst->addr = src->addr.ip;
2261         dst->port = src->port;
2262         dst->fwmark = src->fwmark;
2263         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2264         dst->flags = src->flags;
2265         dst->timeout = src->timeout / HZ;
2266         dst->netmask = src->netmask;
2267         dst->num_dests = src->num_dests;
2268         ip_vs_copy_stats(&dst->stats, &src->stats);
2269 }
2270
2271 static inline int
2272 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2273                             struct ip_vs_get_services __user *uptr)
2274 {
2275         int idx, count=0;
2276         struct ip_vs_service *svc;
2277         struct ip_vs_service_entry entry;
2278         int ret = 0;
2279
2280         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2281                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2282                         /* Only expose IPv4 entries to old interface */
2283                         if (svc->af != AF_INET)
2284                                 continue;
2285
2286                         if (count >= get->num_services)
2287                                 goto out;
2288                         memset(&entry, 0, sizeof(entry));
2289                         ip_vs_copy_service(&entry, svc);
2290                         if (copy_to_user(&uptr->entrytable[count],
2291                                          &entry, sizeof(entry))) {
2292                                 ret = -EFAULT;
2293                                 goto out;
2294                         }
2295                         count++;
2296                 }
2297         }
2298
2299         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2300                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2301                         /* Only expose IPv4 entries to old interface */
2302                         if (svc->af != AF_INET)
2303                                 continue;
2304
2305                         if (count >= get->num_services)
2306                                 goto out;
2307                         memset(&entry, 0, sizeof(entry));
2308                         ip_vs_copy_service(&entry, svc);
2309                         if (copy_to_user(&uptr->entrytable[count],
2310                                          &entry, sizeof(entry))) {
2311                                 ret = -EFAULT;
2312                                 goto out;
2313                         }
2314                         count++;
2315                 }
2316         }
2317   out:
2318         return ret;
2319 }
2320
2321 static inline int
2322 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2323                          struct ip_vs_get_dests __user *uptr)
2324 {
2325         struct ip_vs_service *svc;
2326         union nf_inet_addr addr = { .ip = get->addr };
2327         int ret = 0;
2328
2329         if (get->fwmark)
2330                 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2331         else
2332                 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2333                                            get->port);
2334
2335         if (svc) {
2336                 int count = 0;
2337                 struct ip_vs_dest *dest;
2338                 struct ip_vs_dest_entry entry;
2339
2340                 list_for_each_entry(dest, &svc->destinations, n_list) {
2341                         if (count >= get->num_dests)
2342                                 break;
2343
2344                         entry.addr = dest->addr.ip;
2345                         entry.port = dest->port;
2346                         entry.conn_flags = atomic_read(&dest->conn_flags);
2347                         entry.weight = atomic_read(&dest->weight);
2348                         entry.u_threshold = dest->u_threshold;
2349                         entry.l_threshold = dest->l_threshold;
2350                         entry.activeconns = atomic_read(&dest->activeconns);
2351                         entry.inactconns = atomic_read(&dest->inactconns);
2352                         entry.persistconns = atomic_read(&dest->persistconns);
2353                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2354                         if (copy_to_user(&uptr->entrytable[count],
2355                                          &entry, sizeof(entry))) {
2356                                 ret = -EFAULT;
2357                                 break;
2358                         }
2359                         count++;
2360                 }
2361         } else
2362                 ret = -ESRCH;
2363         return ret;
2364 }
2365
2366 static inline void
2367 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2368 {
2369 #ifdef CONFIG_IP_VS_PROTO_TCP
2370         u->tcp_timeout =
2371                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2372         u->tcp_fin_timeout =
2373                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2374 #endif
2375 #ifdef CONFIG_IP_VS_PROTO_UDP
2376         u->udp_timeout =
2377                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2378 #endif
2379 }
2380
2381
/*
 * Helpers for the get-sockopt argument length table.
 *
 * GET_CMDID() turns a command number into an index relative to
 * IP_VS_BASE_CTL.  Its argument is parenthesized so that an expression
 * argument is subtracted from as a whole.  The GET_*_ARG_LEN macros give
 * the size of the fixed part of the user argument for each command.
 */
#define GET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2389
2390 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2391         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2392         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2393         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2394         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2395         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2396         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2397         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2398 };
2399
2400 static int
2401 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2402 {
2403         unsigned char arg[128];
2404         int ret = 0;
2405         unsigned int copylen;
2406
2407         if (!capable(CAP_NET_ADMIN))
2408                 return -EPERM;
2409
2410         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2411                 return -EINVAL;
2412
2413         if (*len < get_arglen[GET_CMDID(cmd)]) {
2414                 pr_err("get_ctl: len %u < %u\n",
2415                        *len, get_arglen[GET_CMDID(cmd)]);
2416                 return -EINVAL;
2417         }
2418
2419         copylen = get_arglen[GET_CMDID(cmd)];
2420         if (copylen > 128)
2421                 return -EINVAL;
2422
2423         if (copy_from_user(arg, user, copylen) != 0)
2424                 return -EFAULT;
2425
2426         if (mutex_lock_interruptible(&__ip_vs_mutex))
2427                 return -ERESTARTSYS;
2428
2429         switch (cmd) {
2430         case IP_VS_SO_GET_VERSION:
2431         {
2432                 char buf[64];
2433
2434                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2435                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2436                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2437                         ret = -EFAULT;
2438                         goto out;
2439                 }
2440                 *len = strlen(buf)+1;
2441         }
2442         break;
2443
2444         case IP_VS_SO_GET_INFO:
2445         {
2446                 struct ip_vs_getinfo info;
2447                 info.version = IP_VS_VERSION_CODE;
2448                 info.size = ip_vs_conn_tab_size;
2449                 info.num_services = ip_vs_num_services;
2450                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2451                         ret = -EFAULT;
2452         }
2453         break;
2454
2455         case IP_VS_SO_GET_SERVICES:
2456         {
2457                 struct ip_vs_get_services *get;
2458                 int size;
2459
2460                 get = (struct ip_vs_get_services *)arg;
2461                 size = sizeof(*get) +
2462                         sizeof(struct ip_vs_service_entry) * get->num_services;
2463                 if (*len != size) {
2464                         pr_err("length: %u != %u\n", *len, size);
2465                         ret = -EINVAL;
2466                         goto out;
2467                 }
2468                 ret = __ip_vs_get_service_entries(get, user);
2469         }
2470         break;
2471
2472         case IP_VS_SO_GET_SERVICE:
2473         {
2474                 struct ip_vs_service_entry *entry;
2475                 struct ip_vs_service *svc;
2476                 union nf_inet_addr addr;
2477
2478                 entry = (struct ip_vs_service_entry *)arg;
2479                 addr.ip = entry->addr;
2480                 if (entry->fwmark)
2481                         svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2482                 else
2483                         svc = __ip_vs_service_find(AF_INET, entry->protocol,
2484                                                    &addr, entry->port);
2485                 if (svc) {
2486                         ip_vs_copy_service(entry, svc);
2487                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2488                                 ret = -EFAULT;
2489                 } else
2490                         ret = -ESRCH;
2491         }
2492         break;
2493
2494         case IP_VS_SO_GET_DESTS:
2495         {
2496                 struct ip_vs_get_dests *get;
2497                 int size;
2498
2499                 get = (struct ip_vs_get_dests *)arg;
2500                 size = sizeof(*get) +
2501                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2502                 if (*len != size) {
2503                         pr_err("length: %u != %u\n", *len, size);
2504                         ret = -EINVAL;
2505                         goto out;
2506                 }
2507                 ret = __ip_vs_get_dest_entries(get, user);
2508         }
2509         break;
2510
2511         case IP_VS_SO_GET_TIMEOUT:
2512         {
2513                 struct ip_vs_timeout_user t;
2514
2515                 __ip_vs_get_timeouts(&t);
2516                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2517                         ret = -EFAULT;
2518         }
2519         break;
2520
2521         case IP_VS_SO_GET_DAEMON:
2522         {
2523                 struct ip_vs_daemon_user d[2];
2524
2525                 memset(&d, 0, sizeof(d));
2526                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2527                         d[0].state = IP_VS_STATE_MASTER;
2528                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2529                         d[0].syncid = ip_vs_master_syncid;
2530                 }
2531                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2532                         d[1].state = IP_VS_STATE_BACKUP;
2533                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2534                         d[1].syncid = ip_vs_backup_syncid;
2535                 }
2536                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2537                         ret = -EFAULT;
2538         }
2539         break;
2540
2541         default:
2542                 ret = -EINVAL;
2543         }
2544
2545   out:
2546         mutex_unlock(&__ip_vs_mutex);
2547         return ret;
2548 }
2549
2550
2551 static struct nf_sockopt_ops ip_vs_sockopts = {
2552         .pf             = PF_INET,
2553         .set_optmin     = IP_VS_BASE_CTL,
2554         .set_optmax     = IP_VS_SO_SET_MAX+1,
2555         .set            = do_ip_vs_set_ctl,
2556         .get_optmin     = IP_VS_BASE_CTL,
2557         .get_optmax     = IP_VS_SO_GET_MAX+1,
2558         .get            = do_ip_vs_get_ctl,
2559         .owner          = THIS_MODULE,
2560 };
2561
2562 /*
2563  * Generic Netlink interface
2564  */
2565
2566 /* IPVS genetlink family */
2567 static struct genl_family ip_vs_genl_family = {
2568         .id             = GENL_ID_GENERATE,
2569         .hdrsize        = 0,
2570         .name           = IPVS_GENL_NAME,
2571         .version        = IPVS_GENL_VERSION,
2572         .maxattr        = IPVS_CMD_MAX,
2573 };
2574
2575 /* Policy used for first-level command attributes */
2576 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2577         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2578         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2579         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2580         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2581         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2582         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2583 };
2584
2585 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2586 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2587         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2588         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2589                                             .len = IP_VS_IFNAME_MAXLEN },
2590         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2591 };
2592
2593 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2594 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2595         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2596         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2597         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2598                                             .len = sizeof(union nf_inet_addr) },
2599         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2600         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2601         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2602                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2603         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2604                                             .len = IP_VS_PENAME_MAXLEN },
2605         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2606                                             .len = sizeof(struct ip_vs_flags) },
2607         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2608         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2609         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2610 };
2611
2612 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2613 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2614         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2615                                             .len = sizeof(union nf_inet_addr) },
2616         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2617         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2618         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2619         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2620         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2621         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2622         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2623         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2624         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2625 };
2626
2627 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2628                                  struct ip_vs_stats *stats)
2629 {
2630         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2631         if (!nl_stats)
2632                 return -EMSGSIZE;
2633
2634         spin_lock_bh(&stats->lock);
2635
2636         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2637         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2638         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2639         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2640         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2641         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2642         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2643         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2644         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2645         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2646
2647         spin_unlock_bh(&stats->lock);
2648
2649         nla_nest_end(skb, nl_stats);
2650
2651         return 0;
2652
2653 nla_put_failure:
2654         spin_unlock_bh(&stats->lock);
2655         nla_nest_cancel(skb, nl_stats);
2656         return -EMSGSIZE;
2657 }
2658
2659 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2660                                    struct ip_vs_service *svc)
2661 {
2662         struct nlattr *nl_service;
2663         struct ip_vs_flags flags = { .flags = svc->flags,
2664                                      .mask = ~0 };
2665
2666         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2667         if (!nl_service)
2668                 return -EMSGSIZE;
2669
2670         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2671
2672         if (svc->fwmark) {
2673                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2674         } else {
2675                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2676                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2677                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2678         }
2679
2680         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2681         if (svc->pe)
2682                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2683         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2684         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2685         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2686
2687         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2688                 goto nla_put_failure;
2689
2690         nla_nest_end(skb, nl_service);
2691
2692         return 0;
2693
2694 nla_put_failure:
2695         nla_nest_cancel(skb, nl_service);
2696         return -EMSGSIZE;
2697 }
2698
2699 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2700                                    struct ip_vs_service *svc,
2701                                    struct netlink_callback *cb)
2702 {
2703         void *hdr;
2704
2705         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2706                           &ip_vs_genl_family, NLM_F_MULTI,
2707                           IPVS_CMD_NEW_SERVICE);
2708         if (!hdr)
2709                 return -EMSGSIZE;
2710
2711         if (ip_vs_genl_fill_service(skb, svc) < 0)
2712                 goto nla_put_failure;
2713
2714         return genlmsg_end(skb, hdr);
2715
2716 nla_put_failure:
2717         genlmsg_cancel(skb, hdr);
2718         return -EMSGSIZE;
2719 }
2720
2721 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2722                                     struct netlink_callback *cb)
2723 {
2724         int idx = 0, i;
2725         int start = cb->args[0];
2726         struct ip_vs_service *svc;
2727
2728         mutex_lock(&__ip_vs_mutex);
2729         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2730                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2731                         if (++idx <= start)
2732                                 continue;
2733                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2734                                 idx--;
2735                                 goto nla_put_failure;
2736                         }
2737                 }
2738         }
2739
2740         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2741                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2742                         if (++idx <= start)
2743                                 continue;
2744                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2745                                 idx--;
2746                                 goto nla_put_failure;
2747                         }
2748                 }
2749         }
2750
2751 nla_put_failure:
2752         mutex_unlock(&__ip_vs_mutex);
2753         cb->args[0] = idx;
2754
2755         return skb->len;
2756 }
2757
2758 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2759                                     struct nlattr *nla, int full_entry,
2760                                     struct ip_vs_service **ret_svc)
2761 {
2762         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2763         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2764         struct ip_vs_service *svc;
2765
2766         /* Parse mandatory identifying service fields first */
2767         if (nla == NULL ||
2768             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2769                 return -EINVAL;
2770
2771         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2772         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2773         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2774         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2775         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2776
2777         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2778                 return -EINVAL;
2779
2780         memset(usvc, 0, sizeof(*usvc));
2781
2782         usvc->af = nla_get_u16(nla_af);
2783 #ifdef CONFIG_IP_VS_IPV6
2784         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2785 #else
2786         if (usvc->af != AF_INET)
2787 #endif
2788                 return -EAFNOSUPPORT;
2789
2790         if (nla_fwmark) {
2791                 usvc->protocol = IPPROTO_TCP;
2792                 usvc->fwmark = nla_get_u32(nla_fwmark);
2793         } else {
2794                 usvc->protocol = nla_get_u16(nla_protocol);
2795                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2796                 usvc->port = nla_get_u16(nla_port);
2797                 usvc->fwmark = 0;
2798         }
2799
2800         if (usvc->fwmark)
2801                 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2802         else
2803                 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2804                                            &usvc->addr, usvc->port);
2805         *ret_svc = svc;
2806
2807         /* If a full entry was requested, check for the additional fields */
2808         if (full_entry) {
2809                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2810                               *nla_netmask;
2811                 struct ip_vs_flags flags;
2812
2813                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2814                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2815                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2816                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2817                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2818
2819                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2820                         return -EINVAL;
2821
2822                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2823
2824                 /* prefill flags from service if it already exists */
2825                 if (svc)
2826                         usvc->flags = svc->flags;
2827
2828                 /* set new flags from userland */
2829                 usvc->flags = (usvc->flags & ~flags.mask) |
2830                               (flags.flags & flags.mask);
2831                 usvc->sched_name = nla_data(nla_sched);
2832                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2833                 usvc->timeout = nla_get_u32(nla_timeout);
2834                 usvc->netmask = nla_get_u32(nla_netmask);
2835         }
2836
2837         return 0;
2838 }
2839
2840 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2841 {
2842         struct ip_vs_service_user_kern usvc;
2843         struct ip_vs_service *svc;
2844         int ret;
2845
2846         ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2847         return ret ? ERR_PTR(ret) : svc;
2848 }
2849
2850 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2851 {
2852         struct nlattr *nl_dest;
2853
2854         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2855         if (!nl_dest)
2856                 return -EMSGSIZE;
2857
2858         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2859         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2860
2861         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2862                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2863         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2864         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2865         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2866         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2867                     atomic_read(&dest->activeconns));
2868         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2869                     atomic_read(&dest->inactconns));
2870         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2871                     atomic_read(&dest->persistconns));
2872
2873         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2874                 goto nla_put_failure;
2875
2876         nla_nest_end(skb, nl_dest);
2877
2878         return 0;
2879
2880 nla_put_failure:
2881         nla_nest_cancel(skb, nl_dest);
2882         return -EMSGSIZE;
2883 }
2884
2885 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2886                                 struct netlink_callback *cb)
2887 {
2888         void *hdr;
2889
2890         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2891                           &ip_vs_genl_family, NLM_F_MULTI,
2892                           IPVS_CMD_NEW_DEST);
2893         if (!hdr)
2894                 return -EMSGSIZE;
2895
2896         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2897                 goto nla_put_failure;
2898
2899         return genlmsg_end(skb, hdr);
2900
2901 nla_put_failure:
2902         genlmsg_cancel(skb, hdr);
2903         return -EMSGSIZE;
2904 }
2905
2906 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2907                                  struct netlink_callback *cb)
2908 {
2909         int idx = 0;
2910         int start = cb->args[0];
2911         struct ip_vs_service *svc;
2912         struct ip_vs_dest *dest;
2913         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2914
2915         mutex_lock(&__ip_vs_mutex);
2916
2917         /* Try to find the service for which to dump destinations */
2918         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2919                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2920                 goto out_err;
2921
2922         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2923         if (IS_ERR(svc) || svc == NULL)
2924                 goto out_err;
2925
2926         /* Dump the destinations */
2927         list_for_each_entry(dest, &svc->destinations, n_list) {
2928                 if (++idx <= start)
2929                         continue;
2930                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2931                         idx--;
2932                         goto nla_put_failure;
2933                 }
2934         }
2935
2936 nla_put_failure:
2937         cb->args[0] = idx;
2938
2939 out_err:
2940         mutex_unlock(&__ip_vs_mutex);
2941
2942         return skb->len;
2943 }
2944
2945 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2946                                  struct nlattr *nla, int full_entry)
2947 {
2948         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2949         struct nlattr *nla_addr, *nla_port;
2950
2951         /* Parse mandatory identifying destination fields first */
2952         if (nla == NULL ||
2953             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2954                 return -EINVAL;
2955
2956         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2957         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2958
2959         if (!(nla_addr && nla_port))
2960                 return -EINVAL;
2961
2962         memset(udest, 0, sizeof(*udest));
2963
2964         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2965         udest->port = nla_get_u16(nla_port);
2966
2967         /* If a full entry was requested, check for the additional fields */
2968         if (full_entry) {
2969                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2970                               *nla_l_thresh;
2971
2972                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2973                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2974                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2975                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2976
2977                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2978                         return -EINVAL;
2979
2980                 udest->conn_flags = nla_get_u32(nla_fwd)
2981                                     & IP_VS_CONN_F_FWD_MASK;
2982                 udest->weight = nla_get_u32(nla_weight);
2983                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2984                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2985         }
2986
2987         return 0;
2988 }
2989
2990 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2991                                   const char *mcast_ifn, __be32 syncid)
2992 {
2993         struct nlattr *nl_daemon;
2994
2995         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2996         if (!nl_daemon)
2997                 return -EMSGSIZE;
2998
2999         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3000         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3001         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3002
3003         nla_nest_end(skb, nl_daemon);
3004
3005         return 0;
3006
3007 nla_put_failure:
3008         nla_nest_cancel(skb, nl_daemon);
3009         return -EMSGSIZE;
3010 }
3011
3012 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3013                                   const char *mcast_ifn, __be32 syncid,
3014                                   struct netlink_callback *cb)
3015 {
3016         void *hdr;
3017         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3018                           &ip_vs_genl_family, NLM_F_MULTI,
3019                           IPVS_CMD_NEW_DAEMON);
3020         if (!hdr)
3021                 return -EMSGSIZE;
3022
3023         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3024                 goto nla_put_failure;
3025
3026         return genlmsg_end(skb, hdr);
3027
3028 nla_put_failure:
3029         genlmsg_cancel(skb, hdr);
3030         return -EMSGSIZE;
3031 }
3032
3033 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3034                                    struct netlink_callback *cb)
3035 {
3036         mutex_lock(&__ip_vs_mutex);
3037         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3038                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3039                                            ip_vs_master_mcast_ifn,
3040                                            ip_vs_master_syncid, cb) < 0)
3041                         goto nla_put_failure;
3042
3043                 cb->args[0] = 1;
3044         }
3045
3046         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3047                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3048                                            ip_vs_backup_mcast_ifn,
3049                                            ip_vs_backup_syncid, cb) < 0)
3050                         goto nla_put_failure;
3051
3052                 cb->args[1] = 1;
3053         }
3054
3055 nla_put_failure:
3056         mutex_unlock(&__ip_vs_mutex);
3057
3058         return skb->len;
3059 }
3060
3061 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3062 {
3063         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3064               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3065               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3066                 return -EINVAL;
3067
3068         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3069                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3070                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3071 }
3072
3073 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3074 {
3075         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3076                 return -EINVAL;
3077
3078         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3079 }
3080
3081 static int ip_vs_genl_set_config(struct nlattr **attrs)
3082 {
3083         struct ip_vs_timeout_user t;
3084
3085         __ip_vs_get_timeouts(&t);
3086
3087         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3088                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3089
3090         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3091                 t.tcp_fin_timeout =
3092                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3093
3094         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3095                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3096
3097         return ip_vs_set_timeout(&t);
3098 }
3099
3100 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3101 {
3102         struct ip_vs_service *svc = NULL;
3103         struct ip_vs_service_user_kern usvc;
3104         struct ip_vs_dest_user_kern udest;
3105         int ret = 0, cmd;
3106         int need_full_svc = 0, need_full_dest = 0;
3107
3108         cmd = info->genlhdr->cmd;
3109
3110         mutex_lock(&__ip_vs_mutex);
3111
3112         if (cmd == IPVS_CMD_FLUSH) {
3113                 ret = ip_vs_flush();
3114                 goto out;
3115         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3116                 ret = ip_vs_genl_set_config(info->attrs);
3117                 goto out;
3118         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3119                    cmd == IPVS_CMD_DEL_DAEMON) {
3120
3121                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3122
3123                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3124                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3125                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3126                                      ip_vs_daemon_policy)) {
3127                         ret = -EINVAL;
3128                         goto out;
3129                 }
3130
3131                 if (cmd == IPVS_CMD_NEW_DAEMON)
3132                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3133                 else
3134                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3135                 goto out;
3136         } else if (cmd == IPVS_CMD_ZERO &&
3137                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3138                 ret = ip_vs_zero_all();
3139                 goto out;
3140         }
3141
3142         /* All following commands require a service argument, so check if we
3143          * received a valid one. We need a full service specification when
3144          * adding / editing a service. Only identifying members otherwise. */
3145         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3146                 need_full_svc = 1;
3147
3148         ret = ip_vs_genl_parse_service(&usvc,
3149                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3150                                        need_full_svc, &svc);
3151         if (ret)
3152                 goto out;
3153
3154         /* Unless we're adding a new service, the service must already exist */
3155         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3156                 ret = -ESRCH;
3157                 goto out;
3158         }
3159
3160         /* Destination commands require a valid destination argument. For
3161          * adding / editing a destination, we need a full destination
3162          * specification. */
3163         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3164             cmd == IPVS_CMD_DEL_DEST) {
3165                 if (cmd != IPVS_CMD_DEL_DEST)
3166                         need_full_dest = 1;
3167
3168                 ret = ip_vs_genl_parse_dest(&udest,
3169                                             info->attrs[IPVS_CMD_ATTR_DEST],
3170                                             need_full_dest);
3171                 if (ret)
3172                         goto out;
3173         }
3174
3175         switch (cmd) {
3176         case IPVS_CMD_NEW_SERVICE:
3177                 if (svc == NULL)
3178                         ret = ip_vs_add_service(&usvc, &svc);
3179                 else
3180                         ret = -EEXIST;
3181                 break;
3182         case IPVS_CMD_SET_SERVICE:
3183                 ret = ip_vs_edit_service(svc, &usvc);
3184                 break;
3185         case IPVS_CMD_DEL_SERVICE:
3186                 ret = ip_vs_del_service(svc);
3187                 /* do not use svc, it can be freed */
3188                 break;
3189         case IPVS_CMD_NEW_DEST:
3190                 ret = ip_vs_add_dest(svc, &udest);
3191                 break;
3192         case IPVS_CMD_SET_DEST:
3193                 ret = ip_vs_edit_dest(svc, &udest);
3194                 break;
3195         case IPVS_CMD_DEL_DEST:
3196                 ret = ip_vs_del_dest(svc, &udest);
3197                 break;
3198         case IPVS_CMD_ZERO:
3199                 ret = ip_vs_zero_service(svc);
3200                 break;
3201         default:
3202                 ret = -EINVAL;
3203         }
3204
3205 out:
3206         mutex_unlock(&__ip_vs_mutex);
3207
3208         return ret;
3209 }
3210
3211 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3212 {
3213         struct sk_buff *msg;
3214         void *reply;
3215         int ret, cmd, reply_cmd;
3216
3217         cmd = info->genlhdr->cmd;
3218
3219         if (cmd == IPVS_CMD_GET_SERVICE)
3220                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3221         else if (cmd == IPVS_CMD_GET_INFO)
3222                 reply_cmd = IPVS_CMD_SET_INFO;
3223         else if (cmd == IPVS_CMD_GET_CONFIG)
3224                 reply_cmd = IPVS_CMD_SET_CONFIG;
3225         else {
3226                 pr_err("unknown Generic Netlink command\n");
3227                 return -EINVAL;
3228         }
3229
3230         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3231         if (!msg)
3232                 return -ENOMEM;
3233
3234         mutex_lock(&__ip_vs_mutex);
3235
3236         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3237         if (reply == NULL)
3238                 goto nla_put_failure;
3239
3240         switch (cmd) {
3241         case IPVS_CMD_GET_SERVICE:
3242         {
3243                 struct ip_vs_service *svc;
3244
3245                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3246                 if (IS_ERR(svc)) {
3247                         ret = PTR_ERR(svc);
3248                         goto out_err;
3249                 } else if (svc) {
3250                         ret = ip_vs_genl_fill_service(msg, svc);
3251                         if (ret)
3252                                 goto nla_put_failure;
3253                 } else {
3254                         ret = -ESRCH;
3255                         goto out_err;
3256                 }
3257
3258                 break;
3259         }
3260
3261         case IPVS_CMD_GET_CONFIG:
3262         {
3263                 struct ip_vs_timeout_user t;
3264
3265                 __ip_vs_get_timeouts(&t);
3266 #ifdef CONFIG_IP_VS_PROTO_TCP
3267                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3268                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3269                             t.tcp_fin_timeout);
3270 #endif
3271 #ifdef CONFIG_IP_VS_PROTO_UDP
3272                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3273 #endif
3274
3275                 break;
3276         }
3277
3278         case IPVS_CMD_GET_INFO:
3279                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3280                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3281                             ip_vs_conn_tab_size);
3282                 break;
3283         }
3284
3285         genlmsg_end(msg, reply);
3286         ret = genlmsg_reply(msg, info);
3287         goto out;
3288
3289 nla_put_failure:
3290         pr_err("not enough space in Netlink message\n");
3291         ret = -EMSGSIZE;
3292
3293 out_err:
3294         nlmsg_free(msg);
3295 out:
3296         mutex_unlock(&__ip_vs_mutex);
3297
3298         return ret;
3299 }
3300
3301
3302 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3303         {
3304                 .cmd    = IPVS_CMD_NEW_SERVICE,
3305                 .flags  = GENL_ADMIN_PERM,
3306                 .policy = ip_vs_cmd_policy,
3307                 .doit   = ip_vs_genl_set_cmd,
3308         },
3309         {
3310                 .cmd    = IPVS_CMD_SET_SERVICE,
3311                 .flags  = GENL_ADMIN_PERM,
3312                 .policy = ip_vs_cmd_policy,
3313                 .doit   = ip_vs_genl_set_cmd,
3314         },
3315         {
3316                 .cmd    = IPVS_CMD_DEL_SERVICE,
3317                 .flags  = GENL_ADMIN_PERM,
3318                 .policy = ip_vs_cmd_policy,
3319                 .doit   = ip_vs_genl_set_cmd,
3320         },
3321         {
3322                 .cmd    = IPVS_CMD_GET_SERVICE,
3323                 .flags  = GENL_ADMIN_PERM,
3324                 .doit   = ip_vs_genl_get_cmd,
3325                 .dumpit = ip_vs_genl_dump_services,
3326                 .policy = ip_vs_cmd_policy,
3327         },
3328         {
3329                 .cmd    = IPVS_CMD_NEW_DEST,
3330                 .flags  = GENL_ADMIN_PERM,
3331                 .policy = ip_vs_cmd_policy,
3332                 .doit   = ip_vs_genl_set_cmd,
3333         },
3334         {
3335                 .cmd    = IPVS_CMD_SET_DEST,
3336                 .flags  = GENL_ADMIN_PERM,
3337                 .policy = ip_vs_cmd_policy,
3338                 .doit   = ip_vs_genl_set_cmd,
3339         },
3340         {
3341                 .cmd    = IPVS_CMD_DEL_DEST,
3342                 .flags  = GENL_ADMIN_PERM,
3343                 .policy = ip_vs_cmd_policy,
3344                 .doit   = ip_vs_genl_set_cmd,
3345         },
3346         {
3347                 .cmd    = IPVS_CMD_GET_DEST,
3348                 .flags  = GENL_ADMIN_PERM,
3349                 .policy = ip_vs_cmd_policy,
3350                 .dumpit = ip_vs_genl_dump_dests,
3351         },
3352         {
3353                 .cmd    = IPVS_CMD_NEW_DAEMON,
3354                 .flags  = GENL_ADMIN_PERM,
3355                 .policy = ip_vs_cmd_policy,
3356                 .doit   = ip_vs_genl_set_cmd,
3357         },
3358         {
3359                 .cmd    = IPVS_CMD_DEL_DAEMON,
3360                 .flags  = GENL_ADMIN_PERM,
3361                 .policy = ip_vs_cmd_policy,
3362                 .doit   = ip_vs_genl_set_cmd,
3363         },
3364         {
3365                 .cmd    = IPVS_CMD_GET_DAEMON,
3366                 .flags  = GENL_ADMIN_PERM,
3367                 .dumpit = ip_vs_genl_dump_daemons,
3368         },
3369         {
3370                 .cmd    = IPVS_CMD_SET_CONFIG,
3371                 .flags  = GENL_ADMIN_PERM,
3372                 .policy = ip_vs_cmd_policy,
3373                 .doit   = ip_vs_genl_set_cmd,
3374         },
3375         {
3376                 .cmd    = IPVS_CMD_GET_CONFIG,
3377                 .flags  = GENL_ADMIN_PERM,
3378                 .doit   = ip_vs_genl_get_cmd,
3379         },
3380         {
3381                 .cmd    = IPVS_CMD_GET_INFO,
3382                 .flags  = GENL_ADMIN_PERM,
3383                 .doit   = ip_vs_genl_get_cmd,
3384         },
3385         {
3386                 .cmd    = IPVS_CMD_ZERO,
3387                 .flags  = GENL_ADMIN_PERM,
3388                 .policy = ip_vs_cmd_policy,
3389                 .doit   = ip_vs_genl_set_cmd,
3390         },
3391         {
3392                 .cmd    = IPVS_CMD_FLUSH,
3393                 .flags  = GENL_ADMIN_PERM,
3394                 .doit   = ip_vs_genl_set_cmd,
3395         },
3396 };
3397
3398 static int __init ip_vs_genl_register(void)
3399 {
3400         return genl_register_family_with_ops(&ip_vs_genl_family,
3401                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3402 }
3403
3404 static void ip_vs_genl_unregister(void)
3405 {
3406         genl_unregister_family(&ip_vs_genl_family);
3407 }
3408
3409 /* End of Generic Netlink interface definitions */
3410
3411
3412 int __init ip_vs_control_init(void)
3413 {
3414         int ret;
3415         int idx;
3416
3417         EnterFunction(2);
3418
3419         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3420         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3421                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3422                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3423         }
3424         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3425                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3426         }
3427         smp_wmb();
3428
3429         ret = nf_register_sockopt(&ip_vs_sockopts);
3430         if (ret) {
3431                 pr_err("cannot register sockopt.\n");
3432                 return ret;
3433         }
3434
3435         ret = ip_vs_genl_register();
3436         if (ret) {
3437                 pr_err("cannot register Generic Netlink interface.\n");
3438                 nf_unregister_sockopt(&ip_vs_sockopts);
3439                 return ret;
3440         }
3441
3442         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3443         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3444
3445         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3446
3447         ip_vs_new_estimator(&ip_vs_stats);
3448
3449         /* Hook the defense timer */
3450         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3451
3452         LeaveFunction(2);
3453         return 0;
3454 }
3455
3456
3457 void ip_vs_control_cleanup(void)
3458 {
3459         EnterFunction(2);
3460         ip_vs_trash_cleanup();
3461         cancel_rearming_delayed_work(&defense_work);
3462         cancel_work_sync(&defense_work.work);
3463         ip_vs_kill_estimator(&ip_vs_stats);
3464         unregister_sysctl_table(sysctl_header);
3465         proc_net_remove(&init_net, "ip_vs_stats");
3466         proc_net_remove(&init_net, "ip_vs");
3467         ip_vs_genl_unregister();
3468         nf_unregister_sockopt(&ip_vs_sockopts);
3469         LeaveFunction(2);
3470 }