]> git.karo-electronics.de Git - mv-sheeva.git/blob - net/ipv4/ipvs/ip_vs_ctl.c
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[mv-sheeva.git] / net / ipv4 / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
25 #include <linux/fs.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
31
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
35
36 #include <net/net_namespace.h>
37 #include <net/ip.h>
38 #include <net/route.h>
39 #include <net/sock.h>
40 #include <net/genetlink.h>
41
42 #include <asm/uaccess.h>
43
44 #include <net/ip_vs.h>
45
46 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
47 static DEFINE_MUTEX(__ip_vs_mutex);
48
49 /* lock for service table */
50 static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52 /* lock for table with the real services */
53 static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55 /* lock for state and timeout tables */
56 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58 /* lock for drop entry handling */
59 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61 /* lock for drop packet handling */
62 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64 /* 1/rate drop and drop-entry variables */
65 int ip_vs_drop_rate = 0;
66 int ip_vs_drop_counter = 0;
67 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69 /* number of virtual services */
70 static int ip_vs_num_services = 0;
71
72 /* sysctl variables */
73 static int sysctl_ip_vs_drop_entry = 0;
74 static int sysctl_ip_vs_drop_packet = 0;
75 static int sysctl_ip_vs_secure_tcp = 0;
76 static int sysctl_ip_vs_amemthresh = 1024;
77 static int sysctl_ip_vs_am_droprate = 10;
78 int sysctl_ip_vs_cache_bypass = 0;
79 int sysctl_ip_vs_expire_nodest_conn = 0;
80 int sysctl_ip_vs_expire_quiescent_template = 0;
81 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82 int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only present when CONFIG_IP_VS_DEBUG is enabled. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
93
94 /*
95  *      update_defense_level is called from keventd and from sysctl,
96  *      so it needs to protect itself from softirqs
97  */
98 static void update_defense_level(void)
99 {
100         struct sysinfo i;
101         static int old_secure_tcp = 0;
102         int availmem;
103         int nomem;
104         int to_change = -1;
105
106         /* we only count free and buffered memory (in pages) */
107         si_meminfo(&i);
108         availmem = i.freeram + i.bufferram;
109         /* however in linux 2.5 the i.bufferram is total page cache size,
110            we need adjust it */
111         /* si_swapinfo(&i); */
112         /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114         nomem = (availmem < sysctl_ip_vs_amemthresh);
115
116         local_bh_disable();
117
118         /* drop_entry */
119         spin_lock(&__ip_vs_dropentry_lock);
120         switch (sysctl_ip_vs_drop_entry) {
121         case 0:
122                 atomic_set(&ip_vs_dropentry, 0);
123                 break;
124         case 1:
125                 if (nomem) {
126                         atomic_set(&ip_vs_dropentry, 1);
127                         sysctl_ip_vs_drop_entry = 2;
128                 } else {
129                         atomic_set(&ip_vs_dropentry, 0);
130                 }
131                 break;
132         case 2:
133                 if (nomem) {
134                         atomic_set(&ip_vs_dropentry, 1);
135                 } else {
136                         atomic_set(&ip_vs_dropentry, 0);
137                         sysctl_ip_vs_drop_entry = 1;
138                 };
139                 break;
140         case 3:
141                 atomic_set(&ip_vs_dropentry, 1);
142                 break;
143         }
144         spin_unlock(&__ip_vs_dropentry_lock);
145
146         /* drop_packet */
147         spin_lock(&__ip_vs_droppacket_lock);
148         switch (sysctl_ip_vs_drop_packet) {
149         case 0:
150                 ip_vs_drop_rate = 0;
151                 break;
152         case 1:
153                 if (nomem) {
154                         ip_vs_drop_rate = ip_vs_drop_counter
155                                 = sysctl_ip_vs_amemthresh /
156                                 (sysctl_ip_vs_amemthresh-availmem);
157                         sysctl_ip_vs_drop_packet = 2;
158                 } else {
159                         ip_vs_drop_rate = 0;
160                 }
161                 break;
162         case 2:
163                 if (nomem) {
164                         ip_vs_drop_rate = ip_vs_drop_counter
165                                 = sysctl_ip_vs_amemthresh /
166                                 (sysctl_ip_vs_amemthresh-availmem);
167                 } else {
168                         ip_vs_drop_rate = 0;
169                         sysctl_ip_vs_drop_packet = 1;
170                 }
171                 break;
172         case 3:
173                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174                 break;
175         }
176         spin_unlock(&__ip_vs_droppacket_lock);
177
178         /* secure_tcp */
179         write_lock(&__ip_vs_securetcp_lock);
180         switch (sysctl_ip_vs_secure_tcp) {
181         case 0:
182                 if (old_secure_tcp >= 2)
183                         to_change = 0;
184                 break;
185         case 1:
186                 if (nomem) {
187                         if (old_secure_tcp < 2)
188                                 to_change = 1;
189                         sysctl_ip_vs_secure_tcp = 2;
190                 } else {
191                         if (old_secure_tcp >= 2)
192                                 to_change = 0;
193                 }
194                 break;
195         case 2:
196                 if (nomem) {
197                         if (old_secure_tcp < 2)
198                                 to_change = 1;
199                 } else {
200                         if (old_secure_tcp >= 2)
201                                 to_change = 0;
202                         sysctl_ip_vs_secure_tcp = 1;
203                 }
204                 break;
205         case 3:
206                 if (old_secure_tcp < 2)
207                         to_change = 1;
208                 break;
209         }
210         old_secure_tcp = sysctl_ip_vs_secure_tcp;
211         if (to_change >= 0)
212                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213         write_unlock(&__ip_vs_securetcp_lock);
214
215         local_bh_enable();
216 }
217
218
/*
 *	Delayed work used for checking the defense level.
 *	DEFENSE_TIMER_PERIOD is the delay in jiffies; it is parenthesised so
 *	that it expands safely inside any larger expression.
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
static void defense_work_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
225
226 static void defense_work_handler(struct work_struct *work)
227 {
228         update_defense_level();
229         if (atomic_read(&ip_vs_dropentry))
230                 ip_vs_random_dropentry();
231
232         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233 }
234
235 int
236 ip_vs_use_count_inc(void)
237 {
238         return try_module_get(THIS_MODULE);
239 }
240
241 void
242 ip_vs_use_count_dec(void)
243 {
244         module_put(THIS_MODULE);
245 }
246
247
248 /*
249  *      Hash table: for virtual service lookups
250  */
251 #define IP_VS_SVC_TAB_BITS 8
252 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255 /* the service table hashed by <protocol, addr, port> */
256 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257 /* the service table hashed by fwmark */
258 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260 /*
261  *      Hash table: for real service lookups
262  */
263 #define IP_VS_RTAB_BITS 4
264 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269 /*
270  *      Trash for destinations
271  */
272 static LIST_HEAD(ip_vs_dest_trash);
273
274 /*
275  *      FTP & NULL virtual service counters
276  */
277 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281 /*
282  *      Returns hash value for virtual service
283  */
284 static __inline__ unsigned
285 ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
286 {
287         register unsigned porth = ntohs(port);
288
289         return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290                 & IP_VS_SVC_TAB_MASK;
291 }
292
293 /*
294  *      Returns hash value of fwmark for virtual service lookup
295  */
296 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297 {
298         return fwmark & IP_VS_SVC_TAB_MASK;
299 }
300
301 /*
302  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303  *      or in the ip_vs_svc_fwm_table by fwmark.
304  *      Should be called with locked tables.
305  */
306 static int ip_vs_svc_hash(struct ip_vs_service *svc)
307 {
308         unsigned hash;
309
310         if (svc->flags & IP_VS_SVC_F_HASHED) {
311                 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312                           "called from %p\n", __builtin_return_address(0));
313                 return 0;
314         }
315
316         if (svc->fwmark == 0) {
317                 /*
318                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
319                  */
320                 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
321                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
322         } else {
323                 /*
324                  *  Hash it by fwmark in ip_vs_svc_fwm_table
325                  */
326                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
327                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
328         }
329
330         svc->flags |= IP_VS_SVC_F_HASHED;
331         /* increase its refcnt because it is referenced by the svc table */
332         atomic_inc(&svc->refcnt);
333         return 1;
334 }
335
336
337 /*
338  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
339  *      Should be called with locked tables.
340  */
341 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342 {
343         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
344                 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
345                           "called from %p\n", __builtin_return_address(0));
346                 return 0;
347         }
348
349         if (svc->fwmark == 0) {
350                 /* Remove it from the ip_vs_svc_table table */
351                 list_del(&svc->s_list);
352         } else {
353                 /* Remove it from the ip_vs_svc_fwm_table table */
354                 list_del(&svc->f_list);
355         }
356
357         svc->flags &= ~IP_VS_SVC_F_HASHED;
358         atomic_dec(&svc->refcnt);
359         return 1;
360 }
361
362
363 /*
364  *      Get service by {proto,addr,port} in the service table.
365  */
366 static __inline__ struct ip_vs_service *
367 __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
368 {
369         unsigned hash;
370         struct ip_vs_service *svc;
371
372         /* Check for "full" addressed entries */
373         hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
374
375         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
376                 if ((svc->addr == vaddr)
377                     && (svc->port == vport)
378                     && (svc->protocol == protocol)) {
379                         /* HIT */
380                         atomic_inc(&svc->usecnt);
381                         return svc;
382                 }
383         }
384
385         return NULL;
386 }
387
388
389 /*
390  *      Get service by {fwmark} in the service table.
391  */
392 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
393 {
394         unsigned hash;
395         struct ip_vs_service *svc;
396
397         /* Check for fwmark addressed entries */
398         hash = ip_vs_svc_fwm_hashkey(fwmark);
399
400         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
401                 if (svc->fwmark == fwmark) {
402                         /* HIT */
403                         atomic_inc(&svc->usecnt);
404                         return svc;
405                 }
406         }
407
408         return NULL;
409 }
410
411 struct ip_vs_service *
412 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
413 {
414         struct ip_vs_service *svc;
415
416         read_lock(&__ip_vs_svc_lock);
417
418         /*
419          *      Check the table hashed by fwmark first
420          */
421         if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
422                 goto out;
423
424         /*
425          *      Check the table hashed by <protocol,addr,port>
426          *      for "full" addressed entries
427          */
428         svc = __ip_vs_service_get(protocol, vaddr, vport);
429
430         if (svc == NULL
431             && protocol == IPPROTO_TCP
432             && atomic_read(&ip_vs_ftpsvc_counter)
433             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
434                 /*
435                  * Check if ftp service entry exists, the packet
436                  * might belong to FTP data connections.
437                  */
438                 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
439         }
440
441         if (svc == NULL
442             && atomic_read(&ip_vs_nullsvc_counter)) {
443                 /*
444                  * Check if the catch-all port (port zero) exists
445                  */
446                 svc = __ip_vs_service_get(protocol, vaddr, 0);
447         }
448
449   out:
450         read_unlock(&__ip_vs_svc_lock);
451
452         IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
453                   fwmark, ip_vs_proto_name(protocol),
454                   NIPQUAD(vaddr), ntohs(vport),
455                   svc?"hit":"not hit");
456
457         return svc;
458 }
459
460
461 static inline void
462 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463 {
464         atomic_inc(&svc->refcnt);
465         dest->svc = svc;
466 }
467
468 static inline void
469 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
470 {
471         struct ip_vs_service *svc = dest->svc;
472
473         dest->svc = NULL;
474         if (atomic_dec_and_test(&svc->refcnt))
475                 kfree(svc);
476 }
477
478
479 /*
480  *      Returns hash value for real service
481  */
482 static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
483 {
484         register unsigned porth = ntohs(port);
485
486         return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
487                 & IP_VS_RTAB_MASK;
488 }
489
490 /*
491  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
492  *      should be called with locked tables.
493  */
494 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
495 {
496         unsigned hash;
497
498         if (!list_empty(&dest->d_list)) {
499                 return 0;
500         }
501
502         /*
503          *      Hash by proto,addr,port,
504          *      which are the parameters of the real service.
505          */
506         hash = ip_vs_rs_hashkey(dest->addr, dest->port);
507         list_add(&dest->d_list, &ip_vs_rtable[hash]);
508
509         return 1;
510 }
511
512 /*
513  *      UNhashes ip_vs_dest from ip_vs_rtable.
514  *      should be called with locked tables.
515  */
516 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
517 {
518         /*
519          * Remove it from the ip_vs_rtable table.
520          */
521         if (!list_empty(&dest->d_list)) {
522                 list_del(&dest->d_list);
523                 INIT_LIST_HEAD(&dest->d_list);
524         }
525
526         return 1;
527 }
528
529 /*
530  *      Lookup real service by <proto,addr,port> in the real service table.
531  */
532 struct ip_vs_dest *
533 ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
534 {
535         unsigned hash;
536         struct ip_vs_dest *dest;
537
538         /*
539          *      Check for "full" addressed entries
540          *      Return the first found entry
541          */
542         hash = ip_vs_rs_hashkey(daddr, dport);
543
544         read_lock(&__ip_vs_rs_lock);
545         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
546                 if ((dest->addr == daddr)
547                     && (dest->port == dport)
548                     && ((dest->protocol == protocol) ||
549                         dest->vfwmark)) {
550                         /* HIT */
551                         read_unlock(&__ip_vs_rs_lock);
552                         return dest;
553                 }
554         }
555         read_unlock(&__ip_vs_rs_lock);
556
557         return NULL;
558 }
559
560 /*
561  *      Lookup destination by {addr,port} in the given service
562  */
563 static struct ip_vs_dest *
564 ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
565 {
566         struct ip_vs_dest *dest;
567
568         /*
569          * Find the destination for the given service
570          */
571         list_for_each_entry(dest, &svc->destinations, n_list) {
572                 if ((dest->addr == daddr) && (dest->port == dport)) {
573                         /* HIT */
574                         return dest;
575                 }
576         }
577
578         return NULL;
579 }
580
581 /*
582  * Find destination by {daddr,dport,vaddr,protocol}
583  * Cretaed to be used in ip_vs_process_message() in
584  * the backup synchronization daemon. It finds the
585  * destination to be bound to the received connection
586  * on the backup.
587  *
588  * ip_vs_lookup_real_service() looked promissing, but
589  * seems not working as expected.
590  */
591 struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
592                                     __be32 vaddr, __be16 vport, __u16 protocol)
593 {
594         struct ip_vs_dest *dest;
595         struct ip_vs_service *svc;
596
597         svc = ip_vs_service_get(0, protocol, vaddr, vport);
598         if (!svc)
599                 return NULL;
600         dest = ip_vs_lookup_dest(svc, daddr, dport);
601         if (dest)
602                 atomic_inc(&dest->refcnt);
603         ip_vs_service_put(svc);
604         return dest;
605 }
606
607 /*
608  *  Lookup dest by {svc,addr,port} in the destination trash.
609  *  The destination trash is used to hold the destinations that are removed
610  *  from the service table but are still referenced by some conn entries.
611  *  The reason to add the destination trash is when the dest is temporary
612  *  down (either by administrator or by monitor program), the dest can be
613  *  picked back from the trash, the remaining connections to the dest can
614  *  continue, and the counting information of the dest is also useful for
615  *  scheduling.
616  */
617 static struct ip_vs_dest *
618 ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
619 {
620         struct ip_vs_dest *dest, *nxt;
621
622         /*
623          * Find the destination in trash
624          */
625         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
626                 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
627                           "dest->refcnt=%d\n",
628                           dest->vfwmark,
629                           NIPQUAD(dest->addr), ntohs(dest->port),
630                           atomic_read(&dest->refcnt));
631                 if (dest->addr == daddr &&
632                     dest->port == dport &&
633                     dest->vfwmark == svc->fwmark &&
634                     dest->protocol == svc->protocol &&
635                     (svc->fwmark ||
636                      (dest->vaddr == svc->addr &&
637                       dest->vport == svc->port))) {
638                         /* HIT */
639                         return dest;
640                 }
641
642                 /*
643                  * Try to purge the destination from trash if not referenced
644                  */
645                 if (atomic_read(&dest->refcnt) == 1) {
646                         IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
647                                   "from trash\n",
648                                   dest->vfwmark,
649                                   NIPQUAD(dest->addr), ntohs(dest->port));
650                         list_del(&dest->n_list);
651                         ip_vs_dst_reset(dest);
652                         __ip_vs_unbind_svc(dest);
653                         kfree(dest);
654                 }
655         }
656
657         return NULL;
658 }
659
660
661 /*
662  *  Clean up all the destinations in the trash
663  *  Called by the ip_vs_control_cleanup()
664  *
665  *  When the ip_vs_control_clearup is activated by ipvs module exit,
666  *  the service tables must have been flushed and all the connections
667  *  are expired, and the refcnt of each destination in the trash must
668  *  be 1, so we simply release them here.
669  */
670 static void ip_vs_trash_cleanup(void)
671 {
672         struct ip_vs_dest *dest, *nxt;
673
674         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
675                 list_del(&dest->n_list);
676                 ip_vs_dst_reset(dest);
677                 __ip_vs_unbind_svc(dest);
678                 kfree(dest);
679         }
680 }
681
682
683 static void
684 ip_vs_zero_stats(struct ip_vs_stats *stats)
685 {
686         spin_lock_bh(&stats->lock);
687
688         stats->conns = 0;
689         stats->inpkts = 0;
690         stats->outpkts = 0;
691         stats->inbytes = 0;
692         stats->outbytes = 0;
693
694         stats->cps = 0;
695         stats->inpps = 0;
696         stats->outpps = 0;
697         stats->inbps = 0;
698         stats->outbps = 0;
699
700         ip_vs_zero_estimator(stats);
701
702         spin_unlock_bh(&stats->lock);
703 }
704
705 /*
706  *      Update a destination in the given service
707  */
708 static void
709 __ip_vs_update_dest(struct ip_vs_service *svc,
710                     struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
711 {
712         int conn_flags;
713
714         /* set the weight and the flags */
715         atomic_set(&dest->weight, udest->weight);
716         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
717
718         /* check if local node and update the flags */
719         if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
720                 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
721                         | IP_VS_CONN_F_LOCALNODE;
722         }
723
724         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
725         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
726                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
727         } else {
728                 /*
729                  *    Put the real service in ip_vs_rtable if not present.
730                  *    For now only for NAT!
731                  */
732                 write_lock_bh(&__ip_vs_rs_lock);
733                 ip_vs_rs_hash(dest);
734                 write_unlock_bh(&__ip_vs_rs_lock);
735         }
736         atomic_set(&dest->conn_flags, conn_flags);
737
738         /* bind the service */
739         if (!dest->svc) {
740                 __ip_vs_bind_svc(dest, svc);
741         } else {
742                 if (dest->svc != svc) {
743                         __ip_vs_unbind_svc(dest);
744                         ip_vs_zero_stats(&dest->stats);
745                         __ip_vs_bind_svc(dest, svc);
746                 }
747         }
748
749         /* set the dest status flags */
750         dest->flags |= IP_VS_DEST_F_AVAILABLE;
751
752         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
753                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
754         dest->u_threshold = udest->u_threshold;
755         dest->l_threshold = udest->l_threshold;
756 }
757
758
759 /*
760  *      Create a destination for the given service
761  */
762 static int
763 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
764                struct ip_vs_dest **dest_p)
765 {
766         struct ip_vs_dest *dest;
767         unsigned atype;
768
769         EnterFunction(2);
770
771         atype = inet_addr_type(&init_net, udest->addr);
772         if (atype != RTN_LOCAL && atype != RTN_UNICAST)
773                 return -EINVAL;
774
775         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
776         if (dest == NULL) {
777                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
778                 return -ENOMEM;
779         }
780
781         dest->protocol = svc->protocol;
782         dest->vaddr = svc->addr;
783         dest->vport = svc->port;
784         dest->vfwmark = svc->fwmark;
785         dest->addr = udest->addr;
786         dest->port = udest->port;
787
788         atomic_set(&dest->activeconns, 0);
789         atomic_set(&dest->inactconns, 0);
790         atomic_set(&dest->persistconns, 0);
791         atomic_set(&dest->refcnt, 0);
792
793         INIT_LIST_HEAD(&dest->d_list);
794         spin_lock_init(&dest->dst_lock);
795         spin_lock_init(&dest->stats.lock);
796         __ip_vs_update_dest(svc, dest, udest);
797         ip_vs_new_estimator(&dest->stats);
798
799         *dest_p = dest;
800
801         LeaveFunction(2);
802         return 0;
803 }
804
805
806 /*
807  *      Add a destination into an existing service
808  */
809 static int
810 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
811 {
812         struct ip_vs_dest *dest;
813         __be32 daddr = udest->addr;
814         __be16 dport = udest->port;
815         int ret;
816
817         EnterFunction(2);
818
819         if (udest->weight < 0) {
820                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
821                 return -ERANGE;
822         }
823
824         if (udest->l_threshold > udest->u_threshold) {
825                 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
826                           "upper threshold\n");
827                 return -ERANGE;
828         }
829
830         /*
831          * Check if the dest already exists in the list
832          */
833         dest = ip_vs_lookup_dest(svc, daddr, dport);
834         if (dest != NULL) {
835                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
836                 return -EEXIST;
837         }
838
839         /*
840          * Check if the dest already exists in the trash and
841          * is from the same service
842          */
843         dest = ip_vs_trash_get_dest(svc, daddr, dport);
844         if (dest != NULL) {
845                 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
846                           "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
847                           NIPQUAD(daddr), ntohs(dport),
848                           atomic_read(&dest->refcnt),
849                           dest->vfwmark,
850                           NIPQUAD(dest->vaddr),
851                           ntohs(dest->vport));
852                 __ip_vs_update_dest(svc, dest, udest);
853
854                 /*
855                  * Get the destination from the trash
856                  */
857                 list_del(&dest->n_list);
858
859                 ip_vs_new_estimator(&dest->stats);
860
861                 write_lock_bh(&__ip_vs_svc_lock);
862
863                 /*
864                  * Wait until all other svc users go away.
865                  */
866                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
867
868                 list_add(&dest->n_list, &svc->destinations);
869                 svc->num_dests++;
870
871                 /* call the update_service function of its scheduler */
872                 if (svc->scheduler->update_service)
873                         svc->scheduler->update_service(svc);
874
875                 write_unlock_bh(&__ip_vs_svc_lock);
876                 return 0;
877         }
878
879         /*
880          * Allocate and initialize the dest structure
881          */
882         ret = ip_vs_new_dest(svc, udest, &dest);
883         if (ret) {
884                 return ret;
885         }
886
887         /*
888          * Add the dest entry into the list
889          */
890         atomic_inc(&dest->refcnt);
891
892         write_lock_bh(&__ip_vs_svc_lock);
893
894         /*
895          * Wait until all other svc users go away.
896          */
897         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
898
899         list_add(&dest->n_list, &svc->destinations);
900         svc->num_dests++;
901
902         /* call the update_service function of its scheduler */
903         if (svc->scheduler->update_service)
904                 svc->scheduler->update_service(svc);
905
906         write_unlock_bh(&__ip_vs_svc_lock);
907
908         LeaveFunction(2);
909
910         return 0;
911 }
912
913
914 /*
915  *      Edit a destination in the given service
916  */
917 static int
918 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
919 {
920         struct ip_vs_dest *dest;
921         __be32 daddr = udest->addr;
922         __be16 dport = udest->port;
923
924         EnterFunction(2);
925
926         if (udest->weight < 0) {
927                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
928                 return -ERANGE;
929         }
930
931         if (udest->l_threshold > udest->u_threshold) {
932                 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
933                           "upper threshold\n");
934                 return -ERANGE;
935         }
936
937         /*
938          *  Lookup the destination list
939          */
940         dest = ip_vs_lookup_dest(svc, daddr, dport);
941         if (dest == NULL) {
942                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
943                 return -ENOENT;
944         }
945
946         __ip_vs_update_dest(svc, dest, udest);
947
948         write_lock_bh(&__ip_vs_svc_lock);
949
950         /* Wait until all other svc users go away */
951         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
952
953         /* call the update_service, because server weight may be changed */
954         if (svc->scheduler->update_service)
955                 svc->scheduler->update_service(svc);
956
957         write_unlock_bh(&__ip_vs_svc_lock);
958
959         LeaveFunction(2);
960
961         return 0;
962 }
963
964
965 /*
966  *      Delete a destination (must be already unlinked from the service)
967  */
968 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
969 {
970         ip_vs_kill_estimator(&dest->stats);
971
972         /*
973          *  Remove it from the d-linked list with the real services.
974          */
975         write_lock_bh(&__ip_vs_rs_lock);
976         ip_vs_rs_unhash(dest);
977         write_unlock_bh(&__ip_vs_rs_lock);
978
979         /*
980          *  Decrease the refcnt of the dest, and free the dest
981          *  if nobody refers to it (refcnt=0). Otherwise, throw
982          *  the destination into the trash.
983          */
984         if (atomic_dec_and_test(&dest->refcnt)) {
985                 ip_vs_dst_reset(dest);
986                 /* simply decrease svc->refcnt here, let the caller check
987                    and release the service if nobody refers to it.
988                    Only user context can release destination and service,
989                    and only one user context can update virtual service at a
990                    time, so the operation here is OK */
991                 atomic_dec(&dest->svc->refcnt);
992                 kfree(dest);
993         } else {
994                 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
995                           "dest->refcnt=%d\n",
996                           NIPQUAD(dest->addr), ntohs(dest->port),
997                           atomic_read(&dest->refcnt));
998                 list_add(&dest->n_list, &ip_vs_dest_trash);
999                 atomic_inc(&dest->refcnt);
1000         }
1001 }
1002
1003
1004 /*
1005  *      Unlink a destination from the given service
1006  */
1007 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1008                                 struct ip_vs_dest *dest,
1009                                 int svcupd)
1010 {
1011         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1012
1013         /*
1014          *  Remove it from the d-linked destination list.
1015          */
1016         list_del(&dest->n_list);
1017         svc->num_dests--;
1018
1019         /*
1020          *  Call the update_service function of its scheduler
1021          */
1022         if (svcupd && svc->scheduler->update_service)
1023                         svc->scheduler->update_service(svc);
1024 }
1025
1026
1027 /*
1028  *      Delete a destination server in the given service
1029  */
1030 static int
1031 ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
1032 {
1033         struct ip_vs_dest *dest;
1034         __be32 daddr = udest->addr;
1035         __be16 dport = udest->port;
1036
1037         EnterFunction(2);
1038
1039         dest = ip_vs_lookup_dest(svc, daddr, dport);
1040         if (dest == NULL) {
1041                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1042                 return -ENOENT;
1043         }
1044
1045         write_lock_bh(&__ip_vs_svc_lock);
1046
1047         /*
1048          *      Wait until all other svc users go away.
1049          */
1050         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1051
1052         /*
1053          *      Unlink dest from the service
1054          */
1055         __ip_vs_unlink_dest(svc, dest, 1);
1056
1057         write_unlock_bh(&__ip_vs_svc_lock);
1058
1059         /*
1060          *      Delete the destination
1061          */
1062         __ip_vs_del_dest(dest);
1063
1064         LeaveFunction(2);
1065
1066         return 0;
1067 }
1068
1069
1070 /*
1071  *      Add a service into the service hash table
1072  */
1073 static int
1074 ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1075 {
1076         int ret = 0;
1077         struct ip_vs_scheduler *sched = NULL;
1078         struct ip_vs_service *svc = NULL;
1079
1080         /* increase the module use count */
1081         ip_vs_use_count_inc();
1082
1083         /* Lookup the scheduler by 'u->sched_name' */
1084         sched = ip_vs_scheduler_get(u->sched_name);
1085         if (sched == NULL) {
1086                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1087                            u->sched_name);
1088                 ret = -ENOENT;
1089                 goto out_mod_dec;
1090         }
1091
1092         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1093         if (svc == NULL) {
1094                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1095                 ret = -ENOMEM;
1096                 goto out_err;
1097         }
1098
1099         /* I'm the first user of the service */
1100         atomic_set(&svc->usecnt, 1);
1101         atomic_set(&svc->refcnt, 0);
1102
1103         svc->protocol = u->protocol;
1104         svc->addr = u->addr;
1105         svc->port = u->port;
1106         svc->fwmark = u->fwmark;
1107         svc->flags = u->flags;
1108         svc->timeout = u->timeout * HZ;
1109         svc->netmask = u->netmask;
1110
1111         INIT_LIST_HEAD(&svc->destinations);
1112         rwlock_init(&svc->sched_lock);
1113         spin_lock_init(&svc->stats.lock);
1114
1115         /* Bind the scheduler */
1116         ret = ip_vs_bind_scheduler(svc, sched);
1117         if (ret)
1118                 goto out_err;
1119         sched = NULL;
1120
1121         /* Update the virtual service counters */
1122         if (svc->port == FTPPORT)
1123                 atomic_inc(&ip_vs_ftpsvc_counter);
1124         else if (svc->port == 0)
1125                 atomic_inc(&ip_vs_nullsvc_counter);
1126
1127         ip_vs_new_estimator(&svc->stats);
1128         ip_vs_num_services++;
1129
1130         /* Hash the service into the service table */
1131         write_lock_bh(&__ip_vs_svc_lock);
1132         ip_vs_svc_hash(svc);
1133         write_unlock_bh(&__ip_vs_svc_lock);
1134
1135         *svc_p = svc;
1136         return 0;
1137
1138   out_err:
1139         if (svc != NULL) {
1140                 if (svc->scheduler)
1141                         ip_vs_unbind_scheduler(svc);
1142                 if (svc->inc) {
1143                         local_bh_disable();
1144                         ip_vs_app_inc_put(svc->inc);
1145                         local_bh_enable();
1146                 }
1147                 kfree(svc);
1148         }
1149         ip_vs_scheduler_put(sched);
1150
1151   out_mod_dec:
1152         /* decrease the module use count */
1153         ip_vs_use_count_dec();
1154
1155         return ret;
1156 }
1157
1158
1159 /*
1160  *      Edit a service and bind it with a new scheduler
1161  */
1162 static int
1163 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1164 {
1165         struct ip_vs_scheduler *sched, *old_sched;
1166         int ret = 0;
1167
1168         /*
1169          * Lookup the scheduler, by 'u->sched_name'
1170          */
1171         sched = ip_vs_scheduler_get(u->sched_name);
1172         if (sched == NULL) {
1173                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1174                            u->sched_name);
1175                 return -ENOENT;
1176         }
1177         old_sched = sched;
1178
1179         write_lock_bh(&__ip_vs_svc_lock);
1180
1181         /*
1182          * Wait until all other svc users go away.
1183          */
1184         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1185
1186         /*
1187          * Set the flags and timeout value
1188          */
1189         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1190         svc->timeout = u->timeout * HZ;
1191         svc->netmask = u->netmask;
1192
1193         old_sched = svc->scheduler;
1194         if (sched != old_sched) {
1195                 /*
1196                  * Unbind the old scheduler
1197                  */
1198                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1199                         old_sched = sched;
1200                         goto out;
1201                 }
1202
1203                 /*
1204                  * Bind the new scheduler
1205                  */
1206                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1207                         /*
1208                          * If ip_vs_bind_scheduler fails, restore the old
1209                          * scheduler.
1210                          * The main reason of failure is out of memory.
1211                          *
1212                          * The question is if the old scheduler can be
1213                          * restored all the time. TODO: if it cannot be
1214                          * restored some time, we must delete the service,
1215                          * otherwise the system may crash.
1216                          */
1217                         ip_vs_bind_scheduler(svc, old_sched);
1218                         old_sched = sched;
1219                         goto out;
1220                 }
1221         }
1222
1223   out:
1224         write_unlock_bh(&__ip_vs_svc_lock);
1225
1226         if (old_sched)
1227                 ip_vs_scheduler_put(old_sched);
1228
1229         return ret;
1230 }
1231
1232
1233 /*
1234  *      Delete a service from the service list
1235  *      - The service must be unlinked, unlocked and not referenced!
1236  *      - We are called under _bh lock
1237  */
1238 static void __ip_vs_del_service(struct ip_vs_service *svc)
1239 {
1240         struct ip_vs_dest *dest, *nxt;
1241         struct ip_vs_scheduler *old_sched;
1242
1243         ip_vs_num_services--;
1244         ip_vs_kill_estimator(&svc->stats);
1245
1246         /* Unbind scheduler */
1247         old_sched = svc->scheduler;
1248         ip_vs_unbind_scheduler(svc);
1249         if (old_sched)
1250                 ip_vs_scheduler_put(old_sched);
1251
1252         /* Unbind app inc */
1253         if (svc->inc) {
1254                 ip_vs_app_inc_put(svc->inc);
1255                 svc->inc = NULL;
1256         }
1257
1258         /*
1259          *    Unlink the whole destination list
1260          */
1261         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1262                 __ip_vs_unlink_dest(svc, dest, 0);
1263                 __ip_vs_del_dest(dest);
1264         }
1265
1266         /*
1267          *    Update the virtual service counters
1268          */
1269         if (svc->port == FTPPORT)
1270                 atomic_dec(&ip_vs_ftpsvc_counter);
1271         else if (svc->port == 0)
1272                 atomic_dec(&ip_vs_nullsvc_counter);
1273
1274         /*
1275          *    Free the service if nobody refers to it
1276          */
1277         if (atomic_read(&svc->refcnt) == 0)
1278                 kfree(svc);
1279
1280         /* decrease the module use count */
1281         ip_vs_use_count_dec();
1282 }
1283
1284 /*
1285  *      Delete a service from the service list
1286  */
1287 static int ip_vs_del_service(struct ip_vs_service *svc)
1288 {
1289         if (svc == NULL)
1290                 return -EEXIST;
1291
1292         /*
1293          * Unhash it from the service table
1294          */
1295         write_lock_bh(&__ip_vs_svc_lock);
1296
1297         ip_vs_svc_unhash(svc);
1298
1299         /*
1300          * Wait until all the svc users go away.
1301          */
1302         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1303
1304         __ip_vs_del_service(svc);
1305
1306         write_unlock_bh(&__ip_vs_svc_lock);
1307
1308         return 0;
1309 }
1310
1311
1312 /*
1313  *      Flush all the virtual services
1314  */
1315 static int ip_vs_flush(void)
1316 {
1317         int idx;
1318         struct ip_vs_service *svc, *nxt;
1319
1320         /*
1321          * Flush the service table hashed by <protocol,addr,port>
1322          */
1323         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1324                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1325                         write_lock_bh(&__ip_vs_svc_lock);
1326                         ip_vs_svc_unhash(svc);
1327                         /*
1328                          * Wait until all the svc users go away.
1329                          */
1330                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1331                         __ip_vs_del_service(svc);
1332                         write_unlock_bh(&__ip_vs_svc_lock);
1333                 }
1334         }
1335
1336         /*
1337          * Flush the service table hashed by fwmark
1338          */
1339         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1340                 list_for_each_entry_safe(svc, nxt,
1341                                          &ip_vs_svc_fwm_table[idx], f_list) {
1342                         write_lock_bh(&__ip_vs_svc_lock);
1343                         ip_vs_svc_unhash(svc);
1344                         /*
1345                          * Wait until all the svc users go away.
1346                          */
1347                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1348                         __ip_vs_del_service(svc);
1349                         write_unlock_bh(&__ip_vs_svc_lock);
1350                 }
1351         }
1352
1353         return 0;
1354 }
1355
1356
1357 /*
1358  *      Zero counters in a service or all services
1359  */
1360 static int ip_vs_zero_service(struct ip_vs_service *svc)
1361 {
1362         struct ip_vs_dest *dest;
1363
1364         write_lock_bh(&__ip_vs_svc_lock);
1365         list_for_each_entry(dest, &svc->destinations, n_list) {
1366                 ip_vs_zero_stats(&dest->stats);
1367         }
1368         ip_vs_zero_stats(&svc->stats);
1369         write_unlock_bh(&__ip_vs_svc_lock);
1370         return 0;
1371 }
1372
1373 static int ip_vs_zero_all(void)
1374 {
1375         int idx;
1376         struct ip_vs_service *svc;
1377
1378         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1379                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1380                         ip_vs_zero_service(svc);
1381                 }
1382         }
1383
1384         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1385                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1386                         ip_vs_zero_service(svc);
1387                 }
1388         }
1389
1390         ip_vs_zero_stats(&ip_vs_stats);
1391         return 0;
1392 }
1393
1394
1395 static int
1396 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1397                      void __user *buffer, size_t *lenp, loff_t *ppos)
1398 {
1399         int *valp = table->data;
1400         int val = *valp;
1401         int rc;
1402
1403         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1404         if (write && (*valp != val)) {
1405                 if ((*valp < 0) || (*valp > 3)) {
1406                         /* Restore the correct value */
1407                         *valp = val;
1408                 } else {
1409                         update_defense_level();
1410                 }
1411         }
1412         return rc;
1413 }
1414
1415
1416 static int
1417 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1418                        void __user *buffer, size_t *lenp, loff_t *ppos)
1419 {
1420         int *valp = table->data;
1421         int val[2];
1422         int rc;
1423
1424         /* backup the value first */
1425         memcpy(val, valp, sizeof(val));
1426
1427         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1428         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1429                 /* Restore the correct value */
1430                 memcpy(valp, val, sizeof(val));
1431         }
1432         return rc;
1433 }
1434
1435
1436 /*
1437  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1438  */
1439
1440 static struct ctl_table vs_vars[] = {
1441         {
1442                 .procname       = "amemthresh",
1443                 .data           = &sysctl_ip_vs_amemthresh,
1444                 .maxlen         = sizeof(int),
1445                 .mode           = 0644,
1446                 .proc_handler   = &proc_dointvec,
1447         },
1448 #ifdef CONFIG_IP_VS_DEBUG
1449         {
1450                 .procname       = "debug_level",
1451                 .data           = &sysctl_ip_vs_debug_level,
1452                 .maxlen         = sizeof(int),
1453                 .mode           = 0644,
1454                 .proc_handler   = &proc_dointvec,
1455         },
1456 #endif
1457         {
1458                 .procname       = "am_droprate",
1459                 .data           = &sysctl_ip_vs_am_droprate,
1460                 .maxlen         = sizeof(int),
1461                 .mode           = 0644,
1462                 .proc_handler   = &proc_dointvec,
1463         },
1464         {
1465                 .procname       = "drop_entry",
1466                 .data           = &sysctl_ip_vs_drop_entry,
1467                 .maxlen         = sizeof(int),
1468                 .mode           = 0644,
1469                 .proc_handler   = &proc_do_defense_mode,
1470         },
1471         {
1472                 .procname       = "drop_packet",
1473                 .data           = &sysctl_ip_vs_drop_packet,
1474                 .maxlen         = sizeof(int),
1475                 .mode           = 0644,
1476                 .proc_handler   = &proc_do_defense_mode,
1477         },
1478         {
1479                 .procname       = "secure_tcp",
1480                 .data           = &sysctl_ip_vs_secure_tcp,
1481                 .maxlen         = sizeof(int),
1482                 .mode           = 0644,
1483                 .proc_handler   = &proc_do_defense_mode,
1484         },
1485 #if 0
1486         {
1487                 .procname       = "timeout_established",
1488                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1489                 .maxlen         = sizeof(int),
1490                 .mode           = 0644,
1491                 .proc_handler   = &proc_dointvec_jiffies,
1492         },
1493         {
1494                 .procname       = "timeout_synsent",
1495                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1496                 .maxlen         = sizeof(int),
1497                 .mode           = 0644,
1498                 .proc_handler   = &proc_dointvec_jiffies,
1499         },
1500         {
1501                 .procname       = "timeout_synrecv",
1502                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1503                 .maxlen         = sizeof(int),
1504                 .mode           = 0644,
1505                 .proc_handler   = &proc_dointvec_jiffies,
1506         },
1507         {
1508                 .procname       = "timeout_finwait",
1509                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1510                 .maxlen         = sizeof(int),
1511                 .mode           = 0644,
1512                 .proc_handler   = &proc_dointvec_jiffies,
1513         },
1514         {
1515                 .procname       = "timeout_timewait",
1516                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1517                 .maxlen         = sizeof(int),
1518                 .mode           = 0644,
1519                 .proc_handler   = &proc_dointvec_jiffies,
1520         },
1521         {
1522                 .procname       = "timeout_close",
1523                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1524                 .maxlen         = sizeof(int),
1525                 .mode           = 0644,
1526                 .proc_handler   = &proc_dointvec_jiffies,
1527         },
1528         {
1529                 .procname       = "timeout_closewait",
1530                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1531                 .maxlen         = sizeof(int),
1532                 .mode           = 0644,
1533                 .proc_handler   = &proc_dointvec_jiffies,
1534         },
1535         {
1536                 .procname       = "timeout_lastack",
1537                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1538                 .maxlen         = sizeof(int),
1539                 .mode           = 0644,
1540                 .proc_handler   = &proc_dointvec_jiffies,
1541         },
1542         {
1543                 .procname       = "timeout_listen",
1544                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1545                 .maxlen         = sizeof(int),
1546                 .mode           = 0644,
1547                 .proc_handler   = &proc_dointvec_jiffies,
1548         },
1549         {
1550                 .procname       = "timeout_synack",
1551                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1552                 .maxlen         = sizeof(int),
1553                 .mode           = 0644,
1554                 .proc_handler   = &proc_dointvec_jiffies,
1555         },
1556         {
1557                 .procname       = "timeout_udp",
1558                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1559                 .maxlen         = sizeof(int),
1560                 .mode           = 0644,
1561                 .proc_handler   = &proc_dointvec_jiffies,
1562         },
1563         {
1564                 .procname       = "timeout_icmp",
1565                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1566                 .maxlen         = sizeof(int),
1567                 .mode           = 0644,
1568                 .proc_handler   = &proc_dointvec_jiffies,
1569         },
1570 #endif
1571         {
1572                 .procname       = "cache_bypass",
1573                 .data           = &sysctl_ip_vs_cache_bypass,
1574                 .maxlen         = sizeof(int),
1575                 .mode           = 0644,
1576                 .proc_handler   = &proc_dointvec,
1577         },
1578         {
1579                 .procname       = "expire_nodest_conn",
1580                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1581                 .maxlen         = sizeof(int),
1582                 .mode           = 0644,
1583                 .proc_handler   = &proc_dointvec,
1584         },
1585         {
1586                 .procname       = "expire_quiescent_template",
1587                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1588                 .maxlen         = sizeof(int),
1589                 .mode           = 0644,
1590                 .proc_handler   = &proc_dointvec,
1591         },
1592         {
1593                 .procname       = "sync_threshold",
1594                 .data           = &sysctl_ip_vs_sync_threshold,
1595                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1596                 .mode           = 0644,
1597                 .proc_handler   = &proc_do_sync_threshold,
1598         },
1599         {
1600                 .procname       = "nat_icmp_send",
1601                 .data           = &sysctl_ip_vs_nat_icmp_send,
1602                 .maxlen         = sizeof(int),
1603                 .mode           = 0644,
1604                 .proc_handler   = &proc_dointvec,
1605         },
1606         { .ctl_name = 0 }
1607 };
1608
1609 const struct ctl_path net_vs_ctl_path[] = {
1610         { .procname = "net", .ctl_name = CTL_NET, },
1611         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1612         { .procname = "vs", },
1613         { }
1614 };
1615 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1616
/* sysctl table header pointer; presumably the registration handle for vs_vars -- TODO confirm */
static struct ctl_table_header *sysctl_header;
1618
1619 #ifdef CONFIG_PROC_FS
1620
/*
 *	Per-open iterator state of the seq_file service listing
 *	(stored in seq->private).
 */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the current hash bucket in 'table' */
};
1625
1626 /*
1627  *      Write the contents of the VS rule table to a PROCfs file.
1628  *      (It is kept just for backward compatibility)
1629  */
1630 static inline const char *ip_vs_fwd_name(unsigned flags)
1631 {
1632         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1633         case IP_VS_CONN_F_LOCALNODE:
1634                 return "Local";
1635         case IP_VS_CONN_F_TUNNEL:
1636                 return "Tunnel";
1637         case IP_VS_CONN_F_DROUTE:
1638                 return "Route";
1639         default:
1640                 return "Masq";
1641         }
1642 }
1643
1644
1645 /* Get the Nth entry in the two lists */
1646 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1647 {
1648         struct ip_vs_iter *iter = seq->private;
1649         int idx;
1650         struct ip_vs_service *svc;
1651
1652         /* look in hash by protocol */
1653         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1654                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1655                         if (pos-- == 0){
1656                                 iter->table = ip_vs_svc_table;
1657                                 iter->bucket = idx;
1658                                 return svc;
1659                         }
1660                 }
1661         }
1662
1663         /* keep looking in fwmark */
1664         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1665                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1666                         if (pos-- == 0) {
1667                                 iter->table = ip_vs_svc_fwm_table;
1668                                 iter->bucket = idx;
1669                                 return svc;
1670                         }
1671                 }
1672         }
1673
1674         return NULL;
1675 }
1676
1677 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1678 {
1679
1680         read_lock_bh(&__ip_vs_svc_lock);
1681         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1682 }
1683
1684
1685 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1686 {
1687         struct list_head *e;
1688         struct ip_vs_iter *iter;
1689         struct ip_vs_service *svc;
1690
1691         ++*pos;
1692         if (v == SEQ_START_TOKEN)
1693                 return ip_vs_info_array(seq,0);
1694
1695         svc = v;
1696         iter = seq->private;
1697
1698         if (iter->table == ip_vs_svc_table) {
1699                 /* next service in table hashed by protocol */
1700                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1701                         return list_entry(e, struct ip_vs_service, s_list);
1702
1703
1704                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1705                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1706                                             s_list) {
1707                                 return svc;
1708                         }
1709                 }
1710
1711                 iter->table = ip_vs_svc_fwm_table;
1712                 iter->bucket = -1;
1713                 goto scan_fwmark;
1714         }
1715
1716         /* next service in hashed by fwmark */
1717         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1718                 return list_entry(e, struct ip_vs_service, f_list);
1719
1720  scan_fwmark:
1721         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1722                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1723                                     f_list)
1724                         return svc;
1725         }
1726
1727         return NULL;
1728 }
1729
/* seq_file ->stop: release the service read lock taken in ip_vs_info_seq_start() */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1734
1735
1736 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1737 {
1738         if (v == SEQ_START_TOKEN) {
1739                 seq_printf(seq,
1740                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1741                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1742                 seq_puts(seq,
1743                          "Prot LocalAddress:Port Scheduler Flags\n");
1744                 seq_puts(seq,
1745                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1746         } else {
1747                 const struct ip_vs_service *svc = v;
1748                 const struct ip_vs_iter *iter = seq->private;
1749                 const struct ip_vs_dest *dest;
1750
1751                 if (iter->table == ip_vs_svc_table)
1752                         seq_printf(seq, "%s  %08X:%04X %s ",
1753                                    ip_vs_proto_name(svc->protocol),
1754                                    ntohl(svc->addr),
1755                                    ntohs(svc->port),
1756                                    svc->scheduler->name);
1757                 else
1758                         seq_printf(seq, "FWM  %08X %s ",
1759                                    svc->fwmark, svc->scheduler->name);
1760
1761                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1762                         seq_printf(seq, "persistent %d %08X\n",
1763                                 svc->timeout,
1764                                 ntohl(svc->netmask));
1765                 else
1766                         seq_putc(seq, '\n');
1767
1768                 list_for_each_entry(dest, &svc->destinations, n_list) {
1769                         seq_printf(seq,
1770                                    "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
1771                                    ntohl(dest->addr), ntohs(dest->port),
1772                                    ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1773                                    atomic_read(&dest->weight),
1774                                    atomic_read(&dest->activeconns),
1775                                    atomic_read(&dest->inactconns));
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static const struct seq_operations ip_vs_info_seq_ops = {
1782         .start = ip_vs_info_seq_start,
1783         .next  = ip_vs_info_seq_next,
1784         .stop  = ip_vs_info_seq_stop,
1785         .show  = ip_vs_info_seq_show,
1786 };
1787
1788 static int ip_vs_info_open(struct inode *inode, struct file *file)
1789 {
1790         return seq_open_private(file, &ip_vs_info_seq_ops,
1791                         sizeof(struct ip_vs_iter));
1792 }
1793
1794 static const struct file_operations ip_vs_info_fops = {
1795         .owner   = THIS_MODULE,
1796         .open    = ip_vs_info_open,
1797         .read    = seq_read,
1798         .llseek  = seq_lseek,
1799         .release = seq_release_private,
1800 };
1801
1802 #endif
1803
/* Global IPVS statistics; the fields are read under ip_vs_stats.lock when shown */
struct ip_vs_stats ip_vs_stats = {
	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
};
1807
1808 #ifdef CONFIG_PROC_FS
1809 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1810 {
1811
1812 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1813         seq_puts(seq,
1814                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1815         seq_printf(seq,
1816                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1817
1818         spin_lock_bh(&ip_vs_stats.lock);
1819         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1820                    ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1821                    (unsigned long long) ip_vs_stats.inbytes,
1822                    (unsigned long long) ip_vs_stats.outbytes);
1823
1824 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1825         seq_puts(seq,
1826                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1827         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1828                         ip_vs_stats.cps,
1829                         ip_vs_stats.inpps,
1830                         ip_vs_stats.outpps,
1831                         ip_vs_stats.inbps,
1832                         ip_vs_stats.outbps);
1833         spin_unlock_bh(&ip_vs_stats.lock);
1834
1835         return 0;
1836 }
1837
1838 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1839 {
1840         return single_open(file, ip_vs_stats_show, NULL);
1841 }
1842
1843 static const struct file_operations ip_vs_stats_fops = {
1844         .owner = THIS_MODULE,
1845         .open = ip_vs_stats_seq_open,
1846         .read = seq_read,
1847         .llseek = seq_lseek,
1848         .release = single_release,
1849 };
1850
1851 #endif
1852
1853 /*
1854  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1855  */
1856 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1857 {
1858         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1859                   u->tcp_timeout,
1860                   u->tcp_fin_timeout,
1861                   u->udp_timeout);
1862
1863 #ifdef CONFIG_IP_VS_PROTO_TCP
1864         if (u->tcp_timeout) {
1865                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1866                         = u->tcp_timeout * HZ;
1867         }
1868
1869         if (u->tcp_fin_timeout) {
1870                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1871                         = u->tcp_fin_timeout * HZ;
1872         }
1873 #endif
1874
1875 #ifdef CONFIG_IP_VS_PROTO_UDP
1876         if (u->udp_timeout) {
1877                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1878                         = u->udp_timeout * HZ;
1879         }
1880 #endif
1881         return 0;
1882 }
1883
1884
1885 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
1886 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
1887 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
1888                                  sizeof(struct ip_vs_dest_user))
1889 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
1890 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
1891 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
1892
1893 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1894         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
1895         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
1896         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
1897         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
1898         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
1899         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
1900         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
1901         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
1902         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
1903         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
1904         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
1905 };
1906
1907 static int
1908 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1909 {
1910         int ret;
1911         unsigned char arg[MAX_ARG_LEN];
1912         struct ip_vs_service_user *usvc;
1913         struct ip_vs_service *svc;
1914         struct ip_vs_dest_user *udest;
1915
1916         if (!capable(CAP_NET_ADMIN))
1917                 return -EPERM;
1918
1919         if (len != set_arglen[SET_CMDID(cmd)]) {
1920                 IP_VS_ERR("set_ctl: len %u != %u\n",
1921                           len, set_arglen[SET_CMDID(cmd)]);
1922                 return -EINVAL;
1923         }
1924
1925         if (copy_from_user(arg, user, len) != 0)
1926                 return -EFAULT;
1927
1928         /* increase the module use count */
1929         ip_vs_use_count_inc();
1930
1931         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1932                 ret = -ERESTARTSYS;
1933                 goto out_dec;
1934         }
1935
1936         if (cmd == IP_VS_SO_SET_FLUSH) {
1937                 /* Flush the virtual service */
1938                 ret = ip_vs_flush();
1939                 goto out_unlock;
1940         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1941                 /* Set timeout values for (tcp tcpfin udp) */
1942                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1943                 goto out_unlock;
1944         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1945                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1946                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1947                 goto out_unlock;
1948         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1949                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1950                 ret = stop_sync_thread(dm->state);
1951                 goto out_unlock;
1952         }
1953
1954         usvc = (struct ip_vs_service_user *)arg;
1955         udest = (struct ip_vs_dest_user *)(usvc + 1);
1956
1957         if (cmd == IP_VS_SO_SET_ZERO) {
1958                 /* if no service address is set, zero counters in all */
1959                 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1960                         ret = ip_vs_zero_all();
1961                         goto out_unlock;
1962                 }
1963         }
1964
1965         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1966         if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1967                 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1968                           usvc->protocol, NIPQUAD(usvc->addr),
1969                           ntohs(usvc->port), usvc->sched_name);
1970                 ret = -EFAULT;
1971                 goto out_unlock;
1972         }
1973
1974         /* Lookup the exact service by <protocol, addr, port> or fwmark */
1975         if (usvc->fwmark == 0)
1976                 svc = __ip_vs_service_get(usvc->protocol,
1977                                           usvc->addr, usvc->port);
1978         else
1979                 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1980
1981         if (cmd != IP_VS_SO_SET_ADD
1982             && (svc == NULL || svc->protocol != usvc->protocol)) {
1983                 ret = -ESRCH;
1984                 goto out_unlock;
1985         }
1986
1987         switch (cmd) {
1988         case IP_VS_SO_SET_ADD:
1989                 if (svc != NULL)
1990                         ret = -EEXIST;
1991                 else
1992                         ret = ip_vs_add_service(usvc, &svc);
1993                 break;
1994         case IP_VS_SO_SET_EDIT:
1995                 ret = ip_vs_edit_service(svc, usvc);
1996                 break;
1997         case IP_VS_SO_SET_DEL:
1998                 ret = ip_vs_del_service(svc);
1999                 if (!ret)
2000                         goto out_unlock;
2001                 break;
2002         case IP_VS_SO_SET_ZERO:
2003                 ret = ip_vs_zero_service(svc);
2004                 break;
2005         case IP_VS_SO_SET_ADDDEST:
2006                 ret = ip_vs_add_dest(svc, udest);
2007                 break;
2008         case IP_VS_SO_SET_EDITDEST:
2009                 ret = ip_vs_edit_dest(svc, udest);
2010                 break;
2011         case IP_VS_SO_SET_DELDEST:
2012                 ret = ip_vs_del_dest(svc, udest);
2013                 break;
2014         default:
2015                 ret = -EINVAL;
2016         }
2017
2018         if (svc)
2019                 ip_vs_service_put(svc);
2020
2021   out_unlock:
2022         mutex_unlock(&__ip_vs_mutex);
2023   out_dec:
2024         /* decrease the module use count */
2025         ip_vs_use_count_dec();
2026
2027         return ret;
2028 }
2029
2030
2031 static void
2032 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2033 {
2034         spin_lock_bh(&src->lock);
2035         memcpy(dst, src, (char*)&src->lock - (char*)src);
2036         spin_unlock_bh(&src->lock);
2037 }
2038
2039 static void
2040 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2041 {
2042         dst->protocol = src->protocol;
2043         dst->addr = src->addr;
2044         dst->port = src->port;
2045         dst->fwmark = src->fwmark;
2046         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2047         dst->flags = src->flags;
2048         dst->timeout = src->timeout / HZ;
2049         dst->netmask = src->netmask;
2050         dst->num_dests = src->num_dests;
2051         ip_vs_copy_stats(&dst->stats, &src->stats);
2052 }
2053
2054 static inline int
2055 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2056                             struct ip_vs_get_services __user *uptr)
2057 {
2058         int idx, count=0;
2059         struct ip_vs_service *svc;
2060         struct ip_vs_service_entry entry;
2061         int ret = 0;
2062
2063         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2064                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2065                         if (count >= get->num_services)
2066                                 goto out;
2067                         memset(&entry, 0, sizeof(entry));
2068                         ip_vs_copy_service(&entry, svc);
2069                         if (copy_to_user(&uptr->entrytable[count],
2070                                          &entry, sizeof(entry))) {
2071                                 ret = -EFAULT;
2072                                 goto out;
2073                         }
2074                         count++;
2075                 }
2076         }
2077
2078         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2079                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2080                         if (count >= get->num_services)
2081                                 goto out;
2082                         memset(&entry, 0, sizeof(entry));
2083                         ip_vs_copy_service(&entry, svc);
2084                         if (copy_to_user(&uptr->entrytable[count],
2085                                          &entry, sizeof(entry))) {
2086                                 ret = -EFAULT;
2087                                 goto out;
2088                         }
2089                         count++;
2090                 }
2091         }
2092   out:
2093         return ret;
2094 }
2095
2096 static inline int
2097 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2098                          struct ip_vs_get_dests __user *uptr)
2099 {
2100         struct ip_vs_service *svc;
2101         int ret = 0;
2102
2103         if (get->fwmark)
2104                 svc = __ip_vs_svc_fwm_get(get->fwmark);
2105         else
2106                 svc = __ip_vs_service_get(get->protocol,
2107                                           get->addr, get->port);
2108         if (svc) {
2109                 int count = 0;
2110                 struct ip_vs_dest *dest;
2111                 struct ip_vs_dest_entry entry;
2112
2113                 list_for_each_entry(dest, &svc->destinations, n_list) {
2114                         if (count >= get->num_dests)
2115                                 break;
2116
2117                         entry.addr = dest->addr;
2118                         entry.port = dest->port;
2119                         entry.conn_flags = atomic_read(&dest->conn_flags);
2120                         entry.weight = atomic_read(&dest->weight);
2121                         entry.u_threshold = dest->u_threshold;
2122                         entry.l_threshold = dest->l_threshold;
2123                         entry.activeconns = atomic_read(&dest->activeconns);
2124                         entry.inactconns = atomic_read(&dest->inactconns);
2125                         entry.persistconns = atomic_read(&dest->persistconns);
2126                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2127                         if (copy_to_user(&uptr->entrytable[count],
2128                                          &entry, sizeof(entry))) {
2129                                 ret = -EFAULT;
2130                                 break;
2131                         }
2132                         count++;
2133                 }
2134                 ip_vs_service_put(svc);
2135         } else
2136                 ret = -ESRCH;
2137         return ret;
2138 }
2139
/*
 * Report the current timeouts, divided by HZ, in @u.  A field is only
 * written when support for its protocol is compiled in.
 */
static inline void
__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
{
#ifdef CONFIG_IP_VS_PROTO_UDP
	u->udp_timeout =
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_TCP
	u->tcp_fin_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
	u->tcp_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
#endif
}
2154
2155
2156 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2157 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2158 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2159 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2160 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2161 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2162 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2163
2164 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2165         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2166         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2167         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2168         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2169         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2170         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2171         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2172 };
2173
2174 static int
2175 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2176 {
2177         unsigned char arg[128];
2178         int ret = 0;
2179
2180         if (!capable(CAP_NET_ADMIN))
2181                 return -EPERM;
2182
2183         if (*len < get_arglen[GET_CMDID(cmd)]) {
2184                 IP_VS_ERR("get_ctl: len %u < %u\n",
2185                           *len, get_arglen[GET_CMDID(cmd)]);
2186                 return -EINVAL;
2187         }
2188
2189         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2190                 return -EFAULT;
2191
2192         if (mutex_lock_interruptible(&__ip_vs_mutex))
2193                 return -ERESTARTSYS;
2194
2195         switch (cmd) {
2196         case IP_VS_SO_GET_VERSION:
2197         {
2198                 char buf[64];
2199
2200                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2201                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2202                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2203                         ret = -EFAULT;
2204                         goto out;
2205                 }
2206                 *len = strlen(buf)+1;
2207         }
2208         break;
2209
2210         case IP_VS_SO_GET_INFO:
2211         {
2212                 struct ip_vs_getinfo info;
2213                 info.version = IP_VS_VERSION_CODE;
2214                 info.size = IP_VS_CONN_TAB_SIZE;
2215                 info.num_services = ip_vs_num_services;
2216                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2217                         ret = -EFAULT;
2218         }
2219         break;
2220
2221         case IP_VS_SO_GET_SERVICES:
2222         {
2223                 struct ip_vs_get_services *get;
2224                 int size;
2225
2226                 get = (struct ip_vs_get_services *)arg;
2227                 size = sizeof(*get) +
2228                         sizeof(struct ip_vs_service_entry) * get->num_services;
2229                 if (*len != size) {
2230                         IP_VS_ERR("length: %u != %u\n", *len, size);
2231                         ret = -EINVAL;
2232                         goto out;
2233                 }
2234                 ret = __ip_vs_get_service_entries(get, user);
2235         }
2236         break;
2237
2238         case IP_VS_SO_GET_SERVICE:
2239         {
2240                 struct ip_vs_service_entry *entry;
2241                 struct ip_vs_service *svc;
2242
2243                 entry = (struct ip_vs_service_entry *)arg;
2244                 if (entry->fwmark)
2245                         svc = __ip_vs_svc_fwm_get(entry->fwmark);
2246                 else
2247                         svc = __ip_vs_service_get(entry->protocol,
2248                                                   entry->addr, entry->port);
2249                 if (svc) {
2250                         ip_vs_copy_service(entry, svc);
2251                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2252                                 ret = -EFAULT;
2253                         ip_vs_service_put(svc);
2254                 } else
2255                         ret = -ESRCH;
2256         }
2257         break;
2258
2259         case IP_VS_SO_GET_DESTS:
2260         {
2261                 struct ip_vs_get_dests *get;
2262                 int size;
2263
2264                 get = (struct ip_vs_get_dests *)arg;
2265                 size = sizeof(*get) +
2266                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2267                 if (*len != size) {
2268                         IP_VS_ERR("length: %u != %u\n", *len, size);
2269                         ret = -EINVAL;
2270                         goto out;
2271                 }
2272                 ret = __ip_vs_get_dest_entries(get, user);
2273         }
2274         break;
2275
2276         case IP_VS_SO_GET_TIMEOUT:
2277         {
2278                 struct ip_vs_timeout_user t;
2279
2280                 __ip_vs_get_timeouts(&t);
2281                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2282                         ret = -EFAULT;
2283         }
2284         break;
2285
2286         case IP_VS_SO_GET_DAEMON:
2287         {
2288                 struct ip_vs_daemon_user d[2];
2289
2290                 memset(&d, 0, sizeof(d));
2291                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2292                         d[0].state = IP_VS_STATE_MASTER;
2293                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2294                         d[0].syncid = ip_vs_master_syncid;
2295                 }
2296                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2297                         d[1].state = IP_VS_STATE_BACKUP;
2298                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2299                         d[1].syncid = ip_vs_backup_syncid;
2300                 }
2301                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2302                         ret = -EFAULT;
2303         }
2304         break;
2305
2306         default:
2307                 ret = -EINVAL;
2308         }
2309
2310   out:
2311         mutex_unlock(&__ip_vs_mutex);
2312         return ret;
2313 }
2314
2315
2316 static struct nf_sockopt_ops ip_vs_sockopts = {
2317         .pf             = PF_INET,
2318         .set_optmin     = IP_VS_BASE_CTL,
2319         .set_optmax     = IP_VS_SO_SET_MAX+1,
2320         .set            = do_ip_vs_set_ctl,
2321         .get_optmin     = IP_VS_BASE_CTL,
2322         .get_optmax     = IP_VS_SO_GET_MAX+1,
2323         .get            = do_ip_vs_get_ctl,
2324         .owner          = THIS_MODULE,
2325 };
2326
2327 /*
2328  * Generic Netlink interface
2329  */
2330
2331 /* IPVS genetlink family */
2332 static struct genl_family ip_vs_genl_family = {
2333         .id             = GENL_ID_GENERATE,
2334         .hdrsize        = 0,
2335         .name           = IPVS_GENL_NAME,
2336         .version        = IPVS_GENL_VERSION,
2337         .maxattr        = IPVS_CMD_MAX,
2338 };
2339
2340 /* Policy used for first-level command attributes */
2341 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2342         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2343         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2344         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2345         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2346         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2347         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2348 };
2349
2350 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2351 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2352         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2353         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2354                                             .len = IP_VS_IFNAME_MAXLEN },
2355         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2356 };
2357
2358 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2359 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2360         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2361         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2362         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2363                                             .len = sizeof(union nf_inet_addr) },
2364         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2365         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2366         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2367                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2368         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2369                                             .len = sizeof(struct ip_vs_flags) },
2370         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2371         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2372         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2373 };
2374
2375 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2376 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2377         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2378                                             .len = sizeof(union nf_inet_addr) },
2379         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2380         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2381         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2382         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2383         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2384         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2385         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2386         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2387         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2388 };
2389
2390 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2391                                  struct ip_vs_stats *stats)
2392 {
2393         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2394         if (!nl_stats)
2395                 return -EMSGSIZE;
2396
2397         spin_lock_bh(&stats->lock);
2398
2399         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2400         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2401         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2402         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2403         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2404         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2405         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2406         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2407         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2408         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2409
2410         spin_unlock_bh(&stats->lock);
2411
2412         nla_nest_end(skb, nl_stats);
2413
2414         return 0;
2415
2416 nla_put_failure:
2417         spin_unlock_bh(&stats->lock);
2418         nla_nest_cancel(skb, nl_stats);
2419         return -EMSGSIZE;
2420 }
2421
2422 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2423                                    struct ip_vs_service *svc)
2424 {
2425         struct nlattr *nl_service;
2426         struct ip_vs_flags flags = { .flags = svc->flags,
2427                                      .mask = ~0 };
2428
2429         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2430         if (!nl_service)
2431                 return -EMSGSIZE;
2432
2433         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2434
2435         if (svc->fwmark) {
2436                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2437         } else {
2438                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2439                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2440                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2441         }
2442
2443         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2444         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2445         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2446         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2447
2448         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2449                 goto nla_put_failure;
2450
2451         nla_nest_end(skb, nl_service);
2452
2453         return 0;
2454
2455 nla_put_failure:
2456         nla_nest_cancel(skb, nl_service);
2457         return -EMSGSIZE;
2458 }
2459
2460 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2461                                    struct ip_vs_service *svc,
2462                                    struct netlink_callback *cb)
2463 {
2464         void *hdr;
2465
2466         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2467                           &ip_vs_genl_family, NLM_F_MULTI,
2468                           IPVS_CMD_NEW_SERVICE);
2469         if (!hdr)
2470                 return -EMSGSIZE;
2471
2472         if (ip_vs_genl_fill_service(skb, svc) < 0)
2473                 goto nla_put_failure;
2474
2475         return genlmsg_end(skb, hdr);
2476
2477 nla_put_failure:
2478         genlmsg_cancel(skb, hdr);
2479         return -EMSGSIZE;
2480 }
2481
2482 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2483                                     struct netlink_callback *cb)
2484 {
2485         int idx = 0, i;
2486         int start = cb->args[0];
2487         struct ip_vs_service *svc;
2488
2489         mutex_lock(&__ip_vs_mutex);
2490         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2491                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2492                         if (++idx <= start)
2493                                 continue;
2494                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2495                                 idx--;
2496                                 goto nla_put_failure;
2497                         }
2498                 }
2499         }
2500
2501         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2502                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2503                         if (++idx <= start)
2504                                 continue;
2505                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2506                                 idx--;
2507                                 goto nla_put_failure;
2508                         }
2509                 }
2510         }
2511
2512 nla_put_failure:
2513         mutex_unlock(&__ip_vs_mutex);
2514         cb->args[0] = idx;
2515
2516         return skb->len;
2517 }
2518
2519 static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
2520                                     struct nlattr *nla, int full_entry)
2521 {
2522         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2523         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2524
2525         /* Parse mandatory identifying service fields first */
2526         if (nla == NULL ||
2527             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2528                 return -EINVAL;
2529
2530         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2531         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2532         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2533         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2534         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2535
2536         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2537                 return -EINVAL;
2538
2539         /* For now, only support IPv4 */
2540         if (nla_get_u16(nla_af) != AF_INET)
2541                 return -EAFNOSUPPORT;
2542
2543         if (nla_fwmark) {
2544                 usvc->protocol = IPPROTO_TCP;
2545                 usvc->fwmark = nla_get_u32(nla_fwmark);
2546         } else {
2547                 usvc->protocol = nla_get_u16(nla_protocol);
2548                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2549                 usvc->port = nla_get_u16(nla_port);
2550                 usvc->fwmark = 0;
2551         }
2552
2553         /* If a full entry was requested, check for the additional fields */
2554         if (full_entry) {
2555                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2556                               *nla_netmask;
2557                 struct ip_vs_flags flags;
2558                 struct ip_vs_service *svc;
2559
2560                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2561                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2562                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2563                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2564
2565                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2566                         return -EINVAL;
2567
2568                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2569
2570                 /* prefill flags from service if it already exists */
2571                 if (usvc->fwmark)
2572                         svc = __ip_vs_svc_fwm_get(usvc->fwmark);
2573                 else
2574                         svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
2575                                                   usvc->port);
2576                 if (svc) {
2577                         usvc->flags = svc->flags;
2578                         ip_vs_service_put(svc);
2579                 } else
2580                         usvc->flags = 0;
2581
2582                 /* set new flags from userland */
2583                 usvc->flags = (usvc->flags & ~flags.mask) |
2584                               (flags.flags & flags.mask);
2585
2586                 strlcpy(usvc->sched_name, nla_data(nla_sched),
2587                         sizeof(usvc->sched_name));
2588                 usvc->timeout = nla_get_u32(nla_timeout);
2589                 usvc->netmask = nla_get_u32(nla_netmask);
2590         }
2591
2592         return 0;
2593 }
2594
2595 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2596 {
2597         struct ip_vs_service_user usvc;
2598         int ret;
2599
2600         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2601         if (ret)
2602                 return ERR_PTR(ret);
2603
2604         if (usvc.fwmark)
2605                 return __ip_vs_svc_fwm_get(usvc.fwmark);
2606         else
2607                 return __ip_vs_service_get(usvc.protocol, usvc.addr,
2608                                            usvc.port);
2609 }
2610
2611 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2612 {
2613         struct nlattr *nl_dest;
2614
2615         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2616         if (!nl_dest)
2617                 return -EMSGSIZE;
2618
2619         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2620         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2621
2622         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2623                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2624         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2625         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2626         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2627         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2628                     atomic_read(&dest->activeconns));
2629         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2630                     atomic_read(&dest->inactconns));
2631         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2632                     atomic_read(&dest->persistconns));
2633
2634         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2635                 goto nla_put_failure;
2636
2637         nla_nest_end(skb, nl_dest);
2638
2639         return 0;
2640
2641 nla_put_failure:
2642         nla_nest_cancel(skb, nl_dest);
2643         return -EMSGSIZE;
2644 }
2645
2646 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2647                                 struct netlink_callback *cb)
2648 {
2649         void *hdr;
2650
2651         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2652                           &ip_vs_genl_family, NLM_F_MULTI,
2653                           IPVS_CMD_NEW_DEST);
2654         if (!hdr)
2655                 return -EMSGSIZE;
2656
2657         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2658                 goto nla_put_failure;
2659
2660         return genlmsg_end(skb, hdr);
2661
2662 nla_put_failure:
2663         genlmsg_cancel(skb, hdr);
2664         return -EMSGSIZE;
2665 }
2666
2667 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2668                                  struct netlink_callback *cb)
2669 {
2670         int idx = 0;
2671         int start = cb->args[0];
2672         struct ip_vs_service *svc;
2673         struct ip_vs_dest *dest;
2674         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2675
2676         mutex_lock(&__ip_vs_mutex);
2677
2678         /* Try to find the service for which to dump destinations */
2679         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2680                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2681                 goto out_err;
2682
2683         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2684         if (IS_ERR(svc) || svc == NULL)
2685                 goto out_err;
2686
2687         /* Dump the destinations */
2688         list_for_each_entry(dest, &svc->destinations, n_list) {
2689                 if (++idx <= start)
2690                         continue;
2691                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2692                         idx--;
2693                         goto nla_put_failure;
2694                 }
2695         }
2696
2697 nla_put_failure:
2698         cb->args[0] = idx;
2699         ip_vs_service_put(svc);
2700
2701 out_err:
2702         mutex_unlock(&__ip_vs_mutex);
2703
2704         return skb->len;
2705 }
2706
2707 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
2708                                  struct nlattr *nla, int full_entry)
2709 {
2710         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2711         struct nlattr *nla_addr, *nla_port;
2712
2713         /* Parse mandatory identifying destination fields first */
2714         if (nla == NULL ||
2715             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2716                 return -EINVAL;
2717
2718         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2719         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2720
2721         if (!(nla_addr && nla_port))
2722                 return -EINVAL;
2723
2724         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2725         udest->port = nla_get_u16(nla_port);
2726
2727         /* If a full entry was requested, check for the additional fields */
2728         if (full_entry) {
2729                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2730                               *nla_l_thresh;
2731
2732                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2733                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2734                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2735                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2736
2737                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2738                         return -EINVAL;
2739
2740                 udest->conn_flags = nla_get_u32(nla_fwd)
2741                                     & IP_VS_CONN_F_FWD_MASK;
2742                 udest->weight = nla_get_u32(nla_weight);
2743                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2744                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2745         }
2746
2747         return 0;
2748 }
2749
2750 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2751                                   const char *mcast_ifn, __be32 syncid)
2752 {
2753         struct nlattr *nl_daemon;
2754
2755         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2756         if (!nl_daemon)
2757                 return -EMSGSIZE;
2758
2759         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2760         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2761         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2762
2763         nla_nest_end(skb, nl_daemon);
2764
2765         return 0;
2766
2767 nla_put_failure:
2768         nla_nest_cancel(skb, nl_daemon);
2769         return -EMSGSIZE;
2770 }
2771
2772 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2773                                   const char *mcast_ifn, __be32 syncid,
2774                                   struct netlink_callback *cb)
2775 {
2776         void *hdr;
2777         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2778                           &ip_vs_genl_family, NLM_F_MULTI,
2779                           IPVS_CMD_NEW_DAEMON);
2780         if (!hdr)
2781                 return -EMSGSIZE;
2782
2783         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2784                 goto nla_put_failure;
2785
2786         return genlmsg_end(skb, hdr);
2787
2788 nla_put_failure:
2789         genlmsg_cancel(skb, hdr);
2790         return -EMSGSIZE;
2791 }
2792
2793 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2794                                    struct netlink_callback *cb)
2795 {
2796         mutex_lock(&__ip_vs_mutex);
2797         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2798                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2799                                            ip_vs_master_mcast_ifn,
2800                                            ip_vs_master_syncid, cb) < 0)
2801                         goto nla_put_failure;
2802
2803                 cb->args[0] = 1;
2804         }
2805
2806         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2807                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2808                                            ip_vs_backup_mcast_ifn,
2809                                            ip_vs_backup_syncid, cb) < 0)
2810                         goto nla_put_failure;
2811
2812                 cb->args[1] = 1;
2813         }
2814
2815 nla_put_failure:
2816         mutex_unlock(&__ip_vs_mutex);
2817
2818         return skb->len;
2819 }
2820
2821 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2822 {
2823         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2824               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2825               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2826                 return -EINVAL;
2827
2828         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2829                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2830                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2831 }
2832
2833 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2834 {
2835         if (!attrs[IPVS_DAEMON_ATTR_STATE])
2836                 return -EINVAL;
2837
2838         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2839 }
2840
2841 static int ip_vs_genl_set_config(struct nlattr **attrs)
2842 {
2843         struct ip_vs_timeout_user t;
2844
2845         __ip_vs_get_timeouts(&t);
2846
2847         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2848                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2849
2850         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2851                 t.tcp_fin_timeout =
2852                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2853
2854         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2855                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2856
2857         return ip_vs_set_timeout(&t);
2858 }
2859
2860 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2861 {
2862         struct ip_vs_service *svc = NULL;
2863         struct ip_vs_service_user usvc;
2864         struct ip_vs_dest_user udest;
2865         int ret = 0, cmd;
2866         int need_full_svc = 0, need_full_dest = 0;
2867
2868         cmd = info->genlhdr->cmd;
2869
2870         mutex_lock(&__ip_vs_mutex);
2871
2872         if (cmd == IPVS_CMD_FLUSH) {
2873                 ret = ip_vs_flush();
2874                 goto out;
2875         } else if (cmd == IPVS_CMD_SET_CONFIG) {
2876                 ret = ip_vs_genl_set_config(info->attrs);
2877                 goto out;
2878         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2879                    cmd == IPVS_CMD_DEL_DAEMON) {
2880
2881                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2882
2883                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2884                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2885                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
2886                                      ip_vs_daemon_policy)) {
2887                         ret = -EINVAL;
2888                         goto out;
2889                 }
2890
2891                 if (cmd == IPVS_CMD_NEW_DAEMON)
2892                         ret = ip_vs_genl_new_daemon(daemon_attrs);
2893                 else
2894                         ret = ip_vs_genl_del_daemon(daemon_attrs);
2895                 goto out;
2896         } else if (cmd == IPVS_CMD_ZERO &&
2897                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2898                 ret = ip_vs_zero_all();
2899                 goto out;
2900         }
2901
2902         /* All following commands require a service argument, so check if we
2903          * received a valid one. We need a full service specification when
2904          * adding / editing a service. Only identifying members otherwise. */
2905         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2906                 need_full_svc = 1;
2907
2908         ret = ip_vs_genl_parse_service(&usvc,
2909                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
2910                                        need_full_svc);
2911         if (ret)
2912                 goto out;
2913
2914         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2915         if (usvc.fwmark == 0)
2916                 svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
2917         else
2918                 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
2919
2920         /* Unless we're adding a new service, the service must already exist */
2921         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2922                 ret = -ESRCH;
2923                 goto out;
2924         }
2925
2926         /* Destination commands require a valid destination argument. For
2927          * adding / editing a destination, we need a full destination
2928          * specification. */
2929         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2930             cmd == IPVS_CMD_DEL_DEST) {
2931                 if (cmd != IPVS_CMD_DEL_DEST)
2932                         need_full_dest = 1;
2933
2934                 ret = ip_vs_genl_parse_dest(&udest,
2935                                             info->attrs[IPVS_CMD_ATTR_DEST],
2936                                             need_full_dest);
2937                 if (ret)
2938                         goto out;
2939         }
2940
2941         switch (cmd) {
2942         case IPVS_CMD_NEW_SERVICE:
2943                 if (svc == NULL)
2944                         ret = ip_vs_add_service(&usvc, &svc);
2945                 else
2946                         ret = -EEXIST;
2947                 break;
2948         case IPVS_CMD_SET_SERVICE:
2949                 ret = ip_vs_edit_service(svc, &usvc);
2950                 break;
2951         case IPVS_CMD_DEL_SERVICE:
2952                 ret = ip_vs_del_service(svc);
2953                 break;
2954         case IPVS_CMD_NEW_DEST:
2955                 ret = ip_vs_add_dest(svc, &udest);
2956                 break;
2957         case IPVS_CMD_SET_DEST:
2958                 ret = ip_vs_edit_dest(svc, &udest);
2959                 break;
2960         case IPVS_CMD_DEL_DEST:
2961                 ret = ip_vs_del_dest(svc, &udest);
2962                 break;
2963         case IPVS_CMD_ZERO:
2964                 ret = ip_vs_zero_service(svc);
2965                 break;
2966         default:
2967                 ret = -EINVAL;
2968         }
2969
2970 out:
2971         if (svc)
2972                 ip_vs_service_put(svc);
2973         mutex_unlock(&__ip_vs_mutex);
2974
2975         return ret;
2976 }
2977
2978 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
2979 {
2980         struct sk_buff *msg;
2981         void *reply;
2982         int ret, cmd, reply_cmd;
2983
2984         cmd = info->genlhdr->cmd;
2985
2986         if (cmd == IPVS_CMD_GET_SERVICE)
2987                 reply_cmd = IPVS_CMD_NEW_SERVICE;
2988         else if (cmd == IPVS_CMD_GET_INFO)
2989                 reply_cmd = IPVS_CMD_SET_INFO;
2990         else if (cmd == IPVS_CMD_GET_CONFIG)
2991                 reply_cmd = IPVS_CMD_SET_CONFIG;
2992         else {
2993                 IP_VS_ERR("unknown Generic Netlink command\n");
2994                 return -EINVAL;
2995         }
2996
2997         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2998         if (!msg)
2999                 return -ENOMEM;
3000
3001         mutex_lock(&__ip_vs_mutex);
3002
3003         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3004         if (reply == NULL)
3005                 goto nla_put_failure;
3006
3007         switch (cmd) {
3008         case IPVS_CMD_GET_SERVICE:
3009         {
3010                 struct ip_vs_service *svc;
3011
3012                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3013                 if (IS_ERR(svc)) {
3014                         ret = PTR_ERR(svc);
3015                         goto out_err;
3016                 } else if (svc) {
3017                         ret = ip_vs_genl_fill_service(msg, svc);
3018                         ip_vs_service_put(svc);
3019                         if (ret)
3020                                 goto nla_put_failure;
3021                 } else {
3022                         ret = -ESRCH;
3023                         goto out_err;
3024                 }
3025
3026                 break;
3027         }
3028
3029         case IPVS_CMD_GET_CONFIG:
3030         {
3031                 struct ip_vs_timeout_user t;
3032
3033                 __ip_vs_get_timeouts(&t);
3034 #ifdef CONFIG_IP_VS_PROTO_TCP
3035                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3036                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3037                             t.tcp_fin_timeout);
3038 #endif
3039 #ifdef CONFIG_IP_VS_PROTO_UDP
3040                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3041 #endif
3042
3043                 break;
3044         }
3045
3046         case IPVS_CMD_GET_INFO:
3047                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3048                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3049                             IP_VS_CONN_TAB_SIZE);
3050                 break;
3051         }
3052
3053         genlmsg_end(msg, reply);
3054         ret = genlmsg_unicast(msg, info->snd_pid);
3055         goto out;
3056
3057 nla_put_failure:
3058         IP_VS_ERR("not enough space in Netlink message\n");
3059         ret = -EMSGSIZE;
3060
3061 out_err:
3062         nlmsg_free(msg);
3063 out:
3064         mutex_unlock(&__ip_vs_mutex);
3065
3066         return ret;
3067 }
3068
3069
3070 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3071         {
3072                 .cmd    = IPVS_CMD_NEW_SERVICE,
3073                 .flags  = GENL_ADMIN_PERM,
3074                 .policy = ip_vs_cmd_policy,
3075                 .doit   = ip_vs_genl_set_cmd,
3076         },
3077         {
3078                 .cmd    = IPVS_CMD_SET_SERVICE,
3079                 .flags  = GENL_ADMIN_PERM,
3080                 .policy = ip_vs_cmd_policy,
3081                 .doit   = ip_vs_genl_set_cmd,
3082         },
3083         {
3084                 .cmd    = IPVS_CMD_DEL_SERVICE,
3085                 .flags  = GENL_ADMIN_PERM,
3086                 .policy = ip_vs_cmd_policy,
3087                 .doit   = ip_vs_genl_set_cmd,
3088         },
3089         {
3090                 .cmd    = IPVS_CMD_GET_SERVICE,
3091                 .flags  = GENL_ADMIN_PERM,
3092                 .doit   = ip_vs_genl_get_cmd,
3093                 .dumpit = ip_vs_genl_dump_services,
3094                 .policy = ip_vs_cmd_policy,
3095         },
3096         {
3097                 .cmd    = IPVS_CMD_NEW_DEST,
3098                 .flags  = GENL_ADMIN_PERM,
3099                 .policy = ip_vs_cmd_policy,
3100                 .doit   = ip_vs_genl_set_cmd,
3101         },
3102         {
3103                 .cmd    = IPVS_CMD_SET_DEST,
3104                 .flags  = GENL_ADMIN_PERM,
3105                 .policy = ip_vs_cmd_policy,
3106                 .doit   = ip_vs_genl_set_cmd,
3107         },
3108         {
3109                 .cmd    = IPVS_CMD_DEL_DEST,
3110                 .flags  = GENL_ADMIN_PERM,
3111                 .policy = ip_vs_cmd_policy,
3112                 .doit   = ip_vs_genl_set_cmd,
3113         },
3114         {
3115                 .cmd    = IPVS_CMD_GET_DEST,
3116                 .flags  = GENL_ADMIN_PERM,
3117                 .policy = ip_vs_cmd_policy,
3118                 .dumpit = ip_vs_genl_dump_dests,
3119         },
3120         {
3121                 .cmd    = IPVS_CMD_NEW_DAEMON,
3122                 .flags  = GENL_ADMIN_PERM,
3123                 .policy = ip_vs_cmd_policy,
3124                 .doit   = ip_vs_genl_set_cmd,
3125         },
3126         {
3127                 .cmd    = IPVS_CMD_DEL_DAEMON,
3128                 .flags  = GENL_ADMIN_PERM,
3129                 .policy = ip_vs_cmd_policy,
3130                 .doit   = ip_vs_genl_set_cmd,
3131         },
3132         {
3133                 .cmd    = IPVS_CMD_GET_DAEMON,
3134                 .flags  = GENL_ADMIN_PERM,
3135                 .dumpit = ip_vs_genl_dump_daemons,
3136         },
3137         {
3138                 .cmd    = IPVS_CMD_SET_CONFIG,
3139                 .flags  = GENL_ADMIN_PERM,
3140                 .policy = ip_vs_cmd_policy,
3141                 .doit   = ip_vs_genl_set_cmd,
3142         },
3143         {
3144                 .cmd    = IPVS_CMD_GET_CONFIG,
3145                 .flags  = GENL_ADMIN_PERM,
3146                 .doit   = ip_vs_genl_get_cmd,
3147         },
3148         {
3149                 .cmd    = IPVS_CMD_GET_INFO,
3150                 .flags  = GENL_ADMIN_PERM,
3151                 .doit   = ip_vs_genl_get_cmd,
3152         },
3153         {
3154                 .cmd    = IPVS_CMD_ZERO,
3155                 .flags  = GENL_ADMIN_PERM,
3156                 .policy = ip_vs_cmd_policy,
3157                 .doit   = ip_vs_genl_set_cmd,
3158         },
3159         {
3160                 .cmd    = IPVS_CMD_FLUSH,
3161                 .flags  = GENL_ADMIN_PERM,
3162                 .doit   = ip_vs_genl_set_cmd,
3163         },
3164 };
3165
3166 static int __init ip_vs_genl_register(void)
3167 {
3168         int ret, i;
3169
3170         ret = genl_register_family(&ip_vs_genl_family);
3171         if (ret)
3172                 return ret;
3173
3174         for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3175                 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3176                 if (ret)
3177                         goto err_out;
3178         }
3179         return 0;
3180
3181 err_out:
3182         genl_unregister_family(&ip_vs_genl_family);
3183         return ret;
3184 }
3185
3186 static void ip_vs_genl_unregister(void)
3187 {
3188         genl_unregister_family(&ip_vs_genl_family);
3189 }
3190
3191 /* End of Generic Netlink interface definitions */
3192
3193
3194 int __init ip_vs_control_init(void)
3195 {
3196         int ret;
3197         int idx;
3198
3199         EnterFunction(2);
3200
3201         ret = nf_register_sockopt(&ip_vs_sockopts);
3202         if (ret) {
3203                 IP_VS_ERR("cannot register sockopt.\n");
3204                 return ret;
3205         }
3206
3207         ret = ip_vs_genl_register();
3208         if (ret) {
3209                 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3210                 nf_unregister_sockopt(&ip_vs_sockopts);
3211                 return ret;
3212         }
3213
3214         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3215         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3216
3217         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3218
3219         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3220         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3221                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3222                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3223         }
3224         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3225                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3226         }
3227
3228         ip_vs_new_estimator(&ip_vs_stats);
3229
3230         /* Hook the defense timer */
3231         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3232
3233         LeaveFunction(2);
3234         return 0;
3235 }
3236
3237
3238 void ip_vs_control_cleanup(void)
3239 {
3240         EnterFunction(2);
3241         ip_vs_trash_cleanup();
3242         cancel_rearming_delayed_work(&defense_work);
3243         cancel_work_sync(&defense_work.work);
3244         ip_vs_kill_estimator(&ip_vs_stats);
3245         unregister_sysctl_table(sysctl_header);
3246         proc_net_remove(&init_net, "ip_vs_stats");
3247         proc_net_remove(&init_net, "ip_vs");
3248         ip_vs_genl_unregister();
3249         nf_unregister_sockopt(&ip_vs_sockopts);
3250         LeaveFunction(2);
3251 }