]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/dccp/proto.c
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[karo-tx-linux.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/ip.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37 #include <linux/dccp.h>
38
39 #include "ccid.h"
40 #include "dccp.h"
41
/* Per-cpu DCCP MIB (SNMP) counters; the per-cpu areas are allocated in
 * init_dccp_v4_mibs() and freed in dccp_fini(). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

/* Count of orphaned DCCP sockets (incremented in dccp_close()). */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

/* IPv4 hooks through which the IP layer delivers incoming DCCP packets
 * and ICMP errors to us; registered in dccp_init(). */
static struct net_protocol dccp_protocol = {
        .handler        = dccp_v4_rcv,
        .err_handler    = dccp_v4_err,
};
50
51 const char *dccp_packet_name(const int type)
52 {
53         static const char *dccp_packet_names[] = {
54                 [DCCP_PKT_REQUEST]  = "REQUEST",
55                 [DCCP_PKT_RESPONSE] = "RESPONSE",
56                 [DCCP_PKT_DATA]     = "DATA",
57                 [DCCP_PKT_ACK]      = "ACK",
58                 [DCCP_PKT_DATAACK]  = "DATAACK",
59                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60                 [DCCP_PKT_CLOSE]    = "CLOSE",
61                 [DCCP_PKT_RESET]    = "RESET",
62                 [DCCP_PKT_SYNC]     = "SYNC",
63                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
64         };
65
66         if (type >= DCCP_NR_PKT_TYPES)
67                 return "INVALID";
68         else
69                 return dccp_packet_names[type];
70 }
71
72 EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74 const char *dccp_state_name(const int state)
75 {
76         static char *dccp_state_names[] = {
77         [DCCP_OPEN]       = "OPEN",
78         [DCCP_REQUESTING] = "REQUESTING",
79         [DCCP_PARTOPEN]   = "PARTOPEN",
80         [DCCP_LISTEN]     = "LISTEN",
81         [DCCP_RESPOND]    = "RESPOND",
82         [DCCP_CLOSING]    = "CLOSING",
83         [DCCP_TIME_WAIT]  = "TIME_WAIT",
84         [DCCP_CLOSED]     = "CLOSED",
85         };
86
87         if (state >= DCCP_MAX_STATES)
88                 return "INVALID STATE!";
89         else
90                 return dccp_state_names[state];
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_state_name);
94
95 static inline int dccp_listen_start(struct sock *sk)
96 {
97         struct dccp_sock *dp = dccp_sk(sk);
98
99         dp->dccps_role = DCCP_ROLE_LISTEN;
100         /*
101          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
102          * before calling listen()
103          */
104         if (dccp_service_not_initialized(sk))
105                 return -EPROTO;
106         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
107 }
108
/*
 * Abort any connection state and return the socket to DCCP_CLOSED,
 * keeping the socket object itself usable (roughly the ABORT function
 * of RFC 793, adapted for DCCP).  @flags is currently unused; always
 * returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        /* Drop everything queued for reception and any unsent packet. */
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        /* Forget the source address unless it was explicitly bound. */
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        /* A socket with a bound local port must still own its bind bucket. */
        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}
150
/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 *
 *      Returns the POLL* event mask for select/poll/epoll.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
                              poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by poll logic and correct handling of state changes
           made by another threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        /* Hung up when both directions are shut or the socket is closed. */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                /* Readable as soon as any receive memory is charged. */
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}
206
/* No DCCP-specific ioctls are implemented yet; everything falls through
 * to the generic inet handling via -ENOIOCTLCMD. */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}
212
213 static int dccp_setsockopt_service(struct sock *sk, const u32 service,
214                                    char __user *optval, int optlen)
215 {
216         struct dccp_sock *dp = dccp_sk(sk);
217         struct dccp_service_list *sl = NULL;
218
219         if (service == DCCP_SERVICE_INVALID_VALUE || 
220             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
221                 return -EINVAL;
222
223         if (optlen > sizeof(service)) {
224                 sl = kmalloc(optlen, GFP_KERNEL);
225                 if (sl == NULL)
226                         return -ENOMEM;
227
228                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
229                 if (copy_from_user(sl->dccpsl_list,
230                                    optval + sizeof(service),
231                                    optlen - sizeof(service)) ||
232                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
233                         kfree(sl);
234                         return -EFAULT;
235                 }
236         }
237
238         lock_sock(sk);
239         dp->dccps_service = service;
240
241         if (dp->dccps_service_list != NULL)
242                 kfree(dp->dccps_service_list);
243
244         dp->dccps_service_list = sl;
245         release_sock(sk);
246         return 0;
247 }
248
249 int dccp_setsockopt(struct sock *sk, int level, int optname,
250                     char __user *optval, int optlen)
251 {
252         struct dccp_sock *dp;
253         int err;
254         int val;
255
256         if (level != SOL_DCCP)
257                 return ip_setsockopt(sk, level, optname, optval, optlen);
258
259         if (optlen < sizeof(int))
260                 return -EINVAL;
261
262         if (get_user(val, (int __user *)optval))
263                 return -EFAULT;
264
265         if (optname == DCCP_SOCKOPT_SERVICE)
266                 return dccp_setsockopt_service(sk, val, optval, optlen);
267
268         lock_sock(sk);
269         dp = dccp_sk(sk);
270         err = 0;
271
272         switch (optname) {
273         case DCCP_SOCKOPT_PACKET_SIZE:
274                 dp->dccps_packet_size = val;
275                 break;
276         default:
277                 err = -ENOPROTOOPT;
278                 break;
279         }
280         
281         release_sock(sk);
282         return err;
283 }
284
/*
 * Copy the socket's service code (and optional extra service codes) to
 * user space.  Layout written to @optval: primary code first, then the
 * list entries.  Returns -ENOENT if no service code was ever set,
 * -EINVAL if the caller's buffer is too small, -EFAULT on copy failure.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
                                   u32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if (dccp_service_not_initialized(sk))
                goto out;

        /* Account for extra list entries beyond the primary code. */
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        /* No truncation: the caller's buffer must hold everything. */
        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}
315
316 int dccp_getsockopt(struct sock *sk, int level, int optname,
317                     char __user *optval, int __user *optlen)
318 {
319         struct dccp_sock *dp;
320         int val, len;
321
322         if (level != SOL_DCCP)
323                 return ip_getsockopt(sk, level, optname, optval, optlen);
324
325         if (get_user(len, optlen))
326                 return -EFAULT;
327
328         if (optname == DCCP_SOCKOPT_SERVICE)
329                 return dccp_getsockopt_service(sk, len,
330                                                (u32 __user *)optval, optlen);
331
332         len = min_t(unsigned int, len, sizeof(int));
333         if (len < 0)
334                 return -EINVAL;
335
336         dp = dccp_sk(sk);
337
338         switch (optname) {
339         case DCCP_SOCKOPT_PACKET_SIZE:
340                 val = dp->dccps_packet_size;
341                 break;
342         default:
343                 return -ENOPROTOOPT;
344         }
345
346         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
347                 return -EFAULT;
348
349         return 0;
350 }
351
/*
 * Transmit one datagram.  DCCP is packet oriented: each sendmsg() call
 * becomes exactly one packet, so the payload must fit within the cached
 * MSS.  Returns the number of bytes accepted (== @len) or a negative
 * errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        /* One write == one packet: no fragmentation across sends. */
        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /* NOTE(review): the socket lock is dropped around the (possibly
         * sleeping) allocation; the socket state may change in that
         * window and is not re-checked after re-locking - confirm this
         * is tolerated by the transmit path. */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_write_xmit(sk, skb, &timeo);
        /*
         * XXX we don't use sk_write_queue, so just discard the packet.
         *     Current plan however is to _use_ sk_write_queue with
         *     an algorith similar to tcp_sendmsg, where the main difference
         *     is that in DCCP we have to respect packet boundaries, so
         *     no coalescing of skbs.
         *
         *     This bug was _quickly_ found & fixed by just looking at an OSTRA
         *     generated callgraph 8) -acme
         */
        if (rc != 0)
                goto out_discard;
out_release:
        release_sock(sk);
        return rc ? : len;  /* error code, or bytes accepted on success */
out_discard:
        kfree_skb(skb);
        goto out_release;
}
409
/*
 * Receive one datagram.  Loops until a DATA/DATAACK packet arrives, the
 * connection terminates (RESET/CLOSE, shutdown, error) or the receive
 * timeout expires.  Returns the number of bytes copied, 0 at "EOF", or
 * a negative errno (reusing @len as the return accumulator).
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                /* RESET/CLOSE terminate the stream: report EOF (len 0). */
                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                /* Any other packet type is discarded here - note this
                 * happens even for MSG_PEEK callers. */
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* Nothing queued and non-blocking (or timeout exhausted). */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                /* Sleep until data arrives or the timeout runs down. */
                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                /* Datagram semantics: excess payload is dropped, not kept. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
506
507 static int inet_dccp_listen(struct socket *sock, int backlog)
508 {
509         struct sock *sk = sock->sk;
510         unsigned char old_state;
511         int err;
512
513         lock_sock(sk);
514
515         err = -EINVAL;
516         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
517                 goto out;
518
519         old_state = sk->sk_state;
520         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
521                 goto out;
522
523         /* Really, if the socket is already in listen state
524          * we can only allow the backlog to be adjusted.
525          */
526         if (old_state != DCCP_LISTEN) {
527                 /*
528                  * FIXME: here it probably should be sk->sk_prot->listen_start
529                  * see tcp_listen_start
530                  */
531                 err = dccp_listen_start(sk);
532                 if (err)
533                         goto out;
534         }
535         sk->sk_max_ack_backlog = backlog;
536         err = 0;
537
538 out:
539         release_sock(sk);
540         return err;
541 }
542
/*
 * Close-transition table, indexed by the current socket state.  The
 * DCCP_STATE_MASK bits give the state to enter on close(); the
 * DCCP_ACTION_FIN bit flags that a closing packet must be sent
 * (consumed by dccp_close_state() below).
 */
static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};
555
556 static int dccp_close_state(struct sock *sk)
557 {
558         const int next = dccp_new_state[sk->sk_state];
559         const int ns = next & DCCP_STATE_MASK;
560
561         if (ns != sk->sk_state)
562                 dccp_set_state(sk, ns);
563
564         return next & DCCP_ACTION_FIN;
565 }
566
/*
 * close(2) for a DCCP socket: flush the receive queue, run the state
 * machine's close transition (possibly sending CLOSE/CLOSEREQ), then
 * orphan the socket and either destroy it (if already CLOSED) or leave
 * it to the protocol to finish termination.  @timeout bounds how long
 * we linger waiting for the close handshake.
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct sk_buff *skb;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                /* Transition table says a closing packet must go out. */
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Keep a reference while we detach the socket from the file. */
        sock_hold(sk);
        sock_orphan(sk);

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        atomic_inc(sk->sk_prot->orphan_count);
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}
647
/* shutdown(2) is not implemented for DCCP yet; this is a stub that only
 * logs the call.  @how is ignored. */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}
652
/*
 * Socket-layer operations for AF_INET DCCP sockets.  Most entries reuse
 * generic inet/sock_common helpers; only poll and listen currently need
 * DCCP-specific implementations.
 */
static struct proto_ops inet_dccp_ops = {
        .family         = PF_INET,
        .owner          = THIS_MODULE,
        .release        = inet_release,
        .bind           = inet_bind,
        .connect        = inet_stream_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = inet_accept,
        .getname        = inet_getname,
        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
        .poll           = dccp_poll,
        .ioctl          = inet_ioctl,
        /* FIXME: work on inet_listen to rename it to sock_common_listen */
        .listen         = inet_dccp_listen,
        .shutdown       = inet_shutdown,
        .setsockopt     = sock_common_setsockopt,
        .getsockopt     = sock_common_getsockopt,
        .sendmsg        = inet_sendmsg,
        .recvmsg        = sock_common_recvmsg,
        .mmap           = sock_no_mmap,
        .sendpage       = sock_no_sendpage,
};
675
extern struct net_proto_family inet_family_ops;

/*
 * Entry that plugs DCCP into the inet socket switch, so that
 * socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP) resolves to dccp_v4_prot /
 * inet_dccp_ops.  Registered in dccp_init().
 */
static struct inet_protosw dccp_v4_protosw = {
        .type           = SOCK_DCCP,
        .protocol       = IPPROTO_DCCP,
        .prot           = &dccp_v4_prot,
        .ops            = &inet_dccp_ops,
        .capability     = -1,   /* no special capability required */
        .no_check       = 0,
        .flags          = 0,
};
687
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
        KERN_ERR "DCCP: Failed to create the control socket.\n";

/* Create the in-kernel control socket used to answer OOTB packets.
 * Returns 0 on success or the sock_create_kern() error code. */
static int __init dccp_ctl_sock_init(void)
{
        int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
                                  &dccp_ctl_socket);
        if (rc < 0)
                printk(dccp_ctl_socket_err_msg);
        else {
                /* GFP_ATOMIC: allocations on this socket must not sleep. */
                dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
                inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

                /* Unhash it so that IP input processing does not even
                 * see it, we do not wish this socket to see incoming
                 * packets.
                 */
                dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
        }

        return rc;
}
717
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/* Release the OOTB control socket (module-unload hack only). */
void dccp_ctl_sock_exit(void)
{
        struct socket *sock = dccp_ctl_socket;

        if (sock == NULL)
                return;

        dccp_ctl_socket = NULL;
        sock_release(sock);
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
729
730 static int __init init_dccp_v4_mibs(void)
731 {
732         int rc = -ENOMEM;
733
734         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
735         if (dccp_statistics[0] == NULL)
736                 goto out;
737
738         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
739         if (dccp_statistics[1] == NULL)
740                 goto out_free_one;
741
742         rc = 0;
743 out:
744         return rc;
745 out_free_one:
746         free_percpu(dccp_statistics[0]);
747         dccp_statistics[0] = NULL;
748         goto out;
749
750 }
751
/* Requested number of established-hash buckets; 0 (the default) lets
 * dccp_init() size the table from available memory. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Non-zero enables dccp_pr_debug() output. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
761
762 static int __init dccp_init(void)
763 {
764         unsigned long goal;
765         int ehash_order, bhash_order, i;
766         int rc = proto_register(&dccp_v4_prot, 1);
767
768         if (rc)
769                 goto out;
770
771         dccp_hashinfo.bind_bucket_cachep =
772                 kmem_cache_create("dccp_bind_bucket",
773                                   sizeof(struct inet_bind_bucket), 0,
774                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
775         if (!dccp_hashinfo.bind_bucket_cachep)
776                 goto out_proto_unregister;
777
778         /*
779          * Size and allocate the main established and bind bucket
780          * hash tables.
781          *
782          * The methodology is similar to that of the buffer cache.
783          */
784         if (num_physpages >= (128 * 1024))
785                 goal = num_physpages >> (21 - PAGE_SHIFT);
786         else
787                 goal = num_physpages >> (23 - PAGE_SHIFT);
788
789         if (thash_entries)
790                 goal = (thash_entries *
791                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
792         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
793                 ;
794         do {
795                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
796                                         sizeof(struct inet_ehash_bucket);
797                 dccp_hashinfo.ehash_size >>= 1;
798                 while (dccp_hashinfo.ehash_size &
799                        (dccp_hashinfo.ehash_size - 1))
800                         dccp_hashinfo.ehash_size--;
801                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
802                         __get_free_pages(GFP_ATOMIC, ehash_order);
803         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
804
805         if (!dccp_hashinfo.ehash) {
806                 printk(KERN_CRIT "Failed to allocate DCCP "
807                                  "established hash table\n");
808                 goto out_free_bind_bucket_cachep;
809         }
810
811         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
812                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
813                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
814         }
815
816         bhash_order = ehash_order;
817
818         do {
819                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
820                                         sizeof(struct inet_bind_hashbucket);
821                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
822                     bhash_order > 0)
823                         continue;
824                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
825                         __get_free_pages(GFP_ATOMIC, bhash_order);
826         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
827
828         if (!dccp_hashinfo.bhash) {
829                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
830                 goto out_free_dccp_ehash;
831         }
832
833         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
834                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
835                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
836         }
837
838         if (init_dccp_v4_mibs())
839                 goto out_free_dccp_bhash;
840
841         rc = -EAGAIN;
842         if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
843                 goto out_free_dccp_v4_mibs;
844
845         inet_register_protosw(&dccp_v4_protosw);
846
847         rc = dccp_ctl_sock_init();
848         if (rc)
849                 goto out_unregister_protosw;
850 out:
851         return rc;
852 out_unregister_protosw:
853         inet_unregister_protosw(&dccp_v4_protosw);
854         inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
855 out_free_dccp_v4_mibs:
856         free_percpu(dccp_statistics[0]);
857         free_percpu(dccp_statistics[1]);
858         dccp_statistics[0] = dccp_statistics[1] = NULL;
859 out_free_dccp_bhash:
860         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
861         dccp_hashinfo.bhash = NULL;
862 out_free_dccp_ehash:
863         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
864         dccp_hashinfo.ehash = NULL;
865 out_free_bind_bucket_cachep:
866         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
867         dccp_hashinfo.bind_bucket_cachep = NULL;
868 out_proto_unregister:
869         proto_unregister(&dccp_v4_prot);
870         goto out;
871 }
872
static const char dccp_del_proto_err_msg[] __exitdata =
        KERN_ERR "can't remove dccp net_protocol\n";

/* Module unload: tear everything down in reverse order of dccp_init(). */
static void __exit dccp_fini(void)
{
        inet_unregister_protosw(&dccp_v4_protosw);

        if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
                printk(dccp_del_proto_err_msg);

        /* Per-cpu MIB counters. */
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        /* Bind and established hash tables. */
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        proto_unregister(&dccp_v4_prot);
}
894
module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly.  Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");