/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *      This program is free software; you can redistribute it and/or modify it
 *      under the terms of the GNU General Public License version 2 as
 *      published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/inet_sock.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

static struct net_protocol dccp_protocol = {
        .handler        = dccp_v4_rcv,
        .err_handler    = dccp_v4_err,
        .no_policy      = 1,
};

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static char *dccp_state_names[] = {
                [DCCP_OPEN]       = "OPEN",
                [DCCP_REQUESTING] = "REQUESTING",
                [DCCP_PARTOPEN]   = "PARTOPEN",
                [DCCP_LISTEN]     = "LISTEN",
                [DCCP_RESPOND]    = "RESPOND",
                [DCCP_CLOSING]    = "CLOSING",
                [DCCP_TIME_WAIT]  = "TIME_WAIT",
                [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

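/*
 * Illustrative only, not part of this file: a minimal userspace sketch of the
 * sequence a DCCP server is expected to follow, since dccp_listen_start()
 * below fails listen() with -EPROTO when no service code has been set.  The
 * service value (42) and backlog are arbitrary placeholders, and it is
 * assumed that SOL_DCCP and DCCP_SOCKOPT_SERVICE are visible to userspace.
 *
 *	int fd = socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = 42;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));	// addr setup omitted
 *	listen(fd, 5);
 */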
static inline int dccp_listen_start(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        /*
         * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
         * before calling listen()
         */
        if (dccp_service_not_initialized(sk))
                return -EPROTO;
        return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}

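/*
 * Reset the socket back to a pristine DCCP_CLOSED state so it can be reused:
 * pending transmit timers and queued skbs are dropped and the bound address
 * state is cleared unless the user locked it.  dccp_close() below goes
 * through sk->sk_prot->disconnect for the zero-linger abort case.
 */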
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by the poll logic, and correct handling of state changes
           made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

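/*
 * The optval buffer handled here is a flat array of u32s: the first word is
 * the primary service code and any following words form the additional
 * service list.  The total may not exceed DCCP_SERVICE_LIST_MAX_LEN u32s,
 * which is why dccpsl_nr below ends up as optlen / sizeof(u32) - 1.
 */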
static int dccp_setsockopt_service(struct sock *sk, const u32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        struct dccp_sock *dp;
        int err;
        int val;

        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        dp = dccp_sk(sk);
        err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                dp->dccps_packet_size = val;
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   u32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if (dccp_service_not_initialized(sk))
                goto out;

        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

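/*
 * Option names in the 128..191 range are forwarded to the RX CCID and those
 * in 192..255 to the TX CCID (see the case ranges below), leaving the lower
 * values for generic DCCP socket options such as DCCP_SOCKOPT_PACKET_SIZE.
 */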
int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        if (get_user(len, optlen))
                return -EFAULT;

        if (len < sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                val = dp->dccps_packet_size;
                len = sizeof(dp->dccps_packet_size);
                break;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (u32 __user *)optval, optlen);
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

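/*
 * Unlike TCP, each sendmsg() call maps to exactly one DCCP packet: anything
 * larger than the cached MSS is rejected with -EMSGSIZE up front, and the
 * payload is never coalesced with or split across other writes (see the XXX
 * note near the end of the function).
 */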
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_write_xmit(sk, skb, &timeo);
        /*
         * XXX we don't use sk_write_queue, so just discard the packet.
         *     Current plan however is to _use_ sk_write_queue with
         *     an algorithm similar to tcp_sendmsg, where the main difference
         *     is that in DCCP we have to respect packet boundaries, so
         *     no coalescing of skbs.
         *
         *     This bug was _quickly_ found & fixed by just looking at an OSTRA
         *     generated callgraph 8) -acme
         */
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

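/*
 * Only DCCP-Data and DCCP-DataAck packets carry payload up to userspace.
 * A queued Reset or Close is treated like an EOF (the "fin ok" paths below
 * return 0), and every other packet type is simply eaten and skipped.
 */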
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

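/*
 * Each entry below packs the state to move to on close() in the low bits
 * (masked with DCCP_STATE_MASK by dccp_close_state()) and, optionally,
 * DCCP_ACTION_FIN to tell dccp_close() that an active close packet still
 * has to be sent.
 */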
static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

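/*
 * Active close: flush whatever is still sitting in the receive queue, then
 * either abort immediately (zero linger goes through sk_prot->disconnect) or
 * send a CLOSE/CLOSEREQ via dccp_send_close() when dccp_close_state() asks
 * for it, and keep retransmitting it from the CLOSING state until the peer
 * answers.
 */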
void dccp_close(struct sock *sk, long timeout)
{
        struct sk_buff *skb;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        sock_hold(sk);
        sock_orphan(sk);

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        atomic_inc(sk->sk_prot->orphan_count);
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static const struct proto_ops inet_dccp_ops = {
        .family         = PF_INET,
        .owner          = THIS_MODULE,
        .release        = inet_release,
        .bind           = inet_bind,
        .connect        = inet_stream_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = inet_accept,
        .getname        = inet_getname,
        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
        .poll           = dccp_poll,
        .ioctl          = inet_ioctl,
        /* FIXME: work on inet_listen to rename it to sock_common_listen */
        .listen         = inet_dccp_listen,
        .shutdown       = inet_shutdown,
        .setsockopt     = sock_common_setsockopt,
        .getsockopt     = sock_common_getsockopt,
        .sendmsg        = inet_sendmsg,
        .recvmsg        = sock_common_recvmsg,
        .mmap           = sock_no_mmap,
        .sendpage       = sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
        .type           = SOCK_DCCP,
        .protocol       = IPPROTO_DCCP,
        .prot           = &dccp_prot,
        .ops            = &inet_dccp_ops,
        .capability     = -1,
        .no_check       = 0,
        .flags          = INET_PROTOSW_ICSK,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
        KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
        int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
                                  &dccp_ctl_socket);
        if (rc < 0)
                printk(dccp_ctl_socket_err_msg);
        else {
                dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
                inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

                /* Unhash it so that IP input processing does not even
                 * see it, we do not wish this socket to see incoming
                 * packets.
                 */
                dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
        }

        return rc;
}

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
        if (dccp_ctl_socket != NULL) {
                sock_release(dccp_ctl_socket);
                dccp_ctl_socket = NULL;
        }
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

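/*
 * Two per-cpu MIB banks are allocated, mirroring the way the other SNMP
 * statistics in the stack are kept (presumably one bank per softirq/user
 * context, as with the generic SNMP counting helpers).
 */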
static int __init init_dccp_v4_mibs(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
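/*
 * Illustrative only: with DCCP built as a module (assuming the usual dccp.ko
 * module name) the established hash size can be overridden at load time, e.g.
 *
 *	modprobe dccp thash_entries=1024
 *
 * When built in, standard module_param behaviour would expose this as
 * dccp.thash_entries on the kernel command line.
 */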

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

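/*
 * Bring-up order: register dccp_prot, create the bind-bucket cache, size and
 * allocate the established and bind hash tables, set up the MIBs, hook the
 * IPPROTO_DCCP handler, register the protosw and finally create the control
 * socket.  The error labels at the bottom unwind these steps in exactly the
 * reverse order, so a failure at any point leaves no partial state behind.
 */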
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = proto_register(&dccp_prot, 1);

        if (rc)
                goto out;

        rc = -ENOBUFS;
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_proto_unregister;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                                 "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = init_dccp_v4_mibs();
        if (rc)
                goto out_free_dccp_bhash;

        rc = -EAGAIN;
        if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
                goto out_free_dccp_v4_mibs;

        inet_register_protosw(&dccp_v4_protosw);

        rc = dccp_ctl_sock_init();
        if (rc)
                goto out_unregister_protosw;
out:
        return rc;
out_unregister_protosw:
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
        proto_unregister(&dccp_prot);
        goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
        KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
        inet_unregister_protosw(&dccp_v4_protosw);

        if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
                printk(dccp_del_proto_err_msg);

        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        proto_unregister(&dccp_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP
 * (33) values directly.  Also cover the case where the protocol is not
 * specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");