]> git.karo-electronics.de Git - linux-beck.git/blob - net/dccp/proto.c
c6b4362bb1d7fd3d3dba3e311d8463415aaf5dd4
[linux-beck.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Per-CPU SNMP (MIB) statistics counters for DCCP. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of orphaned DCCP sockets (cf. TCP's tcp_orphan_count). */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * Socket hash tables for DCCP; only the listen-hash members need
 * explicit initialisation here.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
/**
 * dccp_set_state  -  Transition a socket to a new DCCP state
 * @sk:    socket whose state is being changed
 * @state: new state (one of the DCCP_* socket states)
 *
 * Maintains the MIB counters tracking currently-established connections
 * and resets of established connections, and unhashes the socket when
 * it enters DCCP_CLOSED.  sk_state is written last so that a closed
 * socket never sits in the hash tables.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);	/* a "transition" must change the state */

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		/* Count teardown of an established/closing connection. */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* Release the local port unless the user holds it bound. */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
94
/*
 * Complete the passive side of a close: the peer initiated teardown
 * (Close or CloseReq) and this endpoint now answers it.  Any state
 * other than the two handled below is left untouched.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
		/* no action for any other state */
	}
}
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
/**
 * dccp_init_sock  -  Initialise the DCCP-specific parts of a socket
 * @sk: socket being set up
 * @ctl_sock_initialized: zero only while creating the per-protocol
 *	control socket, which skips feature negotiation and the
 *	CCID/ack-vector allocations
 *
 * Returns 0 on success, or a negative errno when feature initialisation
 * or an allocation fails.  On CCID allocation failure, whatever was
 * partially allocated is torn down before returning.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* initial MSS; presumably refreshed via icsk_sync_mss */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* Roll back whichever of the two did get created. */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
240
/**
 * dccp_destroy_sock  -  Release all DCCP-private resources of a socket
 * @sk: socket being destroyed
 *
 * Frees the pending retransmit skb, drops the bind-bucket reference,
 * and releases the service list, ack vector, both CCID blocks and the
 * feature-negotiation list.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 * NOTE(review): dccp_sendmsg() in this file does queue skbs on
	 * sk_write_queue, so the statement above looks outdated — verify.
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
275
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
277 {
278         struct dccp_sock *dp = dccp_sk(sk);
279
280         dp->dccps_role = DCCP_ROLE_LISTEN;
281         /* do not start to listen if feature negotiation setup fails */
282         if (dccp_feat_finalise_settings(dp))
283                 return -EPROTO;
284         return inet_csk_listen_start(sk, backlog);
285 }
286
287 static inline int dccp_need_reset(int state)
288 {
289         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290                state != DCCP_REQUESTING;
291 }
292
/**
 * dccp_disconnect  -  Abort the connection (ABORT of RFC 793, sec. 3.8)
 * @sk:    socket to disconnect
 * @flags: unused
 *
 * Purges all queues and returns the socket to DCCP_CLOSED; the bound
 * port is retained.  Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;	/* mid-handshake: no Reset sent */

	dccp_clear_xmit_timers(sk);

	/* Discard all pending receive and transmit data. */
	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the source address unless the user explicitly bound it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
343
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	/* Hang-up: both directions shut down, or socket fully closed. */
	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		/* Readable as soon as any receive memory is in use. */
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
401
/**
 * dccp_ioctl  -  Socket ioctl handler
 * @sk:  socket
 * @cmd: ioctl command; only SIOCINQ is implemented here
 * @arg: user-space pointer receiving the result
 *
 * SIOCINQ reports the payload length of the packet at the head of the
 * receive queue (datagram semantics: only that much can be read by the
 * next recvmsg call).  Returns -ENOTCONN for listening sockets and
 * -ENOIOCTLCMD for unknown commands.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
437
/*
 * Install the service code(s) on a socket.  @service is the primary
 * code; when @optlen covers more than one code, the remaining codes are
 * copied from user space into a freshly allocated dccp_service_list
 * that replaces any previously installed list.
 *
 * NOTE(review): both a failed copy_from_user() and an invalid value in
 * the supplied list return -EFAULT here; -EINVAL might be expected for
 * the latter — confirm the intended contract.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* First u32 is @service itself; the rest form the list. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Drop any previously installed list before installing the new one. */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
472
/*
 * Register permissible checksum-coverage values via feature negotiation
 * for the send (@rx == false) or receive (@rx == true) direction.
 * @cscov must lie in 0..15; 0 keeps the default and registers nothing.
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		/*
		 * NOTE(review): the fill loop above has advanced @cscov to 16
		 * at this point, so the value stored below is not the one the
		 * caller passed in — confirm this is intended (the field may
		 * be rewritten when feature negotiation completes).
		 */
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
508
/*
 * Handle setsockopt() at SOL_DCCP level.  DCCP_SOCKOPT_SERVICE is
 * dispatched before taking the socket lock because its helper acquires
 * the lock itself.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		/* only meaningful on the server side of a connection */
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
555
556 int dccp_setsockopt(struct sock *sk, int level, int optname,
557                     char __user *optval, int optlen)
558 {
559         if (level != SOL_DCCP)
560                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
561                                                              optname, optval,
562                                                              optlen);
563         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
564 }
565
566 EXPORT_SYMBOL_GPL(dccp_setsockopt);
567
568 #ifdef CONFIG_COMPAT
569 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
570                            char __user *optval, int optlen)
571 {
572         if (level != SOL_DCCP)
573                 return inet_csk_compat_setsockopt(sk, level, optname,
574                                                   optval, optlen);
575         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
576 }
577
578 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
579 #endif
580
581 static int dccp_getsockopt_service(struct sock *sk, int len,
582                                    __be32 __user *optval,
583                                    int __user *optlen)
584 {
585         const struct dccp_sock *dp = dccp_sk(sk);
586         const struct dccp_service_list *sl;
587         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
588
589         lock_sock(sk);
590         if ((sl = dp->dccps_service_list) != NULL) {
591                 slen = sl->dccpsl_nr * sizeof(u32);
592                 total_len += slen;
593         }
594
595         err = -EINVAL;
596         if (total_len > len)
597                 goto out;
598
599         err = 0;
600         if (put_user(total_len, optlen) ||
601             put_user(dp->dccps_service, optval) ||
602             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
603                 err = -EFAULT;
604 out:
605         release_sock(sk);
606         return err;
607 }
608
/*
 * Handle getsockopt() at SOL_DCCP level.  Option names 128..191 and
 * 192..255 are forwarded to the RX and TX CCID modules respectively.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		/* current maximum packet size (cached MSS) */
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	/* Scalar options fall through to a common int copy-out. */
	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
660
661 int dccp_getsockopt(struct sock *sk, int level, int optname,
662                     char __user *optval, int __user *optlen)
663 {
664         if (level != SOL_DCCP)
665                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
666                                                              optname, optval,
667                                                              optlen);
668         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
669 }
670
671 EXPORT_SYMBOL_GPL(dccp_getsockopt);
672
673 #ifdef CONFIG_COMPAT
674 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
675                            char __user *optval, int __user *optlen)
676 {
677         if (level != SOL_DCCP)
678                 return inet_csk_compat_getsockopt(sk, level, optname,
679                                                   optval, optlen);
680         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
681 }
682
683 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
684 #endif
685
/**
 * dccp_sendmsg  -  Send one message as a single DCCP packet
 * @iocb: async I/O control block (unused here)
 * @sk:   DCCP socket
 * @msg:  message to send
 * @len:  payload length; must fit into one packet (cached MSS)
 *
 * Returns @len on success, or a negative errno: -EMSGSIZE when the
 * message exceeds one packet, -EAGAIN when the tx queue limit is
 * reached, or an error from waiting/allocation/copy-in.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	/* DCCP is datagram-based: one message becomes one packet. */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/*
	 * NOTE(review): the socket lock is dropped across the allocation, so
	 * the connection state may change before it is re-acquired, and the
	 * state is not re-checked afterwards — confirm this window is safe.
	 */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;	/* on success, report the full message length */
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
741
/**
 * dccp_recvmsg  -  Receive the payload of one DCCP packet
 * @iocb:     async I/O control block (unused here)
 * @sk:       socket to read from
 * @msg:      destination for the payload
 * @len:      capacity of @msg; longer payloads are truncated (MSG_TRUNC)
 * @nonblock: non-zero for non-blocking operation
 * @flags:    MSG_* flags; MSG_PEEK leaves the packet queued
 * @addr_len: unused
 *
 * Returns the number of bytes copied, 0 at connection end, or a
 * negative errno.  NOTE(review): @len is a size_t that temporarily
 * holds negative errnos; the int return truncates it back to the errno
 * value — long-standing idiom, but worth confirming on all targets.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			/* skip packet types that carry no user data */
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Block until data arrives or the timeout expires. */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
847
848 int inet_dccp_listen(struct socket *sock, int backlog)
849 {
850         struct sock *sk = sock->sk;
851         unsigned char old_state;
852         int err;
853
854         lock_sock(sk);
855
856         err = -EINVAL;
857         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
858                 goto out;
859
860         old_state = sk->sk_state;
861         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
862                 goto out;
863
864         /* Really, if the socket is already in listen state
865          * we can only allow the backlog to be adjusted.
866          */
867         if (old_state != DCCP_LISTEN) {
868                 /*
869                  * FIXME: here it probably should be sk->sk_prot->listen_start
870                  * see tcp_listen_start
871                  */
872                 err = dccp_listen_start(sk, backlog);
873                 if (err)
874                         goto out;
875         }
876         sk->sk_max_ack_backlog = backlog;
877         err = 0;
878
879 out:
880         release_sock(sk);
881         return err;
882 }
883
884 EXPORT_SYMBOL_GPL(inet_dccp_listen);
885
/*
 * dccp_terminate_connection  -  Begin the close handshake for this socket
 * @sk: socket whose connection is being torn down
 *
 * Chooses the follow-up state based on the current one. The two
 * fall-throughs are deliberate: PARTOPEN first cancels its delayed-ACK
 * timer and then closes like OPEN; OPEN computes @next_state and falls
 * into the default case, which commits whatever @next_state holds
 * (DCCP_CLOSED unless an OPEN/PARTOPEN path overrode it).
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* Peer initiated the close; send the answering packet and
		 * let the passive-close helper pick the resulting state. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		/* Active close: send Close (or CloseReq, decided inside) */
		dccp_send_close(sk, 1);

		/* A server not using timewait hands the TIMEWAIT burden to
		 * the client by going through ACTIVE_CLOSEREQ instead. */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
912
/*
 * dccp_close  -  User-level close(2) on a DCCP socket
 * @sk:      socket being closed
 * @timeout: linger time to wait for pending transmit data to drain
 *
 * Flushes unread receive data (aborting the connection with a Reset if any
 * was pending), kicks off the protocol close handshake, orphans the socket
 * and destroys it immediately if it has already reached DCCP_CLOSED;
 * otherwise destruction is deferred until the state machine finishes.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: no connection to terminate, just tear
		 * down the accept queue. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	/* Wait (bounded by @timeout) for the close handshake / tx drain */
	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before dropping the socket lock, so we can
	 * detect a concurrent softirq/backlog transition to CLOSED below. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);	/* drops the sock_hold() taken above */
}
991
992 EXPORT_SYMBOL_GPL(dccp_close);
993
/*
 * dccp_shutdown  -  shutdown(2) handler for DCCP
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR selector (as a shutdown mask)
 *
 * Currently a stub: it only logs the request and performs no
 * half-close processing. NOTE(review): callers get success without any
 * state change -- confirm this is the intended (unimplemented) behaviour
 * for this kernel version.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
998
999 EXPORT_SYMBOL_GPL(dccp_shutdown);
1000
/* Allocate the per-CPU DCCP SNMP counters; returns 0 or -ENOMEM-style
 * error from snmp_mib_init(). */
static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1005
/* Release the per-CPU DCCP SNMP counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void**)dccp_statistics);
}
1010
/* Requested size of the established-connections (ehash) table;
 * 0 (the default) means auto-size from available memory in dccp_init(). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1014
#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime on/off switch for dccp_pr_debug() output.
 * NOTE(review): the variable is declared int but registered with the
 * "bool" module_param type; later kernels require the backing variable
 * itself to be bool -- confirm against the target kernel version (the
 * extern declaration lives in dccp.h, outside this file). */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1022
/*
 * dccp_init  -  Module/boot-time initialisation for the DCCP core
 *
 * Allocates the bind-bucket slab, the established (ehash) and bind (bhash)
 * hash tables, the ehash lock array, the SNMP MIBs, the ack-vector cache
 * and the sysctl entries. On any failure it unwinds everything already
 * allocated via the cascading labels at the bottom (each label falls
 * through to the next, so the order of the labels IS the teardown order)
 * and returns a negative errno; returns 0 on success.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	/* dccp_skb_cb must fit in the skb control-buffer scratch space */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	/* goal is a target table size in pages, scaled by total RAM */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	/* An explicit thash_entries= module parameter overrides the heuristic */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Try progressively smaller orders until the allocation succeeds;
	 * ehash_size is rounded down to a power of two for masking. */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* bhash starts from the ehash order and shrinks until it fits
	 * (capped at 64K entries via the `continue` below, which re-tests
	 * the loop condition and so decrements bhash_order). */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
/* Error unwinding: each label undoes one step and falls through to the
 * previous ones, in exact reverse order of the allocations above. */
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}
1132
/*
 * dccp_fini  -  Module unload: release everything dccp_init() allocated
 *
 * The page orders are recomputed from the stored table sizes (dccp_init()
 * rounded ehash_size down to a power of two, so get_order() here must
 * yield the order originally allocated -- the inverse of the sizing done
 * at init time).
 */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1147
1148 module_init(dccp_init);
1149 module_exit(dccp_fini);
1150
1151 MODULE_LICENSE("GPL");
1152 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1153 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");