/*
 * net/dccp/proto.c  (source: karo-tx-linux.git; gitweb navigation header
 * removed — the lines above were HTML scrape residue, not source code)
 */
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/sock.h>
28 #include <net/xfrm.h>
29
30 #include <asm/ioctls.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
/* SNMP (MIB) counters for DCCP, updated via DCCP_INC_STATS()/DCCP_DEC_STATS(). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Number of orphaned DCCP sockets (closed by the user, not yet destroyed). */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Global hash tables for bound/established DCCP sockets. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
52
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * dccp_state_name  -  map a DCCP socket state to a printable name
 * @state: one of the DCCP_* socket states
 *
 * Returns a static string, never NULL. Negative or too-large values
 * (which previously indexed outside the table — undefined behaviour)
 * and unpopulated table slots all yield "INVALID STATE!".
 */
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]             = "OPEN",
	[DCCP_REQUESTING]       = "REQUESTING",
	[DCCP_PARTOPEN]         = "PARTOPEN",
	[DCCP_LISTEN]           = "LISTEN",
	[DCCP_RESPOND]          = "RESPOND",
	[DCCP_CLOSING]          = "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]        = "TIME_WAIT",
	[DCCP_CLOSED]           = "CLOSED",
	};

	/* Bounds-check BEFORE indexing; also reject NULL gaps in the table. */
	if (state < 0 || state >= DCCP_MAX_STATES ||
	    dccp_state_names[state] == NULL)
		return "INVALID STATE!";
	return dccp_state_names[state];
}
#endif
76
/*
 * dccp_set_state  -  transition a DCCP socket to a new state
 * @sk:    socket being transitioned
 * @state: target state (one of the DCCP_* socket states)
 *
 * Keeps the CURRESTAB/ESTABRESETS MIB counters consistent and, when
 * moving to DCCP_CLOSED, unhashes the socket and releases its local
 * port BEFORE sk_state is updated (see comment at the bottom).
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		/* Count only resets of connections that were established. */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* Keep the port if the user explicitly locked it (REUSEADDR-style). */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
116
117 static void dccp_finish_passive_close(struct sock *sk)
118 {
119         switch (sk->sk_state) {
120         case DCCP_PASSIVE_CLOSE:
121                 /* Node (client or server) has received Close packet. */
122                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
123                 dccp_set_state(sk, DCCP_CLOSED);
124                 break;
125         case DCCP_PASSIVE_CLOSEREQ:
126                 /*
127                  * Client received CloseReq. We set the `active' flag so that
128                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
129                  */
130                 dccp_send_close(sk, 1);
131                 dccp_set_state(sk, DCCP_CLOSING);
132         }
133 }
134
135 void dccp_done(struct sock *sk)
136 {
137         dccp_set_state(sk, DCCP_CLOSED);
138         dccp_clear_xmit_timers(sk);
139
140         sk->sk_shutdown = SHUTDOWN_MASK;
141
142         if (!sock_flag(sk, SOCK_DEAD))
143                 sk->sk_state_change(sk);
144         else
145                 inet_csk_destroy_sock(sk);
146 }
147
148 EXPORT_SYMBOL_GPL(dccp_done);
149
150 const char *dccp_packet_name(const int type)
151 {
152         static const char *const dccp_packet_names[] = {
153                 [DCCP_PKT_REQUEST]  = "REQUEST",
154                 [DCCP_PKT_RESPONSE] = "RESPONSE",
155                 [DCCP_PKT_DATA]     = "DATA",
156                 [DCCP_PKT_ACK]      = "ACK",
157                 [DCCP_PKT_DATAACK]  = "DATAACK",
158                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
159                 [DCCP_PKT_CLOSE]    = "CLOSE",
160                 [DCCP_PKT_RESET]    = "RESET",
161                 [DCCP_PKT_SYNC]     = "SYNC",
162                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
163         };
164
165         if (type >= DCCP_NR_PKT_TYPES)
166                 return "INVALID";
167         else
168                 return dccp_packet_names[type];
169 }
170
171 EXPORT_SYMBOL_GPL(dccp_packet_name);
172
173 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174 {
175         struct dccp_sock *dp = dccp_sk(sk);
176         struct inet_connection_sock *icsk = inet_csk(sk);
177
178         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
179         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
180         sk->sk_state            = DCCP_CLOSED;
181         sk->sk_write_space      = dccp_write_space;
182         icsk->icsk_sync_mss     = dccp_sync_mss;
183         dp->dccps_mss_cache     = 536;
184         dp->dccps_rate_last     = jiffies;
185         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
186         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
187         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
188
189         dccp_init_xmit_timers(sk);
190
191         INIT_LIST_HEAD(&dp->dccps_featneg);
192         /* control socket doesn't need feat nego */
193         if (likely(ctl_sock_initialized))
194                 return dccp_feat_init(sk);
195         return 0;
196 }
197
198 EXPORT_SYMBOL_GPL(dccp_init_sock);
199
/*
 * dccp_destroy_sock  -  final per-socket cleanup on destruction
 * @sk: socket being destroyed
 *
 * Releases everything still attached to the socket: queued-but-unsent
 * skbs, the bind-bucket port reference, the service list, the RX ack
 * vector, both CCID instances and pending feature-negotiation entries.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/* Drop packets that were queued for transmission but never sent. */
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	/* Delete both CCID instances and clear the pointers so nothing can
	 * reach them after the free.
	 */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
230
231 static inline int dccp_listen_start(struct sock *sk, int backlog)
232 {
233         struct dccp_sock *dp = dccp_sk(sk);
234
235         dp->dccps_role = DCCP_ROLE_LISTEN;
236         /* do not start to listen if feature negotiation setup fails */
237         if (dccp_feat_finalise_settings(dp))
238                 return -EPROTO;
239         return inet_csk_listen_start(sk, backlog);
240 }
241
242 static inline int dccp_need_reset(int state)
243 {
244         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
245                state != DCCP_REQUESTING;
246 }
247
248 int dccp_disconnect(struct sock *sk, int flags)
249 {
250         struct inet_connection_sock *icsk = inet_csk(sk);
251         struct inet_sock *inet = inet_sk(sk);
252         int err = 0;
253         const int old_state = sk->sk_state;
254
255         if (old_state != DCCP_CLOSED)
256                 dccp_set_state(sk, DCCP_CLOSED);
257
258         /*
259          * This corresponds to the ABORT function of RFC793, sec. 3.8
260          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
261          */
262         if (old_state == DCCP_LISTEN) {
263                 inet_csk_listen_stop(sk);
264         } else if (dccp_need_reset(old_state)) {
265                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
266                 sk->sk_err = ECONNRESET;
267         } else if (old_state == DCCP_REQUESTING)
268                 sk->sk_err = ECONNRESET;
269
270         dccp_clear_xmit_timers(sk);
271
272         __skb_queue_purge(&sk->sk_receive_queue);
273         __skb_queue_purge(&sk->sk_write_queue);
274         if (sk->sk_send_head != NULL) {
275                 __kfree_skb(sk->sk_send_head);
276                 sk->sk_send_head = NULL;
277         }
278
279         inet->inet_dport = 0;
280
281         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
282                 inet_reset_saddr(sk);
283
284         sk->sk_shutdown = 0;
285         sock_reset_flag(sk, SOCK_DONE);
286
287         icsk->icsk_backoff = 0;
288         inet_csk_delack_init(sk);
289         __sk_dst_reset(sk);
290
291         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
292
293         sk->sk_error_report(sk);
294         return err;
295 }
296
297 EXPORT_SYMBOL_GPL(dccp_disconnect);
298
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 *
 *	Returns the POLL* event mask currently applicable to @sk.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sk_sleep(sk), wait);
	/* Listening sockets only ever become readable (incoming connection). */
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	/* Both directions shut down, or never/no-longer connected: hang-up. */
	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
355
356 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
357 {
358         int rc = -ENOTCONN;
359
360         lock_sock(sk);
361
362         if (sk->sk_state == DCCP_LISTEN)
363                 goto out;
364
365         switch (cmd) {
366         case SIOCINQ: {
367                 struct sk_buff *skb;
368                 unsigned long amount = 0;
369
370                 skb = skb_peek(&sk->sk_receive_queue);
371                 if (skb != NULL) {
372                         /*
373                          * We will only return the amount of this packet since
374                          * that is all that will be read.
375                          */
376                         amount = skb->len;
377                 }
378                 rc = put_user(amount, (int __user *)arg);
379         }
380                 break;
381         default:
382                 rc = -ENOIOCTLCMD;
383                 break;
384         }
385 out:
386         release_sock(sk);
387         return rc;
388 }
389
390 EXPORT_SYMBOL_GPL(dccp_ioctl);
391
/*
 * dccp_setsockopt_service  -  handle setsockopt(DCCP_SOCKOPT_SERVICE)
 * @sk:      socket to install the service code(s) on
 * @service: primary service code (first u32 of @optval, already read in
 *           by the caller)
 * @optval:  user buffer; may carry further service codes after the first
 * @optlen:  total length of the user buffer in bytes
 *
 * Accepts one primary code plus an optional list of additional codes
 * (at most DCCP_SERVICE_LIST_MAX_LEN entries). The list is copied and
 * validated before the socket lock is taken, so no user-space copies
 * happen under the lock.
 *
 * Returns 0, -EINVAL (bad code or length), -ENOMEM, or -EFAULT (note:
 * an invalid code inside the list also yields -EFAULT, not -EINVAL).
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* First u32 is @service itself; the remainder form the list. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Replace any previously installed list. */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
426
427 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
428 {
429         u8 *list, len;
430         int i, rc;
431
432         if (cscov < 0 || cscov > 15)
433                 return -EINVAL;
434         /*
435          * Populate a list of permissible values, in the range cscov...15. This
436          * is necessary since feature negotiation of single values only works if
437          * both sides incidentally choose the same value. Since the list starts
438          * lowest-value first, negotiation will pick the smallest shared value.
439          */
440         if (cscov == 0)
441                 return 0;
442         len = 16 - cscov;
443
444         list = kmalloc(len, GFP_KERNEL);
445         if (list == NULL)
446                 return -ENOBUFS;
447
448         for (i = 0; i < len; i++)
449                 list[i] = cscov++;
450
451         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
452
453         if (rc == 0) {
454                 if (rx)
455                         dccp_sk(sk)->dccps_pcrlen = cscov;
456                 else
457                         dccp_sk(sk)->dccps_pcslen = cscov;
458         }
459         kfree(list);
460         return rc;
461 }
462
463 static int dccp_setsockopt_ccid(struct sock *sk, int type,
464                                 char __user *optval, unsigned int optlen)
465 {
466         u8 *val;
467         int rc = 0;
468
469         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
470                 return -EINVAL;
471
472         val = memdup_user(optval, optlen);
473         if (IS_ERR(val))
474                 return PTR_ERR(val);
475
476         lock_sock(sk);
477         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
478                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
479
480         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
481                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
482         release_sock(sk);
483
484         kfree(val);
485         return rc;
486 }
487
/*
 * do_dccp_setsockopt  -  handle all SOL_DCCP setsockopt options
 *
 * Options that interpret @optval themselves (the CCID lists and
 * SERVICE) are dispatched before the generic "read one int" step;
 * everything else requires at least sizeof(int) bytes in @optval.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		/* These take a raw byte list of CCID ids, not an int. */
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE needs both the first u32 (val) and the raw buffer. */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		/* The queuing policy can only change on unconnected sockets. */
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
553
554 int dccp_setsockopt(struct sock *sk, int level, int optname,
555                     char __user *optval, unsigned int optlen)
556 {
557         if (level != SOL_DCCP)
558                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
559                                                              optname, optval,
560                                                              optlen);
561         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
562 }
563
564 EXPORT_SYMBOL_GPL(dccp_setsockopt);
565
#ifdef CONFIG_COMPAT
/* Compat entry point: SOL_DCCP shares the native handler, everything
 * else goes through the generic compat helper.
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
578
579 static int dccp_getsockopt_service(struct sock *sk, int len,
580                                    __be32 __user *optval,
581                                    int __user *optlen)
582 {
583         const struct dccp_sock *dp = dccp_sk(sk);
584         const struct dccp_service_list *sl;
585         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
586
587         lock_sock(sk);
588         if ((sl = dp->dccps_service_list) != NULL) {
589                 slen = sl->dccpsl_nr * sizeof(u32);
590                 total_len += slen;
591         }
592
593         err = -EINVAL;
594         if (total_len > len)
595                 goto out;
596
597         err = 0;
598         if (put_user(total_len, optlen) ||
599             put_user(dp->dccps_service, optval) ||
600             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
601                 err = -EFAULT;
602 out:
603         release_sock(sk);
604         return err;
605 }
606
/*
 * do_dccp_getsockopt  -  handle all SOL_DCCP getsockopt options
 *
 * Most options return a single int; SERVICE, AVAILABLE_CCIDS and the
 * CCID-private ranges (128-191 = RX CCID, 192-255 = TX CCID) have their
 * own output formats and are dispatched to dedicated handlers.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		/* Negative return means no TX CCID is currently active. */
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		/* RX-CCID private option range. */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		/* TX-CCID private option range. */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
674
675 int dccp_getsockopt(struct sock *sk, int level, int optname,
676                     char __user *optval, int __user *optlen)
677 {
678         if (level != SOL_DCCP)
679                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
680                                                              optname, optval,
681                                                              optlen);
682         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
683 }
684
685 EXPORT_SYMBOL_GPL(dccp_getsockopt);
686
#ifdef CONFIG_COMPAT
/* Compat entry point: SOL_DCCP shares the native handler, everything
 * else goes through the generic compat helper.
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
699
/*
 * dccp_msghdr_parse  -  extract SOL_DCCP control-message data from @msg
 * @msg: sendmsg() header, scanned for SOL_DCCP cmsgs
 * @skb: outgoing packet whose fields are filled from the parsed cmsgs
 *
 * Only DCCP_SCM_PRIORITY is currently understood; its u32 payload is
 * stored in skb->priority (see the overloading note below).
 * Returns 0, or -EINVAL on a malformed or unsupported cmsg.
 */
static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		/* Silently skip cmsgs intended for other protocol levels. */
		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		/* qpolicy-specific cmsgs must match the socket's policy. */
		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
739
740 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
741 {
742         const struct dccp_sock *dp = dccp_sk(sk);
743         const int flags = msg->msg_flags;
744         const int noblock = flags & MSG_DONTWAIT;
745         struct sk_buff *skb;
746         int rc, size;
747         long timeo;
748
749         if (len > dp->dccps_mss_cache)
750                 return -EMSGSIZE;
751
752         lock_sock(sk);
753
754         if (dccp_qpolicy_full(sk)) {
755                 rc = -EAGAIN;
756                 goto out_release;
757         }
758
759         timeo = sock_sndtimeo(sk, noblock);
760
761         /*
762          * We have to use sk_stream_wait_connect here to set sk_write_pending,
763          * so that the trick in dccp_rcv_request_sent_state_process.
764          */
765         /* Wait for a connection to finish. */
766         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
767                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
768                         goto out_release;
769
770         size = sk->sk_prot->max_header + len;
771         release_sock(sk);
772         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
773         lock_sock(sk);
774         if (skb == NULL)
775                 goto out_release;
776
777         skb_reserve(skb, sk->sk_prot->max_header);
778         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
779         if (rc != 0)
780                 goto out_discard;
781
782         rc = dccp_msghdr_parse(msg, skb);
783         if (rc != 0)
784                 goto out_discard;
785
786         dccp_qpolicy_push(sk, skb);
787         /*
788          * The xmit_timer is set if the TX CCID is rate-based and will expire
789          * when congestion control permits to release further packets into the
790          * network. Window-based CCIDs do not use this timer.
791          */
792         if (!timer_pending(&dp->dccps_xmit_timer))
793                 dccp_write_xmit(sk);
794 out_release:
795         release_sock(sk);
796         return rc ? : len;
797 out_discard:
798         kfree_skb(skb);
799         goto out_release;
800 }
801
802 EXPORT_SYMBOL_GPL(dccp_sendmsg);
803
/*
 * dccp_recvmsg  -  receive one datagram from a DCCP socket
 * @sk:       socket to read from
 * @msg:      destination message header
 * @len:      size of the user buffer
 * @nonblock: non-zero for non-blocking operation
 * @flags:    MSG_* flags (MSG_PEEK and MSG_TRUNC are honoured)
 * @addr_len: unused here
 *
 * Returns the number of payload bytes copied, 0 on end-of-connection
 * conditions (Close/CloseReq/Reset, shutdown, SOCK_DONE), or a
 * negative errno. Datagram boundaries are preserved: at most one
 * queued packet is consumed per call.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* A peek must not trigger the passive-close handshake. */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Control packets carry no user data: drop and retry. */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Nothing usable queued yet: sleep until data arrives. */
		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		/* Copy at most one packet; flag truncation if it won't fit. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
911
912 int inet_dccp_listen(struct socket *sock, int backlog)
913 {
914         struct sock *sk = sock->sk;
915         unsigned char old_state;
916         int err;
917
918         lock_sock(sk);
919
920         err = -EINVAL;
921         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
922                 goto out;
923
924         old_state = sk->sk_state;
925         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
926                 goto out;
927
928         /* Really, if the socket is already in listen state
929          * we can only allow the backlog to be adjusted.
930          */
931         if (old_state != DCCP_LISTEN) {
932                 /*
933                  * FIXME: here it probably should be sk->sk_prot->listen_start
934                  * see tcp_listen_start
935                  */
936                 err = dccp_listen_start(sk, backlog);
937                 if (err)
938                         goto out;
939         }
940         sk->sk_max_ack_backlog = backlog;
941         err = 0;
942
943 out:
944         release_sock(sk);
945         return err;
946 }
947
948 EXPORT_SYMBOL_GPL(inet_dccp_listen);
949
/*
 * Initiate (or complete) connection teardown, depending on current state.
 *
 * The switch relies on two deliberate fall-throughs:
 *  - PARTOPEN falls into OPEN after stopping its delayed-ack timer;
 *  - OPEN falls into the default case so that the state chosen above
 *    (ACTIVE_CLOSEREQ or CLOSING) is applied via dccp_set_state().
 * Any state not listed goes straight to DCCP_CLOSED.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* Peer already initiated the close; just acknowledge it. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		/* Send a Close (or CloseReq — see dccp_send_close). */
		dccp_send_close(sk, 1);

		/* A server that does not itself hold the TIMEWAIT state
		 * enters ACTIVE_CLOSEREQ — presumably so the client holds
		 * TIMEWAIT instead (cf. RFC 4340, 8.3) — TODO confirm. */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
976
/*
 * dccp_close  -  close(2) path for a DCCP socket
 * @sk:      socket being closed
 * @timeout: jiffies budget for draining the TX queue and waiting for close;
 *           consumed by dccp_flush_write_queue() and sk_stream_wait_close()
 *
 * Discards unread receive data (sending an ABORTED Reset if any was found),
 * runs the normal termination handshake otherwise, then orphans the socket
 * and destroys it immediately if it already reached DCCP_CLOSED; otherwise
 * it is left for the protocol timers to reap.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: a listener has no connection to tear down,
		 * only pending child requests to abort. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot state before orphaning; a softirq may advance it after
	 * we drop the user lock below. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
1075
/*
 * shutdown(2) handler for DCCP — currently only logs the requested mode;
 * neither SEND_SHUTDOWN nor RCV_SHUTDOWN triggers any protocol action here.
 * NOTE(review): presumably sk->sk_shutdown is set by the generic socket
 * layer before this is reached — confirm against the af_inet caller.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1082
1083 static inline int __init dccp_mib_init(void)
1084 {
1085         dccp_statistics = alloc_percpu(struct dccp_mib);
1086         if (!dccp_statistics)
1087                 return -ENOMEM;
1088         return 0;
1089 }
1090
/* Release the per-CPU MIB counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}
1095
1096 static int thash_entries;
1097 module_param(thash_entries, int, 0444);
1098 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1099
1100 #ifdef CONFIG_IP_DCCP_DEBUG
1101 bool dccp_debug;
1102 module_param(dccp_debug, bool, 0644);
1103 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1104
1105 EXPORT_SYMBOL_GPL(dccp_debug);
1106 #endif
1107
/*
 * Module initialisation: sizes and allocates the established ("ehash") and
 * bind ("bhash") hash tables, then brings up the MIB counters, ack-vector
 * code, sysctls and built-in CCIDs.  Uses goto-based unwind: each failure
 * label undoes everything set up before it, in reverse order.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	/* struct dccp_skb_cb is stored in skb->cb[], so it must fit there. */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	/* thash_entries module parameter overrides the RAM-based sizing. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	/* Smallest page order whose page count reaches the goal. */
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Try the allocation, halving the order on failure until it fits. */
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		/* Round the bucket count down to a power of two so that
		 * (hash_size - 1) is usable as a mask. */
		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* Start the bind table at the same order as the established table. */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		/* Cap the bind table at 64K buckets unless already at order 0;
		 * 'continue' re-evaluates the while condition, which shrinks
		 * bhash_order before the next attempt. */
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

/* Error unwind: strictly the reverse of the setup order above. */
out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	/* Clear the pointers so later code never sees stale/freed values. */
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}
1230
/*
 * Module unload: tear down everything dccp_init() set up, in reverse order.
 * The hash table page orders are recomputed from the stored sizes rather
 * than being remembered from init time.
 */
static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}
1247
1248 module_init(dccp_init);
1249 module_exit(dccp_fini);
1250
1251 MODULE_LICENSE("GPL");
1252 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1253 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");