4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
/*
 * Module-wide state: SNMP statistics, count of orphaned sockets, the
 * established/bind hash tables shared with the inet_connection_sock
 * layer, and the tx queue-length sysctl.
 * NOTE(review): the embedded numbering (39..56) is non-contiguous -- the
 * listing has dropped lines here (e.g. the closing "};" of dccp_hashinfo).
 */
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41 EXPORT_SYMBOL_GPL(dccp_statistics);
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48 .lhash_lock = RW_LOCK_UNLOCKED,
49 .lhash_users = ATOMIC_INIT(0),
50 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
/*
 * dccp_set_state - move socket @sk to @state, keeping the SNMP
 * CURRESTAB/ESTABRESETS counters in sync and unhashing the socket
 * (and releasing its port unless user-locked) on transition to CLOSED.
 * NOTE(review): numbering 58..93 is non-contiguous -- the switch(state)
 * skeleton, the inet_put_port() call and the final state assignment were
 * dropped by the extraction; this fragment is not compilable as-is.
 */
58 void dccp_set_state(struct sock *sk, const int state)
60 const int oldstate = sk->sk_state;
62 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
63 dccp_state_name(oldstate), dccp_state_name(state));
/* A transition to the current state indicates a caller bug. */
64 WARN_ON(state == oldstate);
/* presumably under "case DCCP_OPEN:" -- count newly established conns */
68 if (oldstate != DCCP_OPEN)
69 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
/* presumably under "case DCCP_CLOSED:" -- established conns being reset */
73 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74 oldstate == DCCP_CLOSING)
75 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77 sk->sk_prot->unhash(sk);
/* Release the bound port unless the user holds SOCK_BINDPORT_LOCK. */
78 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
83 if (oldstate == DCCP_OPEN)
84 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
87 /* Change state AFTER socket is unhashed to avoid closed
88 * socket sitting in hash tables.
93 EXPORT_SYMBOL_GPL(dccp_set_state);
95 static void dccp_finish_passive_close(struct sock *sk)
97 switch (sk->sk_state) {
98 case DCCP_PASSIVE_CLOSE:
99 /* Node (client or server) has received Close packet. */
100 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101 dccp_set_state(sk, DCCP_CLOSED);
103 case DCCP_PASSIVE_CLOSEREQ:
105 * Client received CloseReq. We set the `active' flag so that
106 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108 dccp_send_close(sk, 1);
109 dccp_set_state(sk, DCCP_CLOSING);
113 void dccp_done(struct sock *sk)
115 dccp_set_state(sk, DCCP_CLOSED);
116 dccp_clear_xmit_timers(sk);
118 sk->sk_shutdown = SHUTDOWN_MASK;
120 if (!sock_flag(sk, SOCK_DEAD))
121 sk->sk_state_change(sk);
123 inet_csk_destroy_sock(sk);
126 EXPORT_SYMBOL_GPL(dccp_done);
128 const char *dccp_packet_name(const int type)
130 static const char *dccp_packet_names[] = {
131 [DCCP_PKT_REQUEST] = "REQUEST",
132 [DCCP_PKT_RESPONSE] = "RESPONSE",
133 [DCCP_PKT_DATA] = "DATA",
134 [DCCP_PKT_ACK] = "ACK",
135 [DCCP_PKT_DATAACK] = "DATAACK",
136 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137 [DCCP_PKT_CLOSE] = "CLOSE",
138 [DCCP_PKT_RESET] = "RESET",
139 [DCCP_PKT_SYNC] = "SYNC",
140 [DCCP_PKT_SYNCACK] = "SYNCACK",
143 if (type >= DCCP_NR_PKT_TYPES)
146 return dccp_packet_names[type];
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
151 const char *dccp_state_name(const int state)
153 static char *dccp_state_names[] = {
154 [DCCP_OPEN] = "OPEN",
155 [DCCP_REQUESTING] = "REQUESTING",
156 [DCCP_PARTOPEN] = "PARTOPEN",
157 [DCCP_LISTEN] = "LISTEN",
158 [DCCP_RESPOND] = "RESPOND",
159 [DCCP_CLOSING] = "CLOSING",
160 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
161 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
162 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163 [DCCP_TIME_WAIT] = "TIME_WAIT",
164 [DCCP_CLOSED] = "CLOSED",
167 if (state >= DCCP_MAX_STATES)
168 return "INVALID STATE!";
170 return dccp_state_names[state];
173 EXPORT_SYMBOL_GPL(dccp_state_name);
/*
 * dccp_init_sock - initialise a freshly created DCCP socket: minisock,
 * timers, default MSS/ack-ratio/role/service, feature-negotiation list,
 * and (for non-control sockets) ack-vector and RX/TX CCID blocks.
 * Returns 0 on success or a negative errno (error paths not visible in
 * this fragment).
 * NOTE(review): numbering 175..239 is non-contiguous -- error-handling
 * lines (goto/return statements, closing braces) were dropped by the
 * extraction; this fragment is not compilable as-is.
 */
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
177 struct dccp_sock *dp = dccp_sk(sk);
178 struct dccp_minisock *dmsk = dccp_msk(sk);
179 struct inet_connection_sock *icsk = inet_csk(sk);
181 dccp_minisock_init(&dp->dccps_minisock);
/* Conservative defaults until feature negotiation completes. */
183 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
184 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
185 sk->sk_state = DCCP_CLOSED;
186 sk->sk_write_space = dccp_write_space;
187 icsk->icsk_sync_mss = dccp_sync_mss;
188 dp->dccps_mss_cache = 536;
189 dp->dccps_rate_last = jiffies;
190 dp->dccps_role = DCCP_ROLE_UNDEFINED;
191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
192 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
194 dccp_init_xmit_timers(sk);
196 INIT_LIST_HEAD(&dp->dccps_featneg);
198 * FIXME: We're hardcoding the CCID, and doing this at this point makes
199 * the listening (master) sock get CCID control blocks, which is not
200 * necessary, but for now, to not mess with the test userspace apps,
201 * lets leave it here, later the real solution is to do this in a
202 * setsockopt(CCIDs-I-want/accept). -acme
204 if (likely(ctl_sock_initialized)) {
205 int rc = dccp_feat_init(dmsk);
/* Ack vectors are only allocated when the feature is negotiated on. */
210 if (dmsk->dccpms_send_ack_vector) {
211 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212 if (dp->dccps_hc_rx_ackvec == NULL)
215 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
217 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
/* If either CCID failed, undo everything allocated above. */
219 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220 dp->dccps_hc_tx_ccid == NULL)) {
221 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223 if (dmsk->dccpms_send_ack_vector) {
224 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225 dp->dccps_hc_rx_ackvec = NULL;
227 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
231 /* control socket doesn't need feat nego */
232 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233 INIT_LIST_HEAD(&dmsk->dccpms_conf);
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock - release per-socket resources: the pending
 * retransmit skb, bind bucket, service list, ack vector, both CCID
 * blocks and the feature-negotiation list.
 * NOTE(review): non-contiguous numbering -- e.g. the call under the
 * bind-hash check (presumably inet_put_port()) was dropped by the
 * extraction.
 */
241 void dccp_destroy_sock(struct sock *sk)
243 struct dccp_sock *dp = dccp_sk(sk);
244 struct dccp_minisock *dmsk = dccp_msk(sk);
247 * DCCP doesn't use sk_write_queue, just sk_send_head
248 * for retransmissions
250 if (sk->sk_send_head != NULL) {
251 kfree_skb(sk->sk_send_head);
252 sk->sk_send_head = NULL;
255 /* Clean up a referenced DCCP bind bucket. */
256 if (inet_csk(sk)->icsk_bind_hash != NULL)
259 kfree(dp->dccps_service_list);
260 dp->dccps_service_list = NULL;
262 if (dmsk->dccpms_send_ack_vector) {
263 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264 dp->dccps_hc_rx_ackvec = NULL;
/* ccid_hc_*_delete() handle NULL, so unconditional teardown is safe. */
266 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
270 /* clean up feature negotiation state */
271 dccp_feat_list_purge(&dp->dccps_featneg);
274 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
278 struct dccp_sock *dp = dccp_sk(sk);
280 dp->dccps_role = DCCP_ROLE_LISTEN;
281 return inet_csk_listen_start(sk, backlog);
284 static inline int dccp_need_reset(int state)
286 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
287 state != DCCP_REQUESTING;
/*
 * dccp_disconnect - abort the connection (ABORT function of RFC 793,
 * sec. 3.8): send a Reset("Aborted") where required, purge all queues,
 * stop timers and reset per-connection state so the socket can be
 * reused.
 * NOTE(review): numbering 290..339 is non-contiguous -- lines such as
 * the final return and some resets (e.g. sk_dst_reset) were dropped by
 * the extraction; this fragment is not compilable as-is.
 */
290 int dccp_disconnect(struct sock *sk, int flags)
292 struct inet_connection_sock *icsk = inet_csk(sk);
293 struct inet_sock *inet = inet_sk(sk);
295 const int old_state = sk->sk_state;
297 if (old_state != DCCP_CLOSED)
298 dccp_set_state(sk, DCCP_CLOSED);
301 * This corresponds to the ABORT function of RFC793, sec. 3.8
302 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
304 if (old_state == DCCP_LISTEN) {
305 inet_csk_listen_stop(sk);
306 } else if (dccp_need_reset(old_state)) {
307 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
308 sk->sk_err = ECONNRESET;
309 } else if (old_state == DCCP_REQUESTING)
310 sk->sk_err = ECONNRESET;
312 dccp_clear_xmit_timers(sk);
/* Drop anything still queued in either direction. */
314 __skb_queue_purge(&sk->sk_receive_queue);
315 __skb_queue_purge(&sk->sk_write_queue);
316 if (sk->sk_send_head != NULL) {
317 __kfree_skb(sk->sk_send_head);
318 sk->sk_send_head = NULL;
/* Forget the source address unless the user explicitly bound it. */
323 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
324 inet_reset_saddr(sk);
327 sock_reset_flag(sk, SOCK_DONE);
329 icsk->icsk_backoff = 0;
330 inet_csk_delack_init(sk);
333 WARN_ON(inet->num && !icsk->icsk_bind_hash);
335 sk->sk_error_report(sk);
339 EXPORT_SYMBOL_GPL(dccp_disconnect);
342 * Wait for a DCCP event.
344 * Note that we don't need to lock the socket, as the upper poll layers
345 * take care of normal races (between the test and the event) and we don't
346 * go look at any of the socket buffers directly.
/*
 * dccp_poll - poll(2)/select(2) hook: build the POLL* mask from socket
 * state, shutdown flags, receive-queue occupancy and write space.
 * NOTE(review): numbering 342..397 is non-contiguous -- the mask
 * variable declaration, some braces and the final return were dropped
 * by the extraction; this fragment is not compilable as-is.
 */
348 unsigned int dccp_poll(struct file *file, struct socket *sock,
352 struct sock *sk = sock->sk;
354 poll_wait(file, sk->sk_sleep, wait);
355 if (sk->sk_state == DCCP_LISTEN)
356 return inet_csk_listen_poll(sk);
358 /* Socket is not locked. We are protected from async events
359 by poll logic and correct handling of state changes
360 made by another threads is impossible in any case.
367 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
369 if (sk->sk_shutdown & RCV_SHUTDOWN)
370 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
/* Connected: readable if data is queued, writable if space remains. */
373 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
374 if (atomic_read(&sk->sk_rmem_alloc) > 0)
375 mask |= POLLIN | POLLRDNORM;
377 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
378 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
379 mask |= POLLOUT | POLLWRNORM;
380 } else { /* send SIGIO later */
381 set_bit(SOCK_ASYNC_NOSPACE,
382 &sk->sk_socket->flags);
383 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
385 /* Race breaker. If space is freed after
386 * wspace test but before the flags are set,
387 * IO signal will be lost.
389 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
390 mask |= POLLOUT | POLLWRNORM;
397 EXPORT_SYMBOL_GPL(dccp_poll);
/*
 * dccp_ioctl - socket ioctl hook; the visible fragment handles a
 * queue-inspection command (presumably SIOCINQ/FIONREAD: report the
 * length of the next pending packet, since only one packet is read at
 * a time).
 * NOTE(review): numbering 399..433 is non-contiguous -- lock/unlock,
 * the switch statement, amount computation and default case were
 * dropped by the extraction; this fragment is not compilable as-is.
 */
399 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
405 if (sk->sk_state == DCCP_LISTEN)
411 unsigned long amount = 0;
413 skb = skb_peek(&sk->sk_receive_queue);
416 * We will only return the amount of this packet since
417 * that is all that will be read.
421 rc = put_user(amount, (int __user *)arg);
433 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service - install the service code plus an optional
 * list of additional service codes copied from user space. Rejects the
 * reserved invalid value and over-long lists.
 * NOTE(review): numbering 435..465 is non-contiguous -- error returns,
 * allocation-failure handling, socket locking and the final return were
 * dropped by the extraction; this fragment is not compilable as-is.
 */
435 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
436 char __user *optval, int optlen)
438 struct dccp_sock *dp = dccp_sk(sk);
439 struct dccp_service_list *sl = NULL;
440 if (service == DCCP_SERVICE_INVALID_VALUE ||
442 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
/* More than one code supplied: copy the trailing list from userspace. */
445 if (optlen > sizeof(service)) {
446 sl = kmalloc(optlen, GFP_KERNEL);
450 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
451 if (copy_from_user(sl->dccpsl_list,
452 optval + sizeof(service),
453 optlen - sizeof(service)) ||
454 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
/* Replace any previously installed list. */
461 dp->dccps_service = service;
463 kfree(dp->dccps_service_list);
465 dp->dccps_service_list = sl;
470 /* byte 1 is feature. the rest is the preference list */
/*
 * dccp_setsockopt_change - register a feature-negotiation Change option
 * (type DCCPO_CHANGE_L/R) from a user-supplied struct dccp_so_feat.
 * NOTE(review): numbering 470..508 is non-contiguous -- error returns,
 * kfree of val and the final return were dropped by the extraction;
 * this fragment is not compilable as-is.
 */
471 static int dccp_setsockopt_change(struct sock *sk, int type,
472 struct dccp_so_feat __user *optval)
474 struct dccp_so_feat opt;
478 if (copy_from_user(&opt, optval, sizeof(opt)))
481 * rfc4340: 6.1. Change Options
483 if (opt.dccpsf_len < 1)
486 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
490 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
495 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
496 val, opt.dccpsf_len, GFP_KERNEL);
/*
 * do_dccp_setsockopt - handle SOL_DCCP options: service codes, feature
 * Change_L/R, server timewait, and send/receive checksum coverage
 * (RFC 4340, sec. 9.2/9.2.1).
 * NOTE(review): numbering 508..575 is non-contiguous -- lock_sock/
 * release_sock, break statements, error assignments and the final
 * return were dropped by the extraction; not compilable as-is.
 */
508 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
509 char __user *optval, int optlen)
511 struct dccp_sock *dp = dccp_sk(sk);
514 if (optlen < sizeof(int))
517 if (get_user(val, (int __user *)optval))
/* SERVICE takes the raw optval buffer, so dispatch before the switch. */
520 if (optname == DCCP_SOCKOPT_SERVICE)
521 return dccp_setsockopt_service(sk, val, optval, optlen);
525 case DCCP_SOCKOPT_PACKET_SIZE:
526 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
529 case DCCP_SOCKOPT_CHANGE_L:
530 if (optlen != sizeof(struct dccp_so_feat))
533 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
534 (struct dccp_so_feat __user *)
537 case DCCP_SOCKOPT_CHANGE_R:
538 if (optlen != sizeof(struct dccp_so_feat))
541 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
542 (struct dccp_so_feat __user *)
545 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
546 if (dp->dccps_role != DCCP_ROLE_SERVER)
549 dp->dccps_server_timewait = (val != 0);
551 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
552 if (val < 0 || val > 15)
555 dp->dccps_pcslen = val;
557 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
558 if (val < 0 || val > 15)
561 dp->dccps_pcrlen = val;
562 /* FIXME: add feature negotiation,
563 * ChangeL(MinimumChecksumCoverage, val) */
575 int dccp_setsockopt(struct sock *sk, int level, int optname,
576 char __user *optval, int optlen)
578 if (level != SOL_DCCP)
579 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
582 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
585 EXPORT_SYMBOL_GPL(dccp_setsockopt);
588 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
589 char __user *optval, int optlen)
591 if (level != SOL_DCCP)
592 return inet_csk_compat_setsockopt(sk, level, optname,
594 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
597 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
/*
 * dccp_getsockopt_service - copy the primary service code plus the
 * optional service list back to user space; returns -ENOENT when no
 * service is set (visible default of err).
 * NOTE(review): numbering 600..621 is non-contiguous -- socket locking,
 * the length check against the caller's buffer and the final return
 * were dropped by the extraction; not compilable as-is.
 */
600 static int dccp_getsockopt_service(struct sock *sk, int len,
601 __be32 __user *optval,
604 const struct dccp_sock *dp = dccp_sk(sk);
605 const struct dccp_service_list *sl;
606 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
609 if ((sl = dp->dccps_service_list) != NULL) {
610 slen = sl->dccpsl_nr * sizeof(u32);
/* Write length, primary code, then the list (if any) in one pass. */
619 if (put_user(total_len, optlen) ||
620 put_user(dp->dccps_service, optval) ||
621 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * do_dccp_getsockopt - handle SOL_DCCP option reads: service codes, the
 * cached MPS, server-timewait flag, checksum coverage, and CCID-specific
 * options delegated to the RX/TX CCID getsockopt handlers.
 * NOTE(review): numbering 628..672 is non-contiguous -- the val/len
 * declarations, dp assignment, break statements, the case labels for the
 * CCID delegation and the final return were dropped by the extraction;
 * not compilable as-is.
 */
628 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
629 char __user *optval, int __user *optlen)
631 struct dccp_sock *dp;
634 if (get_user(len, optlen))
637 if (len < (int)sizeof(int))
643 case DCCP_SOCKOPT_PACKET_SIZE:
644 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
646 case DCCP_SOCKOPT_SERVICE:
647 return dccp_getsockopt_service(sk, len,
648 (__be32 __user *)optval, optlen);
649 case DCCP_SOCKOPT_GET_CUR_MPS:
650 val = dp->dccps_mss_cache;
652 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
653 val = dp->dccps_server_timewait;
655 case DCCP_SOCKOPT_SEND_CSCOV:
656 val = dp->dccps_pcslen;
658 case DCCP_SOCKOPT_RECV_CSCOV:
659 val = dp->dccps_pcrlen;
/* CCID-private options are forwarded to the owning CCID module. */
662 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
663 len, (u32 __user *)optval, optlen);
665 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
666 len, (u32 __user *)optval, optlen);
672 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
678 int dccp_getsockopt(struct sock *sk, int level, int optname,
679 char __user *optval, int __user *optlen)
681 if (level != SOL_DCCP)
682 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
685 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
688 EXPORT_SYMBOL_GPL(dccp_getsockopt);
691 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
692 char __user *optval, int __user *optlen)
694 if (level != SOL_DCCP)
695 return inet_csk_compat_getsockopt(sk, level, optname,
697 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
700 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
/*
 * dccp_sendmsg - queue one datagram for transmission: enforce the MSS
 * cap and tx queue limit, wait for the handshake to finish if needed,
 * copy the payload into a fresh skb and kick dccp_write_xmit().
 * NOTE(review): numbering 703..757 is non-contiguous -- lock_sock/
 * release_sock, error-path labels and the final return were dropped by
 * the extraction; this fragment is not compilable as-is.
 */
703 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
706 const struct dccp_sock *dp = dccp_sk(sk);
707 const int flags = msg->msg_flags;
708 const int noblock = flags & MSG_DONTWAIT;
/* DCCP is datagram-oriented: one msg must fit into a single packet. */
713 if (len > dp->dccps_mss_cache)
718 if (sysctl_dccp_tx_qlen &&
719 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
724 timeo = sock_sndtimeo(sk, noblock);
727 * We have to use sk_stream_wait_connect here to set sk_write_pending,
728 * so that the trick in dccp_rcv_request_sent_state_process.
730 /* Wait for a connection to finish. */
731 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
732 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
735 size = sk->sk_prot->max_header + len;
737 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
/* Reserve header room, then copy the user payload after it. */
742 skb_reserve(skb, sk->sk_prot->max_header);
743 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
747 skb_queue_tail(&sk->sk_write_queue, skb);
748 dccp_write_xmit(sk,0);
757 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg - deliver one queued datagram to user space, handling
 * Close/CloseReq packets (passive close), non-data packets, shutdown,
 * socket errors and blocking waits. Truncates to @len with MSG_TRUNC.
 * NOTE(review): numbering 759..863 is non-contiguous -- the main
 * do/while loop skeleton, lock/unlock, several error branches and case
 * labels were dropped by the extraction; not compilable as-is.
 */
759 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
760 size_t len, int nonblock, int flags, int *addr_len)
762 const struct dccp_hdr *dh;
767 if (sk->sk_state == DCCP_LISTEN) {
772 timeo = sock_rcvtimeo(sk, nonblock);
/* Peek (don't dequeue yet) so MSG_PEEK and error paths keep the skb. */
775 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
778 goto verify_sock_status;
782 switch (dh->dccph_type) {
784 case DCCP_PKT_DATAACK:
788 case DCCP_PKT_CLOSEREQ:
789 if (!(flags & MSG_PEEK))
790 dccp_finish_passive_close(sk)
793 dccp_pr_debug("found fin (%s) ok!\n",
794 dccp_packet_name(dh->dccph_type));
/* Non-data packet types are consumed and skipped. */
798 dccp_pr_debug("packet_type=%s\n",
799 dccp_packet_name(dh->dccph_type));
800 sk_eat_skb(sk, skb, 0);
803 if (sock_flag(sk, SOCK_DONE)) {
809 len = sock_error(sk);
813 if (sk->sk_shutdown & RCV_SHUTDOWN) {
818 if (sk->sk_state == DCCP_CLOSED) {
819 if (!sock_flag(sk, SOCK_DONE)) {
820 /* This occurs when user tries to read
821 * from never connected socket.
835 if (signal_pending(current)) {
836 len = sock_intr_errno(timeo);
840 sk_wait_data(sk, &timeo);
/* Copy at most @len bytes; flag truncation of larger datagrams. */
845 else if (len < skb->len)
846 msg->msg_flags |= MSG_TRUNC;
848 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
849 /* Exception. Bailout! */
854 if (!(flags & MSG_PEEK))
855 sk_eat_skb(sk, skb, 0);
863 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen - listen(2) hook: validate socket type/state, start
 * listening via dccp_listen_start() when not already listening, then
 * record the backlog.
 * NOTE(review): numbering 865..901 is non-contiguous -- lock/unlock,
 * error assignments and the final return were dropped by the
 * extraction; this fragment is not compilable as-is.
 */
865 int inet_dccp_listen(struct socket *sock, int backlog)
867 struct sock *sk = sock->sk;
868 unsigned char old_state;
874 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
877 old_state = sk->sk_state;
878 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
881 /* Really, if the socket is already in listen state
882 * we can only allow the backlog to be adjusted.
884 if (old_state != DCCP_LISTEN) {
886 * FIXME: here it probably should be sk->sk_prot->listen_start
887 * see tcp_listen_start
889 err = dccp_listen_start(sk, backlog);
893 sk->sk_max_ack_backlog = backlog;
901 EXPORT_SYMBOL_GPL(inet_dccp_listen);
/*
 * dccp_terminate_connection - active-close path: finish a passive close
 * if one is pending, stop the PARTOPEN delayed-ack timer, send a Close
 * and move to ACTIVE_CLOSEREQ or CLOSING depending on role/timewait
 * policy.
 * NOTE(review): numbering 903..926 is non-contiguous -- the switch case
 * labels (e.g. for PARTOPEN/OPEN), fallthroughs and the else line were
 * dropped by the extraction; not compilable as-is.
 */
903 static void dccp_terminate_connection(struct sock *sk)
905 u8 next_state = DCCP_CLOSED;
907 switch (sk->sk_state) {
908 case DCCP_PASSIVE_CLOSE:
909 case DCCP_PASSIVE_CLOSEREQ:
910 dccp_finish_passive_close(sk);
913 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
914 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
917 dccp_send_close(sk, 1);
/* Servers not using timewait expect the client to hold TIME_WAIT. */
919 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
920 !dccp_sk(sk)->dccps_server_timewait)
921 next_state = DCCP_ACTIVE_CLOSEREQ;
923 next_state = DCCP_CLOSING;
926 dccp_set_state(sk, next_state);
/*
 * dccp_close - close(2) hook: stop listening or flush unread data
 * (aborting with Reset("Aborted") if any was discarded), honour zero
 * linger, otherwise run the graceful termination, then orphan the
 * socket and destroy it if it already reached CLOSED.
 * NOTE(review): numbering 930..1009 is non-contiguous -- lock_sock,
 * bh_lock/unlock, the adjudge_to_death label body, sock_hold/sock_put
 * and the 'out:' label were dropped by the extraction; not compilable
 * as-is.
 */
930 void dccp_close(struct sock *sk, long timeout)
932 struct dccp_sock *dp = dccp_sk(sk);
934 u32 data_was_unread = 0;
939 sk->sk_shutdown = SHUTDOWN_MASK;
941 if (sk->sk_state == DCCP_LISTEN) {
942 dccp_set_state(sk, DCCP_CLOSED);
945 inet_csk_listen_stop(sk);
947 goto adjudge_to_death;
950 sk_stop_timer(sk, &dp->dccps_xmit_timer);
953 * We need to flush the recv. buffs. We do this only on the
954 * descriptor close, not protocol-sourced closes, because the
955 *reader process may not have drained the data yet!
957 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
958 data_was_unread += skb->len;
962 if (data_was_unread) {
963 /* Unread data was tossed, send an appropriate Reset Code */
964 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
965 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
966 dccp_set_state(sk, DCCP_CLOSED);
967 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
968 /* Check zero linger _after_ checking for unread data. */
969 sk->sk_prot->disconnect(sk, 0);
970 } else if (sk->sk_state != DCCP_CLOSED) {
971 dccp_terminate_connection(sk);
974 sk_stream_wait_close(sk, timeout);
977 state = sk->sk_state;
980 atomic_inc(sk->sk_prot->orphan_count);
983 * It is the last release_sock in its life. It will remove backlog.
987 * Now socket is owned by kernel and we acquire BH lock
988 * to finish close. No need to check for user refs.
992 WARN_ON(sock_owned_by_user(sk));
994 /* Have we already been destroyed by a softirq or backlog? */
995 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
998 if (sk->sk_state == DCCP_CLOSED)
999 inet_csk_destroy_sock(sk);
1001 /* Otherwise, socket is reprieved until protocol close. */
1009 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown  -  protocol hook for shutdown(2)
 *
 * DCCP keeps no extra half-close state at this level (the generic
 * socket layer already updated sk->sk_shutdown), so only a debug trace
 * is emitted.
 *
 * NOTE(review): braces reconstructed; the garbled listing dropped them.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1018 static inline int dccp_mib_init(void)
1020 return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1023 static inline void dccp_mib_exit(void)
1025 snmp_mib_free((void**)dccp_statistics);
/*
 * Module parameters: ehash bucket count override and (under
 * CONFIG_IP_DCCP_DEBUG) a runtime-writable debug switch.
 * NOTE(review): the dccp_debug variable definition and the closing
 * #endif were dropped by the extraction (non-contiguous numbering).
 */
1028 static int thash_entries;
1029 module_param(thash_entries, int, 0444);
1030 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1032 #ifdef CONFIG_IP_DCCP_DEBUG
1034 module_param(dccp_debug, bool, 0644);
1035 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1037 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init - module init: allocate the bind-bucket slab, size and
 * allocate the established (ehash) and bind (bhash) tables from free
 * pages (shrinking the order until allocation succeeds), then bring up
 * the MIBs, ack-vector and sysctl subsystems. Uses goto-based unwind
 * on failure.
 * NOTE(review): numbering 1040..1146 is non-contiguous -- variable
 * declarations (rc, goal), several loop/else lines, the success return
 * and the out_* ordering glue were dropped by the extraction; not
 * compilable as-is.
 */
1040 static int __init dccp_init(void)
1043 int ehash_order, bhash_order, i;
1046 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1047 FIELD_SIZEOF(struct sk_buff, cb));
1049 dccp_hashinfo.bind_bucket_cachep =
1050 kmem_cache_create("dccp_bind_bucket",
1051 sizeof(struct inet_bind_bucket), 0,
1052 SLAB_HWCACHE_ALIGN, NULL);
1053 if (!dccp_hashinfo.bind_bucket_cachep)
1057 * Size and allocate the main established and bind bucket
1060 * The methodology is similar to that of the buffer cache.
1062 if (num_physpages >= (128 * 1024))
1063 goal = num_physpages >> (21 - PAGE_SHIFT);
1065 goal = num_physpages >> (23 - PAGE_SHIFT);
1068 goal = (thash_entries *
1069 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1070 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
/* Round the table size down to a power of two for masked hashing. */
1073 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1074 sizeof(struct inet_ehash_bucket);
1075 while (dccp_hashinfo.ehash_size &
1076 (dccp_hashinfo.ehash_size - 1))
1077 dccp_hashinfo.ehash_size--;
1078 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1079 __get_free_pages(GFP_ATOMIC, ehash_order);
1080 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1082 if (!dccp_hashinfo.ehash) {
1083 DCCP_CRIT("Failed to allocate DCCP established hash table");
1084 goto out_free_bind_bucket_cachep;
1087 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1088 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1089 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1092 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1093 goto out_free_dccp_ehash;
1095 bhash_order = ehash_order;
1098 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1099 sizeof(struct inet_bind_hashbucket);
1100 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1103 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1104 __get_free_pages(GFP_ATOMIC, bhash_order);
1105 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1107 if (!dccp_hashinfo.bhash) {
1108 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1109 goto out_free_dccp_locks;
1112 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1113 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1114 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1117 rc = dccp_mib_init();
1119 goto out_free_dccp_bhash;
1121 rc = dccp_ackvec_init();
1123 goto out_free_dccp_mib;
1125 rc = dccp_sysctl_init();
1127 goto out_ackvec_exit;
1129 dccp_timestamping_init();
/* Error unwind: release resources in reverse order of acquisition. */
1136 out_free_dccp_bhash:
1137 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1138 dccp_hashinfo.bhash = NULL;
1139 out_free_dccp_locks:
1140 inet_ehash_locks_free(&dccp_hashinfo);
1141 out_free_dccp_ehash:
1142 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1143 dccp_hashinfo.ehash = NULL;
1144 out_free_bind_bucket_cachep:
1145 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1146 dccp_hashinfo.bind_bucket_cachep = NULL;
/*
 * dccp_fini - module exit: free the bind/established hash tables and
 * their locks, and destroy the bind-bucket slab. Mirrors the
 * allocations done in dccp_init().
 * NOTE(review): non-contiguous numbering -- lines such as the MIB/
 * ackvec/sysctl teardown calls appear to have been dropped by the
 * extraction.
 */
1150 static void __exit dccp_fini(void)
1153 free_pages((unsigned long)dccp_hashinfo.bhash,
1154 get_order(dccp_hashinfo.bhash_size *
1155 sizeof(struct inet_bind_hashbucket)));
1156 free_pages((unsigned long)dccp_hashinfo.ehash,
1157 get_order(dccp_hashinfo.ehash_size *
1158 sizeof(struct inet_ehash_bucket)));
1159 inet_ehash_locks_free(&dccp_hashinfo);
1160 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
/* Standard module registration boilerplate. */
1165 module_init(dccp_init);
1166 module_exit(dccp_fini);
1168 MODULE_LICENSE("GPL");
1169 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1170 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");