/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
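/* Illustrative sketch, not part of the original file: the IPV6_V6ONLY
 * option mentioned in the changelog above is driven from userspace
 * roughly like this (a minimal sketch; error handling elided, port 8080
 * is an arbitrary choice).
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

static int make_listener(int v6only)
{
	int fd = socket(AF_INET6, SOCK_STREAM, 0);
	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
				  .sin6_addr   = IN6ADDR_ANY_INIT,
				  .sin6_port   = htons(8080) };

	/* 0 => this one AF_INET6 socket serves both IPv4 and IPv6 clients;
	 * 1 => IPv6 only, so a separate AF_INET socket may bind the same
	 *      port for IPv4 (the behaviour the changelog describes).
	 */
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, sizeof(v6only));
	bind(fd, (struct sockaddr *)&a, sizeof(a));
	listen(fd, 16);
	return fd;
}
#endif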
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static struct inet_connection_sock_af_ops ipv6_mapped;
static struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   struct in6_addr *addr)
{
	return NULL;
}
#endif
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
				       struct in6_addr *saddr,
				       struct in6_addr *daddr,
				       __wsum base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
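/* Worked sketch, not part of the original file: what csum_ipv6_magic()
 * computes above, written out as portable userspace C. The pseudo-header
 * of RFC 2460 §8.1 (source, destination, upper-layer length, next header)
 * is summed together with the TCP segment in 16-bit one's-complement
 * arithmetic.
 */
#if 0
#include <stdint.h>
#include <stddef.h>

static uint32_t sum16(uint32_t sum, const uint8_t *p, size_t len)
{
	while (len > 1) {		/* big-endian 16-bit words */
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte, zero-padded */
		sum += (uint32_t)p[0] << 8;
	return sum;
}

static uint16_t tcp6_csum(const uint8_t saddr[16], const uint8_t daddr[16],
			  const uint8_t *seg, uint32_t len)
{
	uint32_t sum = 0;

	sum = sum16(sum, saddr, 16);	/* pseudo-header: source address */
	sum = sum16(sum, daddr, 16);	/* pseudo-header: destination    */
	sum += len;			/* upper-layer packet length     */
	sum += 6;			/* next header: IPPROTO_TCP == 6 */
	sum = sum16(sum, seg, len);	/* the TCP header + payload      */

	while (sum >> 16)		/* fold the carries back in      */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;		/* one's complement of the sum   */
}
#endif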
static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
{
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->dest,
					    tcp_hdr(skb)->source);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If an interface was set while binding, the
			 * indices must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;
	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}
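	/* Illustrative sketch, not part of the original file: the branch
	 * above is what a userspace connect() to a v4-mapped destination
	 * ends up exercising (192.0.2.1 is a documentation address; error
	 * handling elided).
	 */
#if 0
	int fd = socket(AF_INET6, SOCK_STREAM, 0);
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
				    .sin6_port   = htons(80) };

	inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr);
	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
	/* The kernel classifies the address as IPV6_ADDR_MAPPED, extracts
	 * the embedded IPv4 address from s6_addr32[3] and hands the whole
	 * connection off to tcp_v4_connect(), as seen above.
	 */
#endif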
	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	security_sk_classify_flow(sk, &fl);

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
		if (err == -EREMOTE)
			err = ip6_dst_blackhole(sk, &dst, &fl);
		if (err < 0)
			goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (np->opt)
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
					  np->opt->opt_nflen);

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
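	/* Worked arithmetic, added for clarity (not in the original): with
	 * IPV6_MIN_MTU = 1280, sizeof(struct ipv6hdr) = 40 and
	 * sizeof(struct tcphdr) = 20, the clamp above is
	 * 1280 - 20 - 40 = 1220 bytes: the largest MSS guaranteed to fit
	 * the minimum IPv6 MTU without fragmentation.
	 */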
	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
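/* Illustrative sketch, not part of the original file: connecting to a
 * link-local peer, as required by the scope checks in tcp_v6_connect()
 * above. The interface must be named, either via sin6_scope_id as here
 * or via an earlier bind; "eth0" and the address are example values.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>		/* if_nametoindex() */

static int connect_linklocal(int fd)
{
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
				    .sin6_port   = htons(22) };

	inet_pton(AF_INET6, "fe80::1", &dst.sin6_addr);
	dst.sin6_scope_id = if_nametoindex("eth0");	/* pick the link */
	return connect(fd, (struct sockaddr *)&dst, sizeof(dst));
}
#endif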
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __be32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr,
			  th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			 * to handle the rthdr case. Ignore this
			 * complexity for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;
			security_skb_classify_flow(skb, &fl);

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}
		} else
			dst_hold(dst);

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, f.e. if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
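/* Illustrative sketch, not part of the original file: the np->recverr
 * path at the end of tcp_v6_err() feeds the IPV6_RECVERR machinery. A
 * userspace peer can opt in and then read the queued ICMPv6 errors from
 * the socket error queue (a minimal sketch; cmsg parsing of the
 * sock_extended_err payload is elided).
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

static void enable_and_drain_errqueue(int fd)
{
	int on = 1;
	char buf[512], cbuf[512];
	struct iovec iov = { buf, sizeof(buf) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = cbuf,
			      .msg_controllen = sizeof(cbuf) };

	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVERR, &on, sizeof(on));
	/* returns the ICMPv6 error as IPV6_RECVERR ancillary data */
	recvmsg(fd, &msg, MSG_ERRQUEUE);
}
#endif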
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;
	security_req_classify_flow(req, &fl);

	opt = np->opt;
	if (opt && opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto done;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);
	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto done;

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = tcp_hdr(skb);

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		err = net_xmit_eval(err);
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
static inline void syn_flood_warning(struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies)
		printk(KERN_INFO
		       "TCPv6: Possible SYN flooding on port %d. "
		       "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
	else
#endif
		printk(KERN_INFO
		       "TCPv6: Possible SYN flooding on port %d. "
		       "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (inet6_rsk(req)->pktopts)
		kfree_skb(inet6_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   struct in6_addr *addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries6)
		return NULL;

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
			return &tp->md5sig_info->keys6[i].base;
	}
	return NULL;
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
}

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
}
static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
			     char *newkey, u8 newkeylen)
{
	/* Add key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp6_md5sig_key *keys;

	key = tcp_v6_md5_do_lookup(sk, peer);
	if (key) {
		/* modify an existing entry - just update that one */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		/* reallocate a new list if the current one is full. */
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		}
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
			keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
					(tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
			if (!keys) {
				tcp_free_md5sig_pool();
				kfree(newkey);
				return -ENOMEM;
			}

			if (tp->md5sig_info->entries6)
				memmove(keys, tp->md5sig_info->keys6,
					(sizeof (tp->md5sig_info->keys6[0]) *
					 tp->md5sig_info->entries6));

			kfree(tp->md5sig_info->keys6);
			tp->md5sig_info->keys6 = keys;
			tp->md5sig_info->alloced6++;
		}

		ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
			       peer);
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;

		tp->md5sig_info->entries6++;
	}
	return 0;
}
static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, __u8 newkeylen)
{
	return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}

static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
			/* Free the key */
			kfree(tp->md5sig_info->keys6[i].base.key);
			tp->md5sig_info->entries6--;

			if (tp->md5sig_info->entries6 == 0) {
				kfree(tp->md5sig_info->keys6);
				tp->md5sig_info->keys6 = NULL;
				tp->md5sig_info->alloced6 = 0;
			} else {
				/* shrink the database */
				if (tp->md5sig_info->entries6 != i)
					memmove(&tp->md5sig_info->keys6[i],
						&tp->md5sig_info->keys6[i+1],
						(tp->md5sig_info->entries6 - i)
						* sizeof (tp->md5sig_info->keys6[0]));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}
static void tcp_v6_clear_md5_list (struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (tp->md5sig_info->entries6) {
		for (i = 0; i < tp->md5sig_info->entries6; i++)
			kfree(tp->md5sig_info->keys6[i].base.key);
		tp->md5sig_info->entries6 = 0;
		tcp_free_md5sig_pool();
	}

	kfree(tp->md5sig_info->keys6);
	tp->md5sig_info->keys6 = NULL;
	tp->md5sig_info->alloced6 = 0;

	if (tp->md5sig_info->entries4) {
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}

	kfree(tp->md5sig_info->keys4);
	tp->md5sig_info->keys4 = NULL;
	tp->md5sig_info->alloced4 = 0;
}
static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
				  int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (!cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
		return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
		if (!p)
			return -ENOMEM;

		tp->md5sig_info = p;
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
					 newkey, cmd.tcpm_keylen);
	}
	return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
}
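/* Illustrative sketch, not part of the original file: the option parsed
 * above is set from userspace with TCP_MD5SIG and struct tcp_md5sig
 * (from <linux/tcp.h>). A zero tcpm_keylen deletes the key for that
 * peer, matching the !cmd.tcpm_keylen branch above.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>		/* struct tcp_md5sig, TCP_MD5SIG */

static int set_md5_key(int fd, const struct sockaddr_in6 *peer,
		       const void *key, int keylen)
{
	struct tcp_md5sig md5;

	memset(&md5, 0, sizeof(md5));
	memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
	md5.tcpm_keylen = keylen;	/* 0 would delete the key */
	memcpy(md5.tcpm_key, key, keylen);
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}
#endif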
static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   struct tcphdr *th, unsigned int tcplen)
{
	struct tcp_md5sig_pool *hp;
	struct tcp6_pseudohdr *bp;
	int err;

	hp = tcp_get_md5sig_pool();
	if (!hp) {
		printk(KERN_WARNING "%s(): hash pool not found...\n", __func__);
		goto clear_hash_noput;
	}

	bp = &hp->md5_blk.ip6;

	/* 1. TCP pseudo-header (RFC2460) */
	ipv6_addr_copy(&bp->saddr, saddr);
	ipv6_addr_copy(&bp->daddr, daddr);
	bp->len = htonl(tcplen);
	bp->protocol = htonl(IPPROTO_TCP);

	err = tcp_calc_md5_hash(md5_hash, key, sizeof(*bp),
				th, tcplen, hp);
	if (err)
		goto clear_hash;

	/* Free up the crypto pool */
	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				struct sock *sk,
				struct dst_entry *dst,
				struct request_sock *req,
				struct tcphdr *th, unsigned int tcplen)
{
	struct in6_addr *saddr, *daddr;

	if (sk) {
		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
	} else {
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
	}
	return tcp_v6_do_calc_md5_hash(md5_hash, key,
				       saddr, daddr,
				       th, tcplen);
}
static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
{
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* do we have a hash as expected? */
	if (!hash_expected) {
		if (!hash_location)
			return 0;
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash NOT expected but found "
			       "(" NIP6_FMT ", %u)->"
			       "(" NIP6_FMT ", %u)\n",
			       NIP6(ip6h->saddr), ntohs(th->source),
			       NIP6(ip6h->daddr), ntohs(th->dest));
		}
		return 1;
	}

	if (!hash_location) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash expected but NOT found "
			       "(" NIP6_FMT ", %u)->"
			       "(" NIP6_FMT ", %u)\n",
			       NIP6(ip6h->saddr), ntohs(th->source),
			       NIP6(ip6h->daddr), ntohs(th->dest));
		}
		return 1;
	}

	/* check the signature */
	genhash = tcp_v6_do_calc_md5_hash(newhash,
					  hash_expected,
					  &ip6h->saddr, &ip6h->daddr,
					  th, skb->len);
	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash %s for "
			       "(" NIP6_FMT ", %u)->"
			       "(" NIP6_FMT ", %u)\n",
			       genhash ? "failed" : "mismatch",
			       NIP6(ip6h->saddr), ntohs(th->source),
			       NIP6(ip6h->daddr), ntohs(th->dest));
		}
		return 1;
	}
	return 0;
}
#endif
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
};
#endif

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};
static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
static int tcp_v6_gso_send_check(struct sk_buff *skb)
{
	struct ipv6hdr *ipv6h;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	ipv6h = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
				     IPPROTO_TCP, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb), *t1;
	struct sk_buff *buff;
	struct flowi fl;
	struct net *net = dev_net(skb->dst->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(*th);
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	if (sk)
		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
	else
		key = NULL;

	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		__be32 *opt = (__be32*)(t1 + 1);
		opt[0] = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_MD5SIG << 8) |
			       TCPOLEN_MD5SIG);
		tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
					&ipv6_hdr(skb)->daddr,
					&ipv6_hdr(skb)->saddr,
					t1, tot_len);
	}
#endif

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;
	security_skb_classify_flow(skb, &fl);

	/* Pass a socket to ip6_dst_lookup even when it is for an RST;
	 * the underlying function uses it to retrieve the network
	 * namespace.
	 */
	if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(buff);
}
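/* Worked example, added for clarity (not in the original): the RST
 * sequence rules above follow RFC 793. For an incoming SYN with
 * seq = 1000 and no payload, th->ack is 0, so the RST carries
 * ack_seq = 1000 + 1 (syn) + 0 (fin) + 0 (payload) = 1001 with the ACK
 * bit set; for a segment that had ACK set, the RST simply reuses the
 * peer's ack_seq as its own seq and needs no ACK of its own.
 */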
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
			    struct tcp_md5sig_key *key)
{
	struct tcphdr *th = tcp_hdr(skb), *t1;
	struct sk_buff *buff;
	struct flowi fl;
	struct net *net = dev_net(skb->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 *topt;

	if (ts)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (ts) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tcp_time_stamp);
		*topt++ = htonl(ts);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
					&ipv6_hdr(skb)->daddr,
					&ipv6_hdr(skb)->saddr,
					t1, tot_len);
	}
#endif

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;
	security_skb_classify_flow(skb, &fl);

	if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(buff);
}
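/* Worked example, added for clarity (not in the original): the option
 * words built above have this on-the-wire byte layout. With a timestamp,
 * htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 *       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)
 * emits the bytes 01 01 08 0a (NOP, NOP, kind 8, length 10), followed by
 * the two 32-bit timestamp values; the MD5 variant emits 01 01 13 12
 * (NOP, NOP, kind 19, length 18) followed by the 16-byte digest.
 */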
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1,
			req->rcv_wnd, req->ts_recent,
			tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr));
}
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
			&ipv6_hdr(skb)->saddr, th->source,
			&ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0
#endif

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			syn_flood_warning(skb);
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies)
			want_cookie = 1;
		else
#endif
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
	treq->pktopts = NULL;
	if (!want_cookie)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v6_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		if (ipv6_opt_accepted(sk, skb) ||
		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
			atomic_inc(&skb->users);
			treq->pktopts = skb;
		}
		treq->iif = sk->sk_bound_dev_if;

		/* So that link locals have meaning */
		if (!sk->sk_bound_dev_if &&
		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
			treq->iif = inet6_iif(skb);

		isn = tcp_v6_init_sequence(skb);
	}

	tcp_rsk(req)->snt_isn = isn;

	security_inet_conn_request(sk, skb, req);

	if (tcp_v6_send_synack(sk, req))
		goto drop;

	if (!want_cookie) {
		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
		return 0;
	}

drop:
	if (req)
		reqsk_free(req);

	return 0; /* don't send reset */
}
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 code
		 * worked with the IPv6 icsk.icsk_af_ops.
		 * Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;
		security_req_classify_flow(req, &fl);

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
	 */
	newinet->opt = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever, but we
	 * do one more thing here: reattach the optmem to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newnp->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr,
					  newkey, key->keylen);
	}
#endif

	__inet6_hash(newsk);
	__inet_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash (sk, skb))
		goto discard;
#endif

	if (sk_filter(sk, skb))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   can do it without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS does
	   not look very well thought out. For now we latch the
	   options received in the last packet enqueued by tcp.
	   Feel free to propose a better solution.
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* What is this, you ask? The conditions below hold when:

	   1. skb was enqueued by tcp.
	   2. skb is added to the tail of the read queue, not out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which the user wants to
	      receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
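/* Illustrative sketch, not part of the original file: the options latched
 * into np->pktoptions above surface in userspace through the RFC 2292
 * interface; for a connected TCP socket the cmsg buffer is fetched with
 * getsockopt() rather than received per-packet (a minimal sketch, option
 * names as in <linux/in6.h>; error handling and cmsg parsing elided).
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

static void fetch_last_pktoptions(int fd)
{
	int on = 1;
	char cbuf[256];
	socklen_t clen = sizeof(cbuf);

	setsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTINFO, &on, sizeof(on));
	/* ... traffic flows; the kernel latches the newest options ... */
	getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &clen);
	/* cbuf now holds cmsghdr-formatted data (e.g. the packet info). */
}
#endif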
static int tcp_v6_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo,
			&ipv6_hdr(skb)->saddr, th->source,
			&ipv6_hdr(skb)->daddr, ntohs(th->dest),
			inet6_iif(skb));
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v6_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.remember_stamp	   = tcp_v6_remember_stamp,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_calc_md5_hash,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
/*
 *	TCP over IPv4 via INET6 API
 */

static struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_calc_md5_hash,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	icsk->icsk_sync_mss = tcp_sync_mss;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv6_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list */
	if (tcp_sk(sk)->md5sig_info)
		tcp_v6_clear_md5_list(sk);
#endif

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;
	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0,  /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = &tw6->tw_v6_daddr;
	src   = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp6_seq_show,
	},
};

int tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif
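/* Worked example, added for clarity (not in the original): a line emitted
 * by tcp6_seq_show() for a listener on [::]:22 might look like
 *
 *   0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000
 *      0A 00000000:00000000 00:00000000 00000000     0        0 1234 1 <ptr>
 *
 * i.e. hex IPv6 address:port pairs, the state (0A == TCP_LISTEN), the
 * tx/rx queue sizes, timer info, retransmits, uid, inode and the socket
 * pointer, matching the header printed for SEQ_START_TOKEN above (the
 * inode and pointer values here are placeholders).
 */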
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.gso_send_check	=	tcp_v6_gso_send_check,
	.gso_segment	=	tcp_tso_segment,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
static int tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
};
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
	goto out_tcpv6_protocol;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}