/*
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
75 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
76 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
77 struct request_sock *req);
79 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static void __tcp_v6_send_check(struct sk_buff *skb,
81 const struct in6_addr *saddr,
82 const struct in6_addr *daddr);
84 static const struct inet_connection_sock_af_ops ipv6_mapped;
85 static const struct inet_connection_sock_af_ops ipv6_specific;
86 #ifdef CONFIG_TCP_MD5SIG
87 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
88 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
90 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
91 const struct in6_addr *addr)
97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
99 struct dst_entry *dst = skb_dst(skb);
100 const struct rt6_info *rt = (const struct rt6_info *)dst;
104 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
106 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
109 static void tcp_v6_hash(struct sock *sk)
111 if (sk->sk_state != TCP_CLOSE) {
112 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
117 __inet6_hash(sk, NULL);
122 static __inline__ __sum16 tcp_v6_check(int len,
123 const struct in6_addr *saddr,
124 const struct in6_addr *daddr,
127 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
130 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
132 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
133 ipv6_hdr(skb)->saddr.s6_addr32,
135 tcp_hdr(skb)->source);
138 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
141 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
142 struct inet_sock *inet = inet_sk(sk);
143 struct inet_connection_sock *icsk = inet_csk(sk);
144 struct ipv6_pinfo *np = inet6_sk(sk);
145 struct tcp_sock *tp = tcp_sk(sk);
146 struct in6_addr *saddr = NULL, *final_p, final;
149 struct dst_entry *dst;
153 if (addr_len < SIN6_LEN_RFC2133)
156 if (usin->sin6_family != AF_INET6)
157 return -EAFNOSUPPORT;
159 memset(&fl6, 0, sizeof(fl6));
162 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
163 IP6_ECN_flow_init(fl6.flowlabel);
164 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
165 struct ip6_flowlabel *flowlabel;
166 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
167 if (flowlabel == NULL)
169 usin->sin6_addr = flowlabel->dst;
170 fl6_sock_release(flowlabel);
175 * connect() to INADDR_ANY means loopback (BSD'ism).
178 if(ipv6_addr_any(&usin->sin6_addr))
179 usin->sin6_addr.s6_addr[15] = 0x1;
181 addr_type = ipv6_addr_type(&usin->sin6_addr);
183 if(addr_type & IPV6_ADDR_MULTICAST)
186 if (addr_type&IPV6_ADDR_LINKLOCAL) {
187 if (addr_len >= sizeof(struct sockaddr_in6) &&
188 usin->sin6_scope_id) {
189 /* If interface is set while binding, indices
192 if (sk->sk_bound_dev_if &&
193 sk->sk_bound_dev_if != usin->sin6_scope_id)
196 sk->sk_bound_dev_if = usin->sin6_scope_id;
199 /* Connect to link-local address requires an interface */
200 if (!sk->sk_bound_dev_if)
204 if (tp->rx_opt.ts_recent_stamp &&
205 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
206 tp->rx_opt.ts_recent = 0;
207 tp->rx_opt.ts_recent_stamp = 0;
211 np->daddr = usin->sin6_addr;
212 np->flow_label = fl6.flowlabel;
218 if (addr_type == IPV6_ADDR_MAPPED) {
219 u32 exthdrlen = icsk->icsk_ext_hdr_len;
220 struct sockaddr_in sin;
222 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
224 if (__ipv6_only_sock(sk))
227 sin.sin_family = AF_INET;
228 sin.sin_port = usin->sin6_port;
229 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
231 icsk->icsk_af_ops = &ipv6_mapped;
232 sk->sk_backlog_rcv = tcp_v4_do_rcv;
233 #ifdef CONFIG_TCP_MD5SIG
234 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
237 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
240 icsk->icsk_ext_hdr_len = exthdrlen;
241 icsk->icsk_af_ops = &ipv6_specific;
242 sk->sk_backlog_rcv = tcp_v6_do_rcv;
243 #ifdef CONFIG_TCP_MD5SIG
244 tp->af_specific = &tcp_sock_ipv6_specific;
248 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
249 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
256 if (!ipv6_addr_any(&np->rcv_saddr))
257 saddr = &np->rcv_saddr;
259 fl6.flowi6_proto = IPPROTO_TCP;
260 fl6.daddr = np->daddr;
261 fl6.saddr = saddr ? *saddr : np->saddr;
262 fl6.flowi6_oif = sk->sk_bound_dev_if;
263 fl6.flowi6_mark = sk->sk_mark;
264 fl6.fl6_dport = usin->sin6_port;
265 fl6.fl6_sport = inet->inet_sport;
267 final_p = fl6_update_dst(&fl6, np->opt, &final);
269 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
271 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
279 np->rcv_saddr = *saddr;
282 /* set the source address */
284 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
286 sk->sk_gso_type = SKB_GSO_TCPV6;
287 __ip6_dst_store(sk, dst, NULL, NULL);
289 rt = (struct rt6_info *) dst;
290 if (tcp_death_row.sysctl_tw_recycle &&
291 !tp->rx_opt.ts_recent_stamp &&
292 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
293 tcp_fetch_timewait_stamp(sk, dst);
295 icsk->icsk_ext_hdr_len = 0;
297 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
300 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
302 inet->inet_dport = usin->sin6_port;
304 tcp_set_state(sk, TCP_SYN_SENT);
305 err = inet6_hash_connect(&tcp_death_row, sk);
310 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
315 err = tcp_connect(sk);
322 tcp_set_state(sk, TCP_CLOSE);
325 inet->inet_dport = 0;
326 sk->sk_route_caps = 0;
330 static void tcp_v6_mtu_reduced(struct sock *sk)
332 struct dst_entry *dst;
334 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
337 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
341 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
342 tcp_sync_mss(sk, dst_mtu(dst));
343 tcp_simple_retransmit(sk);
347 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
348 u8 type, u8 code, int offset, __be32 info)
350 const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
351 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
352 struct ipv6_pinfo *np;
357 struct net *net = dev_net(skb->dev);
359 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
360 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
363 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
368 if (sk->sk_state == TCP_TIME_WAIT) {
369 inet_twsk_put(inet_twsk(sk));
374 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
375 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
377 if (sk->sk_state == TCP_CLOSE)
380 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
381 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
386 seq = ntohl(th->seq);
387 if (sk->sk_state != TCP_LISTEN &&
388 !between(seq, tp->snd_una, tp->snd_nxt)) {
389 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
395 if (type == NDISC_REDIRECT) {
396 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
399 dst->ops->redirect(dst, sk, skb);
402 if (type == ICMPV6_PKT_TOOBIG) {
403 tp->mtu_info = ntohl(info);
404 if (!sock_owned_by_user(sk))
405 tcp_v6_mtu_reduced(sk);
406 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
412 icmpv6_err_convert(type, code, &err);
414 /* Might be for an request_sock */
415 switch (sk->sk_state) {
416 struct request_sock *req, **prev;
418 if (sock_owned_by_user(sk))
421 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
422 &hdr->saddr, inet6_iif(skb));
426 /* ICMPs are not backlogged, hence we cannot get
427 * an established socket here.
429 WARN_ON(req->sk != NULL);
431 if (seq != tcp_rsk(req)->snt_isn) {
432 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
436 inet_csk_reqsk_queue_drop(sk, req, prev);
440 case TCP_SYN_RECV: /* Cannot happen.
441 It can, it SYNs are crossed. --ANK */
442 if (!sock_owned_by_user(sk)) {
444 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
448 sk->sk_err_soft = err;
452 if (!sock_owned_by_user(sk) && np->recverr) {
454 sk->sk_error_report(sk);
456 sk->sk_err_soft = err;
464 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
466 struct request_sock *req,
467 struct request_values *rvp,
470 struct inet6_request_sock *treq = inet6_rsk(req);
471 struct ipv6_pinfo *np = inet6_sk(sk);
472 struct sk_buff * skb;
475 /* First, grab a route. */
476 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
479 skb = tcp_make_synack(sk, dst, req, rvp);
482 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
484 fl6->daddr = treq->rmt_addr;
485 skb_set_queue_mapping(skb, queue_mapping);
486 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
487 err = net_xmit_eval(err);
494 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
495 struct request_values *rvp)
499 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
500 return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
503 static void tcp_v6_reqsk_destructor(struct request_sock *req)
505 kfree_skb(inet6_rsk(req)->pktopts);
508 #ifdef CONFIG_TCP_MD5SIG
509 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
510 const struct in6_addr *addr)
512 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
515 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
516 struct sock *addr_sk)
518 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
521 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
522 struct request_sock *req)
524 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
527 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
530 struct tcp_md5sig cmd;
531 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
533 if (optlen < sizeof(cmd))
536 if (copy_from_user(&cmd, optval, sizeof(cmd)))
539 if (sin6->sin6_family != AF_INET6)
542 if (!cmd.tcpm_keylen) {
543 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
544 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
546 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
550 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
553 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
554 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
555 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
557 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
558 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
561 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
562 const struct in6_addr *daddr,
563 const struct in6_addr *saddr, int nbytes)
565 struct tcp6_pseudohdr *bp;
566 struct scatterlist sg;
568 bp = &hp->md5_blk.ip6;
569 /* 1. TCP pseudo-header (RFC2460) */
572 bp->protocol = cpu_to_be32(IPPROTO_TCP);
573 bp->len = cpu_to_be32(nbytes);
575 sg_init_one(&sg, bp, sizeof(*bp));
576 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
579 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
580 const struct in6_addr *daddr, struct in6_addr *saddr,
581 const struct tcphdr *th)
583 struct tcp_md5sig_pool *hp;
584 struct hash_desc *desc;
586 hp = tcp_get_md5sig_pool();
588 goto clear_hash_noput;
589 desc = &hp->md5_desc;
591 if (crypto_hash_init(desc))
593 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
595 if (tcp_md5_hash_header(hp, th))
597 if (tcp_md5_hash_key(hp, key))
599 if (crypto_hash_final(desc, md5_hash))
602 tcp_put_md5sig_pool();
606 tcp_put_md5sig_pool();
608 memset(md5_hash, 0, 16);
612 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
613 const struct sock *sk,
614 const struct request_sock *req,
615 const struct sk_buff *skb)
617 const struct in6_addr *saddr, *daddr;
618 struct tcp_md5sig_pool *hp;
619 struct hash_desc *desc;
620 const struct tcphdr *th = tcp_hdr(skb);
623 saddr = &inet6_sk(sk)->saddr;
624 daddr = &inet6_sk(sk)->daddr;
626 saddr = &inet6_rsk(req)->loc_addr;
627 daddr = &inet6_rsk(req)->rmt_addr;
629 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
630 saddr = &ip6h->saddr;
631 daddr = &ip6h->daddr;
634 hp = tcp_get_md5sig_pool();
636 goto clear_hash_noput;
637 desc = &hp->md5_desc;
639 if (crypto_hash_init(desc))
642 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
644 if (tcp_md5_hash_header(hp, th))
646 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
648 if (tcp_md5_hash_key(hp, key))
650 if (crypto_hash_final(desc, md5_hash))
653 tcp_put_md5sig_pool();
657 tcp_put_md5sig_pool();
659 memset(md5_hash, 0, 16);
663 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
665 const __u8 *hash_location = NULL;
666 struct tcp_md5sig_key *hash_expected;
667 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
668 const struct tcphdr *th = tcp_hdr(skb);
672 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
673 hash_location = tcp_parse_md5sig_option(th);
675 /* We've parsed the options - do we have a hash? */
676 if (!hash_expected && !hash_location)
679 if (hash_expected && !hash_location) {
680 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
684 if (!hash_expected && hash_location) {
685 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
689 /* check the signature */
690 genhash = tcp_v6_md5_hash_skb(newhash,
694 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
695 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
696 genhash ? "failed" : "mismatch",
697 &ip6h->saddr, ntohs(th->source),
698 &ip6h->daddr, ntohs(th->dest));
705 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
707 .obj_size = sizeof(struct tcp6_request_sock),
708 .rtx_syn_ack = tcp_v6_rtx_synack,
709 .send_ack = tcp_v6_reqsk_send_ack,
710 .destructor = tcp_v6_reqsk_destructor,
711 .send_reset = tcp_v6_send_reset,
712 .syn_ack_timeout = tcp_syn_ack_timeout,
#ifdef CONFIG_TCP_MD5SIG
/* MD5 hooks used while a connection is still a request sock. */
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
};
#endif
722 static void __tcp_v6_send_check(struct sk_buff *skb,
723 const struct in6_addr *saddr, const struct in6_addr *daddr)
725 struct tcphdr *th = tcp_hdr(skb);
727 if (skb->ip_summed == CHECKSUM_PARTIAL) {
728 th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
729 skb->csum_start = skb_transport_header(skb) - skb->head;
730 skb->csum_offset = offsetof(struct tcphdr, check);
732 th->check = tcp_v6_check(skb->len, saddr, daddr,
733 csum_partial(th, th->doff << 2,
738 static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
740 struct ipv6_pinfo *np = inet6_sk(sk);
742 __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
745 static int tcp_v6_gso_send_check(struct sk_buff *skb)
747 const struct ipv6hdr *ipv6h;
750 if (!pskb_may_pull(skb, sizeof(*th)))
753 ipv6h = ipv6_hdr(skb);
757 skb->ip_summed = CHECKSUM_PARTIAL;
758 __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
762 static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
765 const struct ipv6hdr *iph = skb_gro_network_header(skb);
767 switch (skb->ip_summed) {
768 case CHECKSUM_COMPLETE:
769 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
771 skb->ip_summed = CHECKSUM_UNNECESSARY;
777 NAPI_GRO_CB(skb)->flush = 1;
781 return tcp_gro_receive(head, skb);
784 static int tcp6_gro_complete(struct sk_buff *skb)
786 const struct ipv6hdr *iph = ipv6_hdr(skb);
787 struct tcphdr *th = tcp_hdr(skb);
789 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
790 &iph->saddr, &iph->daddr, 0);
791 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
793 return tcp_gro_complete(skb);
796 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
797 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
799 const struct tcphdr *th = tcp_hdr(skb);
801 struct sk_buff *buff;
803 struct net *net = dev_net(skb_dst(skb)->dev);
804 struct sock *ctl_sk = net->ipv6.tcp_sk;
805 unsigned int tot_len = sizeof(struct tcphdr);
806 struct dst_entry *dst;
810 tot_len += TCPOLEN_TSTAMP_ALIGNED;
811 #ifdef CONFIG_TCP_MD5SIG
813 tot_len += TCPOLEN_MD5SIG_ALIGNED;
816 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
821 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
823 t1 = (struct tcphdr *) skb_push(buff, tot_len);
824 skb_reset_transport_header(buff);
826 /* Swap the send and the receive. */
827 memset(t1, 0, sizeof(*t1));
828 t1->dest = th->source;
829 t1->source = th->dest;
830 t1->doff = tot_len / 4;
831 t1->seq = htonl(seq);
832 t1->ack_seq = htonl(ack);
833 t1->ack = !rst || !th->ack;
835 t1->window = htons(win);
837 topt = (__be32 *)(t1 + 1);
840 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
841 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
842 *topt++ = htonl(tcp_time_stamp);
846 #ifdef CONFIG_TCP_MD5SIG
848 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
849 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
850 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
851 &ipv6_hdr(skb)->saddr,
852 &ipv6_hdr(skb)->daddr, t1);
856 memset(&fl6, 0, sizeof(fl6));
857 fl6.daddr = ipv6_hdr(skb)->saddr;
858 fl6.saddr = ipv6_hdr(skb)->daddr;
860 buff->ip_summed = CHECKSUM_PARTIAL;
863 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
865 fl6.flowi6_proto = IPPROTO_TCP;
866 fl6.flowi6_oif = inet6_iif(skb);
867 fl6.fl6_dport = t1->dest;
868 fl6.fl6_sport = t1->source;
869 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
871 /* Pass a socket to ip6_dst_lookup either it is for RST
872 * Underlying function will use this to retrieve the network
875 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
877 skb_dst_set(buff, dst);
878 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
879 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
881 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
888 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
890 const struct tcphdr *th = tcp_hdr(skb);
891 u32 seq = 0, ack_seq = 0;
892 struct tcp_md5sig_key *key = NULL;
893 #ifdef CONFIG_TCP_MD5SIG
894 const __u8 *hash_location = NULL;
895 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
896 unsigned char newhash[16];
898 struct sock *sk1 = NULL;
904 if (!ipv6_unicast_destination(skb))
907 #ifdef CONFIG_TCP_MD5SIG
908 hash_location = tcp_parse_md5sig_option(th);
909 if (!sk && hash_location) {
911 * active side is lost. Try to find listening socket through
912 * source port, and then find md5 key through listening socket.
913 * we are not loose security here:
914 * Incoming packet is checked with md5 hash with finding key,
915 * no RST generated if md5 hash doesn't match.
917 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
918 &tcp_hashinfo, &ipv6h->daddr,
919 ntohs(th->source), inet6_iif(skb));
924 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
928 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
929 if (genhash || memcmp(hash_location, newhash, 16) != 0)
932 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
937 seq = ntohl(th->ack_seq);
939 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
942 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
944 #ifdef CONFIG_TCP_MD5SIG
953 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
954 struct tcp_md5sig_key *key, u8 tclass)
956 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
959 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
961 struct inet_timewait_sock *tw = inet_twsk(sk);
962 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
964 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
965 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
966 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
972 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
973 struct request_sock *req)
975 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
976 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
980 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
982 struct request_sock *req, **prev;
983 const struct tcphdr *th = tcp_hdr(skb);
986 /* Find possible connection requests. */
987 req = inet6_csk_search_req(sk, &prev, th->source,
988 &ipv6_hdr(skb)->saddr,
989 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
991 return tcp_check_req(sk, skb, req, prev);
993 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
994 &ipv6_hdr(skb)->saddr, th->source,
995 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
998 if (nsk->sk_state != TCP_TIME_WAIT) {
1002 inet_twsk_put(inet_twsk(nsk));
1006 #ifdef CONFIG_SYN_COOKIES
1008 sk = cookie_v6_check(sk, skb);
1013 /* FIXME: this is substantially similar to the ipv4 code.
1014 * Can some kind of merge be done? -- erics
1016 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1018 struct tcp_extend_values tmp_ext;
1019 struct tcp_options_received tmp_opt;
1020 const u8 *hash_location;
1021 struct request_sock *req;
1022 struct inet6_request_sock *treq;
1023 struct ipv6_pinfo *np = inet6_sk(sk);
1024 struct tcp_sock *tp = tcp_sk(sk);
1025 __u32 isn = TCP_SKB_CB(skb)->when;
1026 struct dst_entry *dst = NULL;
1028 bool want_cookie = false;
1030 if (skb->protocol == htons(ETH_P_IP))
1031 return tcp_v4_conn_request(sk, skb);
1033 if (!ipv6_unicast_destination(skb))
1036 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1037 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
1042 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1045 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1049 #ifdef CONFIG_TCP_MD5SIG
1050 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1053 tcp_clear_options(&tmp_opt);
1054 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1055 tmp_opt.user_mss = tp->rx_opt.user_mss;
1056 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
1058 if (tmp_opt.cookie_plus > 0 &&
1059 tmp_opt.saw_tstamp &&
1060 !tp->rx_opt.cookie_out_never &&
1061 (sysctl_tcp_cookie_size > 0 ||
1062 (tp->cookie_values != NULL &&
1063 tp->cookie_values->cookie_desired > 0))) {
1066 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1067 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1069 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1072 /* Secret recipe starts with IP addresses */
1073 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
1078 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1084 /* plus variable length Initiator Cookie */
1087 *c++ ^= *hash_location++;
1089 want_cookie = false; /* not our kind of cookie */
1090 tmp_ext.cookie_out_never = 0; /* false */
1091 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1092 } else if (!tp->rx_opt.cookie_in_always) {
1093 /* redundant indications, but ensure initialization. */
1094 tmp_ext.cookie_out_never = 1; /* true */
1095 tmp_ext.cookie_plus = 0;
1099 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1101 if (want_cookie && !tmp_opt.saw_tstamp)
1102 tcp_clear_options(&tmp_opt);
1104 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1105 tcp_openreq_init(req, &tmp_opt, skb);
1107 treq = inet6_rsk(req);
1108 treq->rmt_addr = ipv6_hdr(skb)->saddr;
1109 treq->loc_addr = ipv6_hdr(skb)->daddr;
1110 if (!want_cookie || tmp_opt.tstamp_ok)
1111 TCP_ECN_create_request(req, skb);
1113 treq->iif = sk->sk_bound_dev_if;
1115 /* So that link locals have meaning */
1116 if (!sk->sk_bound_dev_if &&
1117 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1118 treq->iif = inet6_iif(skb);
1121 if (ipv6_opt_accepted(sk, skb) ||
1122 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1123 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1124 atomic_inc(&skb->users);
1125 treq->pktopts = skb;
1129 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1130 req->cookie_ts = tmp_opt.tstamp_ok;
1134 /* VJ's idea. We save last timestamp seen
1135 * from the destination in peer table, when entering
1136 * state TIME-WAIT, and check against it before
1137 * accepting new connection request.
1139 * If "isn" is not zero, this request hit alive
1140 * timewait bucket, so that all the necessary checks
1141 * are made in the function processing timewait state.
1143 if (tmp_opt.saw_tstamp &&
1144 tcp_death_row.sysctl_tw_recycle &&
1145 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
1146 if (!tcp_peer_is_proven(req, dst, true)) {
1147 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1148 goto drop_and_release;
1151 /* Kill the following clause, if you dislike this way. */
1152 else if (!sysctl_tcp_syncookies &&
1153 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1154 (sysctl_max_syn_backlog >> 2)) &&
1155 !tcp_peer_is_proven(req, dst, false)) {
1156 /* Without syncookies last quarter of
1157 * backlog is filled with destinations,
1158 * proven to be alive.
1159 * It means that we continue to communicate
1160 * to destinations, already remembered
1161 * to the moment of synflood.
1163 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1164 &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
1165 goto drop_and_release;
1168 isn = tcp_v6_init_sequence(skb);
1171 tcp_rsk(req)->snt_isn = isn;
1172 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1174 if (security_inet_conn_request(sk, skb, req))
1175 goto drop_and_release;
1177 if (tcp_v6_send_synack(sk, dst, &fl6, req,
1178 (struct request_values *)&tmp_ext,
1179 skb_get_queue_mapping(skb)) ||
1183 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1191 return 0; /* don't send reset */
1194 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1195 struct request_sock *req,
1196 struct dst_entry *dst)
1198 struct inet6_request_sock *treq;
1199 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1200 struct tcp6_sock *newtcp6sk;
1201 struct inet_sock *newinet;
1202 struct tcp_sock *newtp;
1204 #ifdef CONFIG_TCP_MD5SIG
1205 struct tcp_md5sig_key *key;
1209 if (skb->protocol == htons(ETH_P_IP)) {
1214 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1219 newtcp6sk = (struct tcp6_sock *)newsk;
1220 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1222 newinet = inet_sk(newsk);
1223 newnp = inet6_sk(newsk);
1224 newtp = tcp_sk(newsk);
1226 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1228 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
1230 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1232 newnp->rcv_saddr = newnp->saddr;
1234 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1235 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1236 #ifdef CONFIG_TCP_MD5SIG
1237 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1240 newnp->ipv6_ac_list = NULL;
1241 newnp->ipv6_fl_list = NULL;
1242 newnp->pktoptions = NULL;
1244 newnp->mcast_oif = inet6_iif(skb);
1245 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1246 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1249 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1250 * here, tcp_create_openreq_child now does this for us, see the comment in
1251 * that function for the gory details. -acme
1254 /* It is tricky place. Until this moment IPv4 tcp
1255 worked with IPv6 icsk.icsk_af_ops.
1258 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1263 treq = inet6_rsk(req);
1265 if (sk_acceptq_is_full(sk))
1269 dst = inet6_csk_route_req(sk, &fl6, req);
1274 newsk = tcp_create_openreq_child(sk, req, skb);
1279 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1280 * count here, tcp_create_openreq_child now does this for us, see the
1281 * comment in that function for the gory details. -acme
1284 newsk->sk_gso_type = SKB_GSO_TCPV6;
1285 __ip6_dst_store(newsk, dst, NULL, NULL);
1286 inet6_sk_rx_dst_set(newsk, skb);
1288 newtcp6sk = (struct tcp6_sock *)newsk;
1289 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1291 newtp = tcp_sk(newsk);
1292 newinet = inet_sk(newsk);
1293 newnp = inet6_sk(newsk);
1295 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1297 newnp->daddr = treq->rmt_addr;
1298 newnp->saddr = treq->loc_addr;
1299 newnp->rcv_saddr = treq->loc_addr;
1300 newsk->sk_bound_dev_if = treq->iif;
1302 /* Now IPv6 options...
1304 First: no IPv4 options.
1306 newinet->inet_opt = NULL;
1307 newnp->ipv6_ac_list = NULL;
1308 newnp->ipv6_fl_list = NULL;
1311 newnp->rxopt.all = np->rxopt.all;
1313 /* Clone pktoptions received with SYN */
1314 newnp->pktoptions = NULL;
1315 if (treq->pktopts != NULL) {
1316 newnp->pktoptions = skb_clone(treq->pktopts,
1317 sk_gfp_atomic(sk, GFP_ATOMIC));
1318 consume_skb(treq->pktopts);
1319 treq->pktopts = NULL;
1320 if (newnp->pktoptions)
1321 skb_set_owner_r(newnp->pktoptions, newsk);
1324 newnp->mcast_oif = inet6_iif(skb);
1325 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1326 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1328 /* Clone native IPv6 options from listening socket (if any)
1330 Yes, keeping reference count would be much more clever,
1331 but we make one more one thing there: reattach optmem
1335 newnp->opt = ipv6_dup_options(newsk, np->opt);
1337 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1339 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1340 newnp->opt->opt_flen);
1342 tcp_mtup_init(newsk);
1343 tcp_sync_mss(newsk, dst_mtu(dst));
1344 newtp->advmss = dst_metric_advmss(dst);
1345 if (tcp_sk(sk)->rx_opt.user_mss &&
1346 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1347 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1349 tcp_initialize_rcv_mss(newsk);
1350 if (tcp_rsk(req)->snt_synack)
1351 tcp_valid_rtt_meas(newsk,
1352 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1353 newtp->total_retrans = req->retrans;
1355 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1356 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1358 #ifdef CONFIG_TCP_MD5SIG
1359 /* Copy over the MD5 key from the original socket */
1360 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1361 /* We're using one, so create a matching key
1362 * on the newsk structure. If we fail to get
1363 * memory, then we end up not copying the key
1366 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
1367 AF_INET6, key->key, key->keylen,
1368 sk_gfp_atomic(sk, GFP_ATOMIC));
1372 if (__inet_inherit_port(sk, newsk) < 0) {
1376 __inet6_hash(newsk, NULL);
1381 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1385 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
/* Validate or seed the TCP checksum of an inbound IPv6 segment.
 * Returns non-zero when the checksum is provably bad, 0 otherwise.
 */
1389 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1391 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1392 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1393 &ipv6_hdr(skb)->daddr, skb->csum)) {
/* Hardware-provided sum verified: no further work needed. */
1394 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Otherwise pre-fold the IPv6 pseudo-header into skb->csum so a
 * later software pass only has to sum the payload. */
1399 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1400 &ipv6_hdr(skb)->saddr,
1401 &ipv6_hdr(skb)->daddr, 0));
/* Short packets are cheap enough to verify right away.
 * NOTE(review): 76 is a magic threshold — presumably chosen so small
 * segments avoid deferred checksumming; confirm against tcp_v4 twin. */
1403 if (skb->len <= 76) {
1404 return __skb_checksum_complete(skb);
1409 /* The socket must have its spinlock held when we get
1412 * We have a potential double-lock case here, so even when
1413 * doing backlog processing we use the BH locking scheme.
1414 * This is because we cannot sleep with the original spinlock
 */
/* Per-socket receive path for IPv6 TCP: dispatches on socket state
 * (established fast path, listen, or full state machine) and latches
 * IPV6_PKTOPTIONS data for the user.  Returns 0 on success.
 */
1417 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1419 struct ipv6_pinfo *np = inet6_sk(sk);
1420 struct tcp_sock *tp;
1421 struct sk_buff *opt_skb = NULL;
1423 /* Imagine: socket is IPv6. IPv4 packet arrives,
1424 goes to IPv4 receive handler and backlogged.
1425 From backlog it always goes here. Kerboom...
1426 Fortunately, tcp_rcv_established and rcv_established
1427 handle them correctly, but it is not case with
1428 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
 */
/* Mapped IPv4 traffic on a v6 socket is handed to the v4 path. */
1431 if (skb->protocol == htons(ETH_P_IP))
1432 return tcp_v4_do_rcv(sk, skb);
1434 #ifdef CONFIG_TCP_MD5SIG
1435 if (tcp_v6_inbound_md5_hash (sk, skb))
1439 if (sk_filter(sk, skb))
1443 * socket locking is here for SMP purposes as backlog rcv
1444 * is currently called with bh processing disabled.
1447 /* Do Stevens' IPV6_PKTOPTIONS.
1449 Yes, guys, it is the only place in our code, where we
1450 may make it not affecting IPv4.
1451 The rest of code is protocol independent,
1452 and I do not like idea to uglify IPv4.
1454 Actually, all the idea behind IPV6_PKTOPTIONS
1455 looks not very well thought. For now we latch
1456 options, received in the last packet, enqueued
1457 by tcp. Feel free to propose better solution.
 */
/* Clone now; the latch decision happens at ipv6_pktoptions below. */
1461 opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
1463 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1464 struct dst_entry *dst = sk->sk_rx_dst;
1466 sock_rps_save_rxhash(sk, skb);
/* Drop the cached rx dst if the ingress device changed or the
 * route cookie no longer validates. */
1468 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1469 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1471 sk->sk_rx_dst = NULL;
1475 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1478 goto ipv6_pktoptions;
1482 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1485 if (sk->sk_state == TCP_LISTEN) {
1486 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1491 * Queue it on the new socket if the new socket is active,
1492 * otherwise we just shortcircuit this and continue with
1496 sock_rps_save_rxhash(nsk, skb);
1497 if (tcp_child_process(sk, nsk, skb))
1500 __kfree_skb(opt_skb);
1504 sock_rps_save_rxhash(sk, skb);
1506 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1509 goto ipv6_pktoptions;
1513 tcp_v6_send_reset(sk, skb);
1516 __kfree_skb(opt_skb);
1520 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1525 /* Do you ask, what is it?
1527 1. skb was enqueued by tcp.
1528 2. skb is added to tail of read queue, rather than out of order.
1529 3. socket is not in passive state.
1530 4. Finally, it really contains options, which user wants to receive.
 */
1533 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1534 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1535 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1536 np->mcast_oif = inet6_iif(opt_skb);
1537 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1538 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1539 if (np->rxopt.bits.rxtclass)
/* NOTE(review): neighbouring latches read from opt_skb, but this
 * line reads ipv6_hdr(skb); likely should be ipv6_hdr(opt_skb) —
 * upstream later changed it. TODO confirm before relying on it. */
1540 np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1541 if (ipv6_opt_accepted(sk, opt_skb)) {
1542 skb_set_owner_r(opt_skb, sk);
/* Swap in the fresh options skb; the old one is freed below. */
1543 opt_skb = xchg(&np->pktoptions, opt_skb);
1545 __kfree_skb(opt_skb);
1546 opt_skb = xchg(&np->pktoptions, NULL);
/* Protocol handler entry point for inbound IPv6 TCP segments:
 * sanity-checks the header, fills the TCP control block, looks up the
 * owning socket, and either processes the segment directly or queues
 * it (prequeue/backlog).  TIME_WAIT sockets take a separate tail path.
 */
1554 static int tcp_v6_rcv(struct sk_buff *skb)
1556 const struct tcphdr *th;
1557 const struct ipv6hdr *hdr;
1560 struct net *net = dev_net(skb->dev);
/* Only packets addressed to this host are processed. */
1562 if (skb->pkt_type != PACKET_HOST)
1566 * Count it even if it's bad.
1568 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1570 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
/* doff is in 32-bit words; must cover at least the base header. */
1575 if (th->doff < sizeof(struct tcphdr)/4)
1577 if (!pskb_may_pull(skb, th->doff*4))
1580 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1584 hdr = ipv6_hdr(skb);
/* Populate the TCP control block; SYN/FIN each consume one
 * sequence number, hence their inclusion in end_seq. */
1585 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1586 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1587 skb->len - th->doff*4);
1588 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1589 TCP_SKB_CB(skb)->when = 0;
1590 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1591 TCP_SKB_CB(skb)->sacked = 0;
1593 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1598 if (sk->sk_state == TCP_TIME_WAIT)
/* Enforce IPV6_MINHOPCOUNT (generalized TTL security, RFC 5082). */
1601 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1602 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1603 goto discard_and_relse;
1606 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1607 goto discard_and_relse;
1609 if (sk_filter(sk, skb))
1610 goto discard_and_relse;
1614 bh_lock_sock_nested(sk);
1616 if (!sock_owned_by_user(sk)) {
1617 #ifdef CONFIG_NET_DMA
1618 struct tcp_sock *tp = tcp_sk(sk);
1619 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1620 tp->ucopy.dma_chan = net_dma_find_channel();
1621 if (tp->ucopy.dma_chan)
1622 ret = tcp_v6_do_rcv(sk, skb);
1626 if (!tcp_prequeue(sk, skb))
1627 ret = tcp_v6_do_rcv(sk, skb);
/* Socket busy in user context: push onto the backlog, bounded by
 * rcvbuf + sndbuf to limit memory abuse. */
1629 } else if (unlikely(sk_add_backlog(sk, skb,
1630 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1632 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1633 goto discard_and_relse;
1638 return ret ? -1 : 0;
/* --- no-socket path: validate, then answer with RST --- */
1641 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1644 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1646 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1648 tcp_v6_send_reset(NULL, skb);
/* --- TIME_WAIT path --- */
1665 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1666 inet_twsk_put(inet_twsk(sk));
1670 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1671 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1672 inet_twsk_put(inet_twsk(sk));
1676 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* A new SYN may legitimately reuse the pair: if a listener exists,
 * retire the timewait sock and restart processing on the listener. */
1681 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1682 &ipv6_hdr(skb)->daddr,
1683 ntohs(th->dest), inet6_iif(skb));
1685 struct inet_timewait_sock *tw = inet_twsk(sk);
1686 inet_twsk_deschedule(tw, &tcp_death_row);
1691 /* Fall through to ACK */
1694 tcp_v6_timewait_ack(sk, skb);
1698 case TCP_TW_SUCCESS:;
/* Early demux: before routing, try to match the packet to an
 * established socket and attach its cached rx dst to the skb,
 * avoiding a separate route lookup on the fast path.
 */
1703 static void tcp_v6_early_demux(struct sk_buff *skb)
1705 const struct ipv6hdr *hdr;
1706 const struct tcphdr *th;
1709 if (skb->pkt_type != PACKET_HOST)
1712 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1715 hdr = ipv6_hdr(skb);
1718 if (th->doff < sizeof(struct tcphdr) / 4)
/* Only fully-established connections are demuxed here. */
1721 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1722 &hdr->saddr, th->source,
1723 &hdr->daddr, ntohs(th->dest),
1727 skb->destructor = sock_edemux;
1728 if (sk->sk_state != TCP_TIME_WAIT) {
1729 struct dst_entry *dst = sk->sk_rx_dst;
/* NOTE(review): variable is named "icsk" but holds a
 * struct inet_sock * — misleading; conventionally "icsk" is an
 * inet_connection_sock. Rename candidate for a future cleanup. */
1730 struct inet_sock *icsk = inet_sk(sk);
1732 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1734 icsk->rx_dst_ifindex == skb->skb_iif)
/* noref: dst lifetime is guaranteed by the socket reference. */
1735 skb_dst_set_noref(skb, dst);
/* TIME_WAIT socket ops: size/uniqueness/teardown hooks for tcp6. */
1740 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1741 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1742 .twsk_unique = tcp_twsk_unique,
1743 .twsk_destructor= tcp_twsk_destructor,
/* AF-specific connection-socket ops for native IPv6 TCP sockets. */
1746 static const struct inet_connection_sock_af_ops ipv6_specific = {
1747 .queue_xmit = inet6_csk_xmit,
1748 .send_check = tcp_v6_send_check,
1749 .rebuild_header = inet6_sk_rebuild_header,
1750 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1751 .conn_request = tcp_v6_conn_request,
1752 .syn_recv_sock = tcp_v6_syn_recv_sock,
1753 .net_header_len = sizeof(struct ipv6hdr),
1754 .net_frag_header_len = sizeof(struct frag_hdr),
1755 .setsockopt = ipv6_setsockopt,
1756 .getsockopt = ipv6_getsockopt,
1757 .addr2sockaddr = inet6_csk_addr2sockaddr,
1758 .sockaddr_len = sizeof(struct sockaddr_in6),
1759 .bind_conflict = inet6_csk_bind_conflict,
1760 #ifdef CONFIG_COMPAT
1761 .compat_setsockopt = compat_ipv6_setsockopt,
1762 .compat_getsockopt = compat_ipv6_getsockopt,
1766 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature (RFC 2385) helpers for native IPv6 sockets. */
1767 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1768 .md5_lookup = tcp_v6_md5_lookup,
1769 .calc_md5_hash = tcp_v6_md5_hash_skb,
1770 .md5_parse = tcp_v6_parse_md5_keys,
1775 * TCP over IPv4 via INET6 API
 */
/* AF ops used when a v6 socket actually carries IPv4-mapped traffic:
 * transmit/checksum/header handling go through the v4 routines. */
1778 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1779 .queue_xmit = ip_queue_xmit,
1780 .send_check = tcp_v4_send_check,
1781 .rebuild_header = inet_sk_rebuild_header,
1782 .sk_rx_dst_set = inet_sk_rx_dst_set,
1783 .conn_request = tcp_v6_conn_request,
1784 .syn_recv_sock = tcp_v6_syn_recv_sock,
1785 .net_header_len = sizeof(struct iphdr),
1786 .setsockopt = ipv6_setsockopt,
1787 .getsockopt = ipv6_getsockopt,
1788 .addr2sockaddr = inet6_csk_addr2sockaddr,
1789 .sockaddr_len = sizeof(struct sockaddr_in6),
1790 .bind_conflict = inet6_csk_bind_conflict,
1791 #ifdef CONFIG_COMPAT
1792 .compat_setsockopt = compat_ipv6_setsockopt,
1793 .compat_getsockopt = compat_ipv6_getsockopt,
1797 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature helpers for IPv4-mapped sockets (v4 hashing,
 * v6-style key parsing). */
1798 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1799 .md5_lookup = tcp_v4_md5_lookup,
1800 .calc_md5_hash = tcp_v4_md5_hash_skb,
1801 .md5_parse = tcp_v6_parse_md5_keys,
1805 /* NOTE: A lot of things set to zero explicitly by call to
1806 * sk_alloc() so need not be done here.
 */
/* Per-socket init: install the IPv6 AF ops (and MD5 ops if built). */
1808 static int tcp_v6_init_sock(struct sock *sk)
1810 struct inet_connection_sock *icsk = inet_csk(sk);
1814 icsk->icsk_af_ops = &ipv6_specific;
1816 #ifdef CONFIG_TCP_MD5SIG
1817 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Teardown: common TCP destruction first, then IPv6-specific state. */
1823 static void tcp_v6_destroy_sock(struct sock *sk)
1825 tcp_v4_destroy_sock(sk);
1826 inet6_destroy_sock(sk);
1829 #ifdef CONFIG_PROC_FS
1830 /* Proc filesystem TCPv6 sock list dumping. */
/* Format one pending open request (SYN_RECV) as a /proc/net/tcp6 row.
 * @i: row index; @uid: owning user id; fixed-width hex fields match
 * the v4 seq_file layout.
 */
1831 static void get_openreq6(struct seq_file *seq,
1832 const struct sock *sk, struct request_sock *req, int i, int uid)
/* Remaining time-to-die of the request, in jiffies (may go negative
 * briefly if the timer is due). */
1834 int ttd = req->expires - jiffies;
1835 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1836 const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1842 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1843 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1845 src->s6_addr32[0], src->s6_addr32[1],
1846 src->s6_addr32[2], src->s6_addr32[3],
1847 ntohs(inet_rsk(req)->loc_port),
1848 dest->s6_addr32[0], dest->s6_addr32[1],
1849 dest->s6_addr32[2], dest->s6_addr32[3],
1850 ntohs(inet_rsk(req)->rmt_port),
1852 0,0, /* could print option size, but that is af dependent. */
1853 1, /* timers active (only the expire timer) */
1854 jiffies_to_clock_t(ttd),
1857 0, /* non standard timer */
1858 0, /* open_requests have no inode */
/* Format one full TCP socket as a /proc/net/tcp6 row: addresses,
 * queue sizes, timer state, and congestion info.
 */
1862 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1864 const struct in6_addr *dest, *src;
1867 unsigned long timer_expires;
1868 const struct inet_sock *inet = inet_sk(sp);
1869 const struct tcp_sock *tp = tcp_sk(sp);
1870 const struct inet_connection_sock *icsk = inet_csk(sp);
1871 const struct ipv6_pinfo *np = inet6_sk(sp);
1874 src = &np->rcv_saddr;
1875 destp = ntohs(inet->inet_dport);
1876 srcp = ntohs(inet->inet_sport);
/* Pick whichever timer is pending (retransmit, zero-window probe,
 * or keepalive) so the row can report its expiry. */
1878 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1880 timer_expires = icsk->icsk_timeout;
1881 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1883 timer_expires = icsk->icsk_timeout;
1884 } else if (timer_pending(&sp->sk_timer)) {
1886 timer_expires = sp->sk_timer.expires;
1889 timer_expires = jiffies;
1893 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1894 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
1896 src->s6_addr32[0], src->s6_addr32[1],
1897 src->s6_addr32[2], src->s6_addr32[3], srcp,
1898 dest->s6_addr32[0], dest->s6_addr32[1],
1899 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1901 tp->write_seq-tp->snd_una,
/* Listeners show accept backlog; others show unread byte count. */
1902 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1904 jiffies_to_clock_t(timer_expires - jiffies),
1905 icsk->icsk_retransmits,
1907 icsk->icsk_probes_out,
1909 atomic_read(&sp->sk_refcnt), sp,
1910 jiffies_to_clock_t(icsk->icsk_rto),
1911 jiffies_to_clock_t(icsk->icsk_ack.ato),
1912 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
/* -1 flags "still in initial slow start" to userspace. */
1914 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
/* Format one TIME_WAIT socket as a /proc/net/tcp6 row; most numeric
 * columns are fixed placeholders since a twsk has no queues/inode.
 */
1918 static void get_timewait6_sock(struct seq_file *seq,
1919 struct inet_timewait_sock *tw, int i)
1921 const struct in6_addr *dest, *src;
1923 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
/* Jiffies left until the timewait timer fires. */
1924 int ttd = tw->tw_ttd - jiffies;
1929 dest = &tw6->tw_v6_daddr;
1930 src = &tw6->tw_v6_rcv_saddr;
1931 destp = ntohs(tw->tw_dport);
1932 srcp = ntohs(tw->tw_sport);
1935 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1936 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1938 src->s6_addr32[0], src->s6_addr32[1],
1939 src->s6_addr32[2], src->s6_addr32[3], srcp,
1940 dest->s6_addr32[0], dest->s6_addr32[1],
1941 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1942 tw->tw_substate, 0, 0,
1943 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1944 atomic_read(&tw->tw_refcnt), tw);
/* seq_file show callback: print the header row, then dispatch on the
 * iterator state to the matching per-socket formatter.
 */
1947 static int tcp6_seq_show(struct seq_file *seq, void *v)
1949 struct tcp_iter_state *st;
1951 if (v == SEQ_START_TOKEN) {
1956 "st tx_queue rx_queue tr tm->when retrnsmt"
1957 " uid timeout inode\n");
1962 switch (st->state) {
1963 case TCP_SEQ_STATE_LISTENING:
1964 case TCP_SEQ_STATE_ESTABLISHED:
1965 get_tcp6_sock(seq, v, st->num);
1967 case TCP_SEQ_STATE_OPENREQ:
1968 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1970 case TCP_SEQ_STATE_TIME_WAIT:
1971 get_timewait6_sock(seq, v, st->num);
/* file_operations for /proc/net/tcp6 (shared tcp seq_file open). */
1978 static const struct file_operations tcp6_afinfo_seq_fops = {
1979 .owner = THIS_MODULE,
1980 .open = tcp_seq_open,
1982 .llseek = seq_lseek,
1983 .release = seq_release_net
/* Registration record tying the tcp6 fops and show callback together. */
1986 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1989 .seq_fops = &tcp6_afinfo_seq_fops,
1991 .show = tcp6_seq_show,
/* Register /proc/net/tcp6 for this network namespace. */
1995 int __net_init tcp6_proc_init(struct net *net)
1997 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* Unregister /proc/net/tcp6 for this network namespace. */
2000 void tcp6_proc_exit(struct net *net)
2002 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* The TCP/IPv6 protocol descriptor: wires socket-layer operations to
 * the shared TCP core plus the v6-specific entry points above. */
2006 struct proto tcpv6_prot = {
2008 .owner = THIS_MODULE,
2010 .connect = tcp_v6_connect,
2011 .disconnect = tcp_disconnect,
2012 .accept = inet_csk_accept,
2014 .init = tcp_v6_init_sock,
2015 .destroy = tcp_v6_destroy_sock,
2016 .shutdown = tcp_shutdown,
2017 .setsockopt = tcp_setsockopt,
2018 .getsockopt = tcp_getsockopt,
2019 .recvmsg = tcp_recvmsg,
2020 .sendmsg = tcp_sendmsg,
2021 .sendpage = tcp_sendpage,
2022 .backlog_rcv = tcp_v6_do_rcv,
2023 .release_cb = tcp_release_cb,
2024 .mtu_reduced = tcp_v6_mtu_reduced,
2025 .hash = tcp_v6_hash,
2026 .unhash = inet_unhash,
2027 .get_port = inet_csk_get_port,
2028 .enter_memory_pressure = tcp_enter_memory_pressure,
2029 .sockets_allocated = &tcp_sockets_allocated,
2030 .memory_allocated = &tcp_memory_allocated,
2031 .memory_pressure = &tcp_memory_pressure,
2032 .orphan_count = &tcp_orphan_count,
2033 .sysctl_wmem = sysctl_tcp_wmem,
2034 .sysctl_rmem = sysctl_tcp_rmem,
2035 .max_header = MAX_TCP_HEADER,
2036 .obj_size = sizeof(struct tcp6_sock),
2037 .slab_flags = SLAB_DESTROY_BY_RCU,
2038 .twsk_prot = &tcp6_timewait_sock_ops,
2039 .rsk_prot = &tcp6_request_sock_ops,
2040 .h.hashinfo = &tcp_hashinfo,
2041 .no_autobind = true,
2042 #ifdef CONFIG_COMPAT
2043 .compat_setsockopt = compat_tcp_setsockopt,
2044 .compat_getsockopt = compat_tcp_getsockopt,
2046 #ifdef CONFIG_MEMCG_KMEM
2047 .proto_cgroup = tcp_proto_cgroup,
/* inet6 protocol hooks: receive, error, early demux, and GSO/GRO. */
2051 static const struct inet6_protocol tcpv6_protocol = {
2052 .early_demux = tcp_v6_early_demux,
2053 .handler = tcp_v6_rcv,
2054 .err_handler = tcp_v6_err,
2055 .gso_send_check = tcp_v6_gso_send_check,
2056 .gso_segment = tcp_tso_segment,
2057 .gro_receive = tcp6_gro_receive,
2058 .gro_complete = tcp6_gro_complete,
2059 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-switch entry mapping SOCK_STREAM/IPPROTO_TCP to tcpv6_prot. */
2062 static struct inet_protosw tcpv6_protosw = {
2063 .type = SOCK_STREAM,
2064 .protocol = IPPROTO_TCP,
2065 .prot = &tcpv6_prot,
2066 .ops = &inet6_stream_ops,
2068 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the control socket used for sending resets. */
2072 static int __net_init tcpv6_net_init(struct net *net)
2074 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2075 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns exit: release the control socket. */
2078 static void __net_exit tcpv6_net_exit(struct net *net)
2080 inet_ctl_sock_destroy(net->ipv6.tcp_sk)
/* Batched exit: purge all AF_INET6 timewait sockets in one pass. */
2083 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2085 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/* pernet registration for the init/exit hooks above. */
2088 static struct pernet_operations tcpv6_net_ops = {
2089 .init = tcpv6_net_init,
2090 .exit = tcpv6_net_exit,
2091 .exit_batch = tcpv6_net_exit_batch,
/* Module init: register protocol handler, protosw entry, and pernet
 * ops; unwinds in reverse order on failure (goto-cleanup pattern). */
2094 int __init tcpv6_init(void)
2098 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2102 /* register inet6 protocol */
2103 ret = inet6_register_protosw(&tcpv6_protosw);
2105 goto out_tcpv6_protocol;
2107 ret = register_pernet_subsys(&tcpv6_net_ops);
2109 goto out_tcpv6_protosw;
/* error unwind labels */
2114 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2116 inet6_unregister_protosw(&tcpv6_protosw);
/* Module exit: tear down registrations in reverse of tcpv6_init(). */
2120 void tcpv6_exit(void)
2122 unregister_pernet_subsys(&tcpv6_net_ops);
2123 inet6_unregister_protosw(&tcpv6_protosw);
2124 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);