3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
59 #include <net/addrconf.h>
61 #include <net/dsfield.h>
63 #include <asm/uaccess.h>
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
/* Forward declarations for the IPv6 TCP handlers defined later in this
 * file, plus the two af-specific operation tables (ipv6_mapped handles
 * IPv4-mapped IPv6 sockets, ipv6_specific handles native IPv6).
 * NOTE(review): this listing is an elided excerpt — some declaration
 * lines (and continuation lines) are missing. */
68 static void tcp_v6_send_reset(struct sk_buff *skb);
69 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
73 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
76 static struct tcp_func ipv6_mapped;
77 static struct tcp_func ipv6_specific;
/* Decide whether @sk may share a local port with the sockets already
 * owning bind bucket @tb.  Walks every owner: a conflict requires the
 * same (or unset) bound device, that SO_REUSEADDR does not permit the
 * share (or the other socket is listening), and that the rcv_saddr
 * addresses clash per ipv6_rcv_saddr_equal().
 * NOTE(review): elided excerpt — the return statements and closing
 * brace are missing from this view. */
79 static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 const struct inet_bind_bucket *tb)
82 const struct sock *sk2;
83 const struct hlist_node *node;
85 /* We must walk the whole port owner list in this case. -DaveM */
86 sk_for_each_bound(sk2, node, &tb->owners) {
88 (!sk->sk_bound_dev_if ||
89 !sk2->sk_bound_dev_if ||
90 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 (!sk->sk_reuse || !sk2->sk_reuse ||
92 sk2->sk_state == TCP_LISTEN) &&
93 ipv6_rcv_saddr_equal(sk, sk2))
100 /* Grrr, addr_type already calculated by caller, but I don't want
101 * to add some silly "cookie" argument to this method just for that.
102 * But it doesn't matter, the recalculation is in the rarest path
103 * this function ever takes.
/* Bind @sk to local port @snum; when @snum is 0 (implied by the rover
 * search below), pick an ephemeral port from sysctl_local_port_range
 * using the shared tcp_hashinfo.port_rover under portalloc_lock.
 * The chosen bucket is then checked with tcp_v6_bind_conflict() and the
 * socket is attached via inet_bind_hash().
 * NOTE(review): elided excerpt — several control-flow lines (labels,
 * returns, closing braces) are missing from this view. */
105 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
107 struct inet_bind_hashbucket *head;
108 struct inet_bind_bucket *tb;
109 struct hlist_node *node;
114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1;
119 spin_lock(&tcp_hashinfo.portalloc_lock);
120 if (tcp_hashinfo.port_rover < low)
123 rover = tcp_hashinfo.port_rover;
/* Scan ports until a free bind bucket is found or the range is
 * exhausted; each bucket's chain is inspected under its own lock. */
127 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain)
130 if (tb->port == rover)
134 spin_unlock(&head->lock);
135 } while (--remaining > 0);
136 tcp_hashinfo.port_rover = rover;
137 spin_unlock(&tcp_hashinfo.portalloc_lock);
139 /* Exhausted local port range during search? It is not
140 * possible for us to be holding one of the bind hash
141 * locks if this test triggers, because if 'remaining'
142 * drops to zero, we broke out of the do/while loop at
143 * the top level, not from the 'break;' statement.
146 if (unlikely(remaining <= 0))
149 /* OK, here is the one we will use. */
/* Explicit-port path: look up (or create) the bucket for @snum and
 * apply the fastreuse / bind-conflict rules. */
152 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
153 spin_lock(&head->lock);
154 inet_bind_bucket_for_each(tb, node, &head->chain)
155 if (tb->port == snum)
161 if (tb && !hlist_empty(&tb->owners)) {
162 if (tb->fastreuse > 0 && sk->sk_reuse &&
163 sk->sk_state != TCP_LISTEN) {
167 if (tcp_v6_bind_conflict(sk, tb))
174 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
178 if (hlist_empty(&tb->owners)) {
179 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
183 } else if (tb->fastreuse &&
184 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
188 if (!inet_csk(sk)->icsk_bind_hash)
189 inet_bind_hash(sk, tb, snum);
190 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
194 spin_unlock(&head->lock);
/* Insert an unhashed @sk into the appropriate lookup table: the
 * listening hash (under lhash_lock via inet_listen_wlock) for
 * TCP_LISTEN sockets, otherwise the established hash at the slot
 * computed by inet6_sk_ehashfn().  Bumps the protocol use count.
 * NOTE(review): elided excerpt — the unlock path is missing from this
 * view. */
200 static __inline__ void __tcp_v6_hash(struct sock *sk)
202 struct hlist_head *list;
205 BUG_TRAP(sk_unhashed(sk));
207 if (sk->sk_state == TCP_LISTEN) {
208 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
209 lock = &tcp_hashinfo.lhash_lock;
210 inet_listen_wlock(&tcp_hashinfo);
212 sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
213 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
214 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
218 __sk_add_node(sk, list);
219 sock_prot_inc_use(sk->sk_prot);
/* Hash a non-closed socket; IPv4-mapped sockets (af_specific ==
 * &ipv6_mapped) take a different path than native IPv6 ones.
 * NOTE(review): elided excerpt — the body after the ipv6_mapped test is
 * missing from this view; presumably it dispatches to the IPv4 hash —
 * TODO confirm against the full source. */
224 static void tcp_v6_hash(struct sock *sk)
226 if (sk->sk_state != TCP_CLOSE) {
227 struct tcp_sock *tp = tcp_sk(sk);
229 if (tp->af_specific == &ipv6_mapped) {
240 * Open request hash tables.
/* Hash (remote address, remote port, random salt) into a SYN-queue
 * bucket index using an open-coded Jenkins mix (__jhash_mix with
 * JHASH_GOLDEN_RATIO), masked to TCP_SYNQ_HSIZE buckets.
 * NOTE(review): elided excerpt — the lines folding @rport and @rnd into
 * the mix are missing from this view. */
243 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
247 a = raddr->s6_addr32[0];
248 b = raddr->s6_addr32[1];
249 c = raddr->s6_addr32[2];
251 a += JHASH_GOLDEN_RATIO;
252 b += JHASH_GOLDEN_RATIO;
254 __jhash_mix(a, b, c);
256 a += raddr->s6_addr32[3];
258 __jhash_mix(a, b, c);
260 return c & (TCP_SYNQ_HSIZE - 1);
/* Walk the listen socket's SYN table bucket (chosen by
 * tcp_v6_synq_hash) looking for a pending request matching the remote
 * port, AF_INET6 family, both addresses, and the bound interface
 * (treq->iif == 0 matches any).  On success *prevp is presumably set to
 * the predecessor link so the caller can unlink — the lines doing so
 * are elided; TODO confirm.
 * NOTE(review): elided excerpt — return statements are missing from
 * this view. */
263 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
264 struct request_sock ***prevp,
266 struct in6_addr *raddr,
267 struct in6_addr *laddr,
270 const struct inet_connection_sock *icsk = inet_csk(sk);
271 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
272 struct request_sock *req, **prev;
274 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
275 (req = *prev) != NULL;
276 prev = &req->dl_next) {
277 const struct tcp6_request_sock *treq = tcp6_rsk(req);
279 if (inet_rsk(req)->rmt_port == rport &&
280 req->rsk_ops->family == AF_INET6 &&
281 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
282 ipv6_addr_equal(&treq->loc_addr, laddr) &&
283 (!treq->iif || treq->iif == iif)) {
284 BUG_TRAP(req->sk == NULL);
/* TCP-over-IPv6 checksum helper: fold @len bytes between @saddr and
 * @daddr into the IPv6 pseudo-header checksum via csum_ipv6_magic,
 * seeded with partial sum @base. */
293 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
294 struct in6_addr *saddr,
295 struct in6_addr *daddr,
298 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* Pick the initial sequence number for a new connection: native IPv6
 * packets use secure_tcpv6_sequence_number() over the v6 addresses,
 * anything else falls back to the IPv4 secure_tcp_sequence_number().
 * NOTE(review): elided excerpt — the port arguments to both calls are
 * missing from this view. */
301 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
303 if (skb->protocol == htons(ETH_P_IPV6)) {
304 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
305 skb->nh.ipv6h->saddr.s6_addr32,
309 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* Check whether the 4-tuple @sk is about to use (local port @lport)
 * collides with an established or TIME-WAIT socket, under the ehash
 * bucket write lock.  A TIME-WAIT socket whose timestamps allow reuse
 * (sysctl_tcp_tw_reuse, recent-stamp age) is recycled: its sequence /
 * timestamp state is inherited, it is descheduled from tcp_death_row,
 * and @sk is hashed in its place.  Returns -EADDRNOTAVAIL on a genuine
 * collision.
 * NOTE(review): elided excerpt — several branch/return/label lines are
 * missing, including the assignment of 'tw' before its use at the
 * TIME-WAIT match test. */
316 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
317 struct inet_timewait_sock **twp)
319 struct inet_sock *inet = inet_sk(sk);
320 const struct ipv6_pinfo *np = inet6_sk(sk);
321 const struct in6_addr *daddr = &np->rcv_saddr;
322 const struct in6_addr *saddr = &np->daddr;
323 const int dif = sk->sk_bound_dev_if;
324 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
325 const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
326 tcp_hashinfo.ehash_size);
327 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
329 const struct hlist_node *node;
330 struct inet_timewait_sock *tw;
332 write_lock(&head->lock);
334 /* Check TIME-WAIT sockets first. */
/* TIME-WAIT entries live in the second half of the ehash table, at
 * head + ehash_size. */
335 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
336 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
340 if(*((__u32 *)&(tw->tw_dport)) == ports &&
341 sk2->sk_family == PF_INET6 &&
342 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
343 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
344 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
345 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
346 struct tcp_sock *tp = tcp_sk(sk);
348 if (tcptw->tw_ts_recent_stamp &&
350 (sysctl_tcp_tw_reuse &&
351 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
352 /* See comment in tcp_ipv4.c */
353 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
356 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
357 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
366 /* And established part... */
367 sk_for_each(sk2, node, &head->chain) {
368 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
373 BUG_TRAP(sk_unhashed(sk));
374 __sk_add_node(sk, &head->chain);
375 sk->sk_hashent = hash;
376 sock_prot_inc_use(sk->sk_prot);
377 write_unlock(&head->lock);
381 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
383 /* Silly. Should hash-dance instead... */
384 inet_twsk_deschedule(tw, &tcp_death_row);
385 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
392 write_unlock(&head->lock);
393 return -EADDRNOTAVAIL;
/* Per-socket offset into the ephemeral port range, derived from the
 * connection's addresses via secure_tcpv6_port_ephemeral() so port
 * selection is hard to predict.
 * NOTE(review): elided excerpt — the remaining call arguments are
 * missing from this view. */
396 static inline u32 tcpv6_port_offset(const struct sock *sk)
398 const struct inet_sock *inet = inet_sk(sk);
399 const struct ipv6_pinfo *np = inet6_sk(sk);
401 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
/* Bind and hash a connecting socket.  If no local port is set (the
 * ephemeral path), scan the local port range starting from a
 * per-destination secure offset, skipping buckets whose owners could
 * collide (checked via __tcp_v6_check_established); a recycled
 * TIME-WAIT socket found during the check is descheduled afterwards.
 * If a port is already bound, the cheap single-owner case is detected
 * under the bucket lock before falling back to the established-hash
 * walk.
 * NOTE(review): elided excerpt — loop braces, goto labels and several
 * returns are missing from this view. */
406 static int tcp_v6_hash_connect(struct sock *sk)
408 unsigned short snum = inet_sk(sk)->num;
409 struct inet_bind_hashbucket *head;
410 struct inet_bind_bucket *tb;
414 int low = sysctl_local_port_range[0];
415 int high = sysctl_local_port_range[1];
416 int range = high - low;
420 u32 offset = hint + tcpv6_port_offset(sk);
421 struct hlist_node *node;
422 struct inet_timewait_sock *tw = NULL;
425 for (i = 1; i <= range; i++) {
426 port = low + (i + offset) % range;
427 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
428 spin_lock(&head->lock);
430 /* Does not bother with rcv_saddr checks,
431 * because the established check is already
434 inet_bind_bucket_for_each(tb, node, &head->chain) {
435 if (tb->port == port) {
436 BUG_TRAP(!hlist_empty(&tb->owners));
437 if (tb->fastreuse >= 0)
439 if (!__tcp_v6_check_established(sk,
447 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
449 spin_unlock(&head->lock);
456 spin_unlock(&head->lock);
460 return -EADDRNOTAVAIL;
465 /* Head lock still held and bh's disabled */
466 inet_bind_hash(sk, tb, port);
467 if (sk_unhashed(sk)) {
468 inet_sk(sk)->sport = htons(port);
471 spin_unlock(&head->lock);
474 inet_twsk_deschedule(tw, &tcp_death_row);
/* Pre-bound port path: answer quickly when @sk is the bucket's sole
 * owner, otherwise consult the established hash. */
482 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
483 tb = inet_csk(sk)->icsk_bind_hash;
484 spin_lock_bh(&head->lock);
486 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
488 spin_unlock_bh(&head->lock);
491 spin_unlock(&head->lock);
492 /* No definite answer... Walk to established hash table */
493 ret = __tcp_v6_check_established(sk, snum, NULL);
/* Active-open (connect) for an IPv6 TCP socket.  Validates the
 * sockaddr_in6, resolves any flow label, maps in6addr_any to loopback
 * (BSD'ism), enforces scope-id consistency for link-local peers,
 * redirects IPv4-mapped destinations through tcp_v4_connect() with the
 * af_specific ops swapped to ipv6_mapped, performs the route/xfrm
 * lookup (honouring a type-0 routing header's first hop), binds an
 * ephemeral port via tcp_v6_hash_connect(), picks a secure ISN and
 * sends the SYN with tcp_connect().  On failure the socket is returned
 * to TCP_CLOSE.
 * NOTE(review): elided excerpt — many intermediate lines (error
 * returns, closing braces, some assignments) are missing from this
 * view. */
500 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
503 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
504 struct inet_sock *inet = inet_sk(sk);
505 struct ipv6_pinfo *np = inet6_sk(sk);
506 struct tcp_sock *tp = tcp_sk(sk);
507 struct in6_addr *saddr = NULL, *final_p = NULL, final;
509 struct dst_entry *dst;
513 if (addr_len < SIN6_LEN_RFC2133)
516 if (usin->sin6_family != AF_INET6)
517 return(-EAFNOSUPPORT);
519 memset(&fl, 0, sizeof(fl));
522 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
523 IP6_ECN_flow_init(fl.fl6_flowlabel);
524 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
525 struct ip6_flowlabel *flowlabel;
526 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
527 if (flowlabel == NULL)
529 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
530 fl6_sock_release(flowlabel);
535 * connect() to INADDR_ANY means loopback (BSD'ism).
538 if(ipv6_addr_any(&usin->sin6_addr))
539 usin->sin6_addr.s6_addr[15] = 0x1;
541 addr_type = ipv6_addr_type(&usin->sin6_addr);
543 if(addr_type & IPV6_ADDR_MULTICAST)
546 if (addr_type&IPV6_ADDR_LINKLOCAL) {
547 if (addr_len >= sizeof(struct sockaddr_in6) &&
548 usin->sin6_scope_id) {
549 /* If interface is set while binding, indices
552 if (sk->sk_bound_dev_if &&
553 sk->sk_bound_dev_if != usin->sin6_scope_id)
556 sk->sk_bound_dev_if = usin->sin6_scope_id;
559 /* Connect to link-local address requires an interface */
560 if (!sk->sk_bound_dev_if)
/* Changing the destination invalidates cached timestamp state. */
564 if (tp->rx_opt.ts_recent_stamp &&
565 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
566 tp->rx_opt.ts_recent = 0;
567 tp->rx_opt.ts_recent_stamp = 0;
571 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
572 np->flow_label = fl.fl6_flowlabel;
/* IPv4-mapped destination: hand off to the IPv4 connect path. */
578 if (addr_type == IPV6_ADDR_MAPPED) {
579 u32 exthdrlen = tp->ext_header_len;
580 struct sockaddr_in sin;
582 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
584 if (__ipv6_only_sock(sk))
587 sin.sin_family = AF_INET;
588 sin.sin_port = usin->sin6_port;
589 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
591 tp->af_specific = &ipv6_mapped;
592 sk->sk_backlog_rcv = tcp_v4_do_rcv;
594 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* On failure, restore the native-IPv6 operations. */
597 tp->ext_header_len = exthdrlen;
598 tp->af_specific = &ipv6_specific;
599 sk->sk_backlog_rcv = tcp_v6_do_rcv;
602 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
604 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
611 if (!ipv6_addr_any(&np->rcv_saddr))
612 saddr = &np->rcv_saddr;
614 fl.proto = IPPROTO_TCP;
615 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
616 ipv6_addr_copy(&fl.fl6_src,
617 (saddr ? saddr : &np->saddr));
618 fl.oif = sk->sk_bound_dev_if;
619 fl.fl_ip_dport = usin->sin6_port;
620 fl.fl_ip_sport = inet->sport;
/* With a source route, route to the first hop and restore the real
 * destination afterwards. */
622 if (np->opt && np->opt->srcrt) {
623 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
624 ipv6_addr_copy(&final, &fl.fl6_dst);
625 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
629 err = ip6_dst_lookup(sk, &dst, &fl);
633 ipv6_addr_copy(&fl.fl6_dst, final_p);
635 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
642 ipv6_addr_copy(&np->rcv_saddr, saddr);
645 /* set the source address */
646 ipv6_addr_copy(&np->saddr, saddr);
647 inet->rcv_saddr = LOOPBACK4_IPV6;
649 ip6_dst_store(sk, dst, NULL);
650 sk->sk_route_caps = dst->dev->features &
651 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
653 tp->ext_header_len = 0;
655 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
657 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
659 inet->dport = usin->sin6_port;
661 tcp_set_state(sk, TCP_SYN_SENT);
662 err = tcp_v6_hash_connect(sk);
667 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
672 err = tcp_connect(sk);
/* Error path: tear the socket back down to CLOSED. */
679 tcp_set_state(sk, TCP_CLOSE);
683 sk->sk_route_caps = 0;
/* ICMPv6 error handler for TCP.  Looks up the socket for the embedded
 * header, drops errors for TIME-WAIT/CLOSE and out-of-window sequence
 * numbers, handles PKT_TOOBIG by refreshing the route and shrinking the
 * MSS (tcp_sync_mss + tcp_simple_retransmit), and for SYN states
 * converts the ICMP error to an errno, reporting it on the socket or
 * dropping the matching request_sock.
 * NOTE(review): elided excerpt — several case labels, gotos and the
 * unlock/put epilogue are missing from this view. */
687 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
688 int type, int code, int offset, __u32 info)
690 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
691 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
692 struct ipv6_pinfo *np;
698 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
699 th->source, skb->dev->ifindex);
702 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
706 if (sk->sk_state == TCP_TIME_WAIT) {
707 inet_twsk_put((struct inet_timewait_sock *)sk);
712 if (sock_owned_by_user(sk))
713 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
715 if (sk->sk_state == TCP_CLOSE)
719 seq = ntohl(th->seq);
720 if (sk->sk_state != TCP_LISTEN &&
721 !between(seq, tp->snd_una, tp->snd_nxt)) {
722 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
728 if (type == ICMPV6_PKT_TOOBIG) {
729 struct dst_entry *dst = NULL;
731 if (sock_owned_by_user(sk))
733 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
736 /* icmp should have updated the destination cache entry */
737 dst = __sk_dst_check(sk, np->dst_cookie);
740 struct inet_sock *inet = inet_sk(sk);
743 /* BUGGG_FUTURE: Again, it is not clear how
744 to handle rthdr case. Ignore this complexity
747 memset(&fl, 0, sizeof(fl));
748 fl.proto = IPPROTO_TCP;
749 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
750 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
751 fl.oif = sk->sk_bound_dev_if;
752 fl.fl_ip_dport = inet->dport;
753 fl.fl_ip_sport = inet->sport;
755 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
756 sk->sk_err_soft = -err;
760 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
761 sk->sk_err_soft = -err;
/* Only retransmit if our cached MTU really exceeds the new path MTU. */
768 if (tp->pmtu_cookie > dst_mtu(dst)) {
769 tcp_sync_mss(sk, dst_mtu(dst));
770 tcp_simple_retransmit(sk);
771 } /* else let the usual retransmit timer handle it */
776 icmpv6_err_convert(type, code, &err);
778 /* Might be for an request_sock */
779 switch (sk->sk_state) {
780 struct request_sock *req, **prev;
782 if (sock_owned_by_user(sk))
785 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
786 &hdr->saddr, inet6_iif(skb));
790 /* ICMPs are not backlogged, hence we cannot get
791 * an established socket here.
793 BUG_TRAP(req->sk == NULL);
795 if (seq != tcp_rsk(req)->snt_isn) {
796 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
800 inet_csk_reqsk_queue_drop(sk, req, prev);
804 case TCP_SYN_RECV: /* Cannot happen.
805 It can, it SYNs are crossed. --ANK */
806 if (!sock_owned_by_user(sk)) {
807 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
809 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
813 sk->sk_err_soft = err;
817 if (!sock_owned_by_user(sk) && np->recverr) {
819 sk->sk_error_report(sk);
821 sk->sk_err_soft = err;
/* Build and transmit a SYN|ACK for request @req: set up the flow from
 * the request's addresses/ports, optionally invert a received type-0
 * routing header (rxopt.bits.osrcrt == 2) to source-route the reply,
 * route + xfrm-lookup, then let tcp_make_synack() build the segment,
 * checksum it against the request addresses and send with ip6_xmit().
 * Any locally built ipv6_txoptions distinct from np->opt is freed.
 * NOTE(review): elided excerpt — error gotos, the 'done' label and the
 * return are missing from this view. */
829 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
830 struct dst_entry *dst)
832 struct tcp6_request_sock *treq = tcp6_rsk(req);
833 struct ipv6_pinfo *np = inet6_sk(sk);
834 struct sk_buff * skb;
835 struct ipv6_txoptions *opt = NULL;
836 struct in6_addr * final_p = NULL, final;
840 memset(&fl, 0, sizeof(fl));
841 fl.proto = IPPROTO_TCP;
842 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
843 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
844 fl.fl6_flowlabel = 0;
846 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
847 fl.fl_ip_sport = inet_sk(sk)->sport;
852 np->rxopt.bits.osrcrt == 2 &&
854 struct sk_buff *pktopts = treq->pktopts;
855 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
857 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
860 if (opt && opt->srcrt) {
861 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
862 ipv6_addr_copy(&final, &fl.fl6_dst);
863 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
867 err = ip6_dst_lookup(sk, &dst, &fl);
871 ipv6_addr_copy(&fl.fl6_dst, final_p);
872 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
876 skb = tcp_make_synack(sk, dst, req);
878 struct tcphdr *th = skb->h.th;
880 th->check = tcp_v6_check(th, skb->len,
881 &treq->loc_addr, &treq->rmt_addr,
882 csum_partial((char *)th, skb->len, skb->csum));
884 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
885 err = ip6_xmit(sk, skb, &fl, opt, 0);
886 if (err == NET_XMIT_CN)
892 if (opt && opt != np->opt)
893 sock_kfree_s(sk, opt, opt->tot_len);
/* request_sock destructor: release the pktoptions skb captured at SYN
 * time, if any. */
899 if (tcp6_rsk(req)->pktopts)
900 kfree_skb(tcp6_rsk(req)->pktopts);
897 static void tcp_v6_reqsk_destructor(struct request_sock *req)
/* Operations table wiring the IPv6 request_sock callbacks defined in
 * this file.  NOTE(review): elided excerpt — the .family initializer
 * and closing brace are missing from this view. */
903 static struct request_sock_ops tcp6_request_sock_ops = {
905 .obj_size = sizeof(struct tcp6_request_sock),
906 .rtx_syn_ack = tcp_v6_send_synack,
907 .send_ack = tcp_v6_reqsk_send_ack,
908 .destructor = tcp_v6_reqsk_destructor,
909 .send_reset = tcp_v6_send_reset
/* Return whether the socket's IPV6_PKTOPTIONS settings make any of the
 * options present on @skb (hop-by-hop, flow label, source route,
 * destination options) worth keeping for later delivery to userspace.
 * NOTE(review): elided excerpt — the return statements are missing
 * from this view. */
912 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
914 struct ipv6_pinfo *np = inet6_sk(sk);
915 struct inet6_skb_parm *opt = IP6CB(skb);
918 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
919 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
920 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
921 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
/* Fill in the TCP checksum for an outgoing segment: with hardware
 * checksumming (CHECKSUM_HW) store the folded pseudo-header sum and the
 * offset of the check field for the NIC to finish; otherwise compute
 * the full checksum in software over the header. */
928 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
931 struct ipv6_pinfo *np = inet6_sk(sk);
933 if (skb->ip_summed == CHECKSUM_HW) {
934 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
935 skb->csum = offsetof(struct tcphdr, check);
937 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
938 csum_partial((char *)th, th->doff<<2,
/* Build and send an RST in response to @skb, without a socket.  The
 * reply swaps the incoming ports/addresses, either echoes the peer's
 * ACK sequence or acknowledges the received segment, and is routed with
 * a NULL socket (safe here per the original comment).  Silently does
 * nothing for non-unicast destinations or allocation failure.
 * NOTE(review): elided excerpt — early returns, the RST/ACK flag
 * assignments and the checksum argument lines are missing from this
 * view. */
944 static void tcp_v6_send_reset(struct sk_buff *skb)
946 struct tcphdr *th = skb->h.th, *t1;
947 struct sk_buff *buff;
953 if (!ipv6_unicast_destination(skb))
957 * We need to grab some memory, and put together an RST,
958 * and then put it into the queue to be sent.
961 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
966 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
968 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
970 /* Swap the send and the receive. */
971 memset(t1, 0, sizeof(*t1));
972 t1->dest = th->source;
973 t1->source = th->dest;
974 t1->doff = sizeof(*t1)/4;
978 t1->seq = th->ack_seq;
981 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
982 + skb->len - (th->doff<<2));
985 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
987 memset(&fl, 0, sizeof(fl));
988 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
989 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
991 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
992 sizeof(*t1), IPPROTO_TCP,
995 fl.proto = IPPROTO_TCP;
996 fl.oif = inet6_iif(skb);
997 fl.fl_ip_dport = t1->dest;
998 fl.fl_ip_sport = t1->source;
1000 /* sk = NULL, but it is safe for now. RST socket required. */
1001 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1003 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1004 dst_release(buff->dst);
1008 ip6_xmit(NULL, buff, &fl, NULL, 0);
1009 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1010 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/* Build and send a bare ACK (used for TIME-WAIT and request_sock
 * replies) with the given @seq/@ack/@win, appending a TCP timestamp
 * option when @ts is nonzero.  Mirrors tcp_v6_send_reset's socketless
 * transmit path.
 * NOTE(review): elided excerpt — the ts-dependent tot_len adjustment,
 * ack-flag assignment and some closing braces are missing from this
 * view. */
1017 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1019 struct tcphdr *th = skb->h.th, *t1;
1020 struct sk_buff *buff;
1022 int tot_len = sizeof(struct tcphdr);
1027 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1032 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1034 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1036 /* Swap the send and the receive. */
1037 memset(t1, 0, sizeof(*t1));
1038 t1->dest = th->source;
1039 t1->source = th->dest;
1040 t1->doff = tot_len/4;
1041 t1->seq = htonl(seq);
1042 t1->ack_seq = htonl(ack);
1044 t1->window = htons(win);
/* Timestamp option: two NOPs then TIMESTAMP kind/length, value,
 * echo-reply (echo line elided). */
1047 u32 *ptr = (u32*)(t1 + 1);
1048 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1049 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1050 *ptr++ = htonl(tcp_time_stamp);
1054 buff->csum = csum_partial((char *)t1, tot_len, 0);
1056 memset(&fl, 0, sizeof(fl));
1057 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1058 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1060 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1061 tot_len, IPPROTO_TCP,
1064 fl.proto = IPPROTO_TCP;
1065 fl.oif = inet6_iif(skb);
1066 fl.fl_ip_dport = t1->dest;
1067 fl.fl_ip_sport = t1->source;
1069 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1070 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1071 dst_release(buff->dst);
1074 ip6_xmit(NULL, buff, &fl, NULL, 0);
1075 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* Answer a segment arriving for a TIME-WAIT socket with an ACK carrying
 * the stored snd_nxt/rcv_nxt, scaled receive window and recent
 * timestamp. */
1082 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1084 struct inet_timewait_sock *tw = inet_twsk(sk);
1085 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1087 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1088 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1089 tcptw->tw_ts_recent);
/* ACK on behalf of a pending request_sock: ISN+1 / rcv_isn+1 with the
 * request's advertised window and timestamp. */
1094 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1096 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
/* For a segment hitting a listening socket, find what it belongs to:
 * a pending request (handed to tcp_check_req), an already-established
 * child in the ehash, or — via the disabled SYN-cookie stub — a cookie
 * validation.  TIME-WAIT matches are released.
 * NOTE(review): elided excerpt — the returns after each branch are
 * missing from this view. */
1100 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1102 struct request_sock *req, **prev;
1103 const struct tcphdr *th = skb->h.th;
1106 /* Find possible connection requests. */
1107 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1108 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1110 return tcp_check_req(sk, skb, req, prev);
1112 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1113 th->source, &skb->nh.ipv6h->daddr,
1114 ntohs(th->dest), inet6_iif(skb));
1117 if (nsk->sk_state != TCP_TIME_WAIT) {
1121 inet_twsk_put((struct inet_timewait_sock *)nsk);
1125 #if 0 /*def CONFIG_SYN_COOKIES*/
1126 if (!th->rst && !th->syn && th->ack)
1127 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/* Queue request @req on the listen socket's SYN table at the bucket
 * computed by tcp_v6_synq_hash(), arming the TCP_TIMEOUT_INIT
 * retransmit timer and bumping the queue accounting. */
1132 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1134 struct inet_connection_sock *icsk = inet_csk(sk);
1135 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1136 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1138 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1139 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1143 /* FIXME: this is substantially similar to the ipv4 code.
1144 * Can some kind of merge be done? -- erics
/* Handle an incoming SYN on a listening IPv6 socket.  IPv4 frames are
 * delegated to tcp_v4_conn_request.  Applies SYN-flood and accept-queue
 * pressure checks, allocates and initializes a tcp6_request_sock (TCP
 * options, addresses, ECN, captured pktoptions, inbound interface),
 * picks an ISN, sends the SYN|ACK and queues the request.  Returns 0
 * even on drop so no RST is sent.
 * NOTE(review): elided excerpt — goto labels and the drop path between
 * the success return and the stats bump are missing from this view. */
1146 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1148 struct tcp6_request_sock *treq;
1149 struct ipv6_pinfo *np = inet6_sk(sk);
1150 struct tcp_options_received tmp_opt;
1151 struct tcp_sock *tp = tcp_sk(sk);
1152 struct request_sock *req = NULL;
1153 __u32 isn = TCP_SKB_CB(skb)->when;
1155 if (skb->protocol == htons(ETH_P_IP))
1156 return tcp_v4_conn_request(sk, skb);
1158 if (!ipv6_unicast_destination(skb))
1162 * There are no SYN attacks on IPv6, yet...
1164 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1165 if (net_ratelimit())
1166 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1170 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1173 req = reqsk_alloc(&tcp6_request_sock_ops);
1177 tcp_clear_options(&tmp_opt);
1178 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1179 tmp_opt.user_mss = tp->rx_opt.user_mss;
1181 tcp_parse_options(skb, &tmp_opt, 0);
1183 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1184 tcp_openreq_init(req, &tmp_opt, skb);
1186 treq = tcp6_rsk(req);
1187 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1188 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1189 TCP_ECN_create_request(req, skb->h.th);
1190 treq->pktopts = NULL;
/* Hold the SYN skb if any of its IPv6 options must be delivered to the
 * eventual child socket. */
1191 if (ipv6_opt_accepted(sk, skb) ||
1192 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1193 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1194 atomic_inc(&skb->users);
1195 treq->pktopts = skb;
1197 treq->iif = sk->sk_bound_dev_if;
1199 /* So that link locals have meaning */
1200 if (!sk->sk_bound_dev_if &&
1201 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1202 treq->iif = inet6_iif(skb);
1205 isn = tcp_v6_init_sequence(sk,skb);
1207 tcp_rsk(req)->snt_isn = isn;
1209 if (tcp_v6_send_synack(sk, req, NULL))
1212 tcp_v6_synq_add(sk, req);
1220 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1221 return 0; /* don't send reset */
/* Create the child socket when the handshake completes.  IPv4 frames
 * take the tcp_v4_syn_recv_sock path and the result is re-dressed as an
 * IPv4-mapped IPv6 socket (mapped addresses, ipv6_mapped ops).  Native
 * IPv6 requests get a route (honouring an inverted source route when
 * rxopt.bits.osrcrt == 2), a cloned tcp socket via
 * tcp_create_openreq_child, the request's addresses/interface, cloned
 * pktoptions and txoptions, MSS sync, and are hashed + given the
 * parent's port via inet_inherit_port().
 * NOTE(review): elided excerpt — returns, gotos and the 'out*' labels
 * are missing from this view, as is the initialization of 'opt' before
 * its first test. */
1224 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1225 struct request_sock *req,
1226 struct dst_entry *dst)
1228 struct tcp6_request_sock *treq = tcp6_rsk(req);
1229 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1230 struct tcp6_sock *newtcp6sk;
1231 struct inet_sock *newinet;
1232 struct tcp_sock *newtp;
1234 struct ipv6_txoptions *opt;
1236 if (skb->protocol == htons(ETH_P_IP)) {
1241 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1246 newtcp6sk = (struct tcp6_sock *)newsk;
1247 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1249 newinet = inet_sk(newsk);
1250 newnp = inet6_sk(newsk);
1251 newtp = tcp_sk(newsk);
1253 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
/* Rewrite the child's addresses as IPv4-mapped (::ffff:a.b.c.d). */
1255 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1258 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1261 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1263 newtp->af_specific = &ipv6_mapped;
1264 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1265 newnp->pktoptions = NULL;
1267 newnp->mcast_oif = inet6_iif(skb);
1268 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1271 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1272 * here, tcp_create_openreq_child now does this for us, see the comment in
1273 * that function for the gory details. -acme
1276 /* It is tricky place. Until this moment IPv4 tcp
1277 worked with IPv6 af_tcp.af_specific.
1280 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1287 if (sk_acceptq_is_full(sk))
1290 if (np->rxopt.bits.osrcrt == 2 &&
1291 opt == NULL && treq->pktopts) {
1292 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1294 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1298 struct in6_addr *final_p = NULL, final;
1301 memset(&fl, 0, sizeof(fl));
1302 fl.proto = IPPROTO_TCP;
1303 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1304 if (opt && opt->srcrt) {
1305 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1306 ipv6_addr_copy(&final, &fl.fl6_dst);
1307 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1310 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1311 fl.oif = sk->sk_bound_dev_if;
1312 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1313 fl.fl_ip_sport = inet_sk(sk)->sport;
1315 if (ip6_dst_lookup(sk, &dst, &fl))
1319 ipv6_addr_copy(&fl.fl6_dst, final_p);
1321 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1325 newsk = tcp_create_openreq_child(sk, req, skb);
1330 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1331 * count here, tcp_create_openreq_child now does this for us, see the
1332 * comment in that function for the gory details. -acme
1335 ip6_dst_store(newsk, dst, NULL);
1336 newsk->sk_route_caps = dst->dev->features &
1337 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1339 newtcp6sk = (struct tcp6_sock *)newsk;
1340 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1342 newtp = tcp_sk(newsk);
1343 newinet = inet_sk(newsk);
1344 newnp = inet6_sk(newsk);
1346 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1348 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1349 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1350 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1351 newsk->sk_bound_dev_if = treq->iif;
1353 /* Now IPv6 options...
1355 First: no IPv4 options.
1357 newinet->opt = NULL;
1360 newnp->rxopt.all = np->rxopt.all;
1362 /* Clone pktoptions received with SYN */
1363 newnp->pktoptions = NULL;
1364 if (treq->pktopts != NULL) {
1365 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1366 kfree_skb(treq->pktopts);
1367 treq->pktopts = NULL;
1368 if (newnp->pktoptions)
1369 skb_set_owner_r(newnp->pktoptions, newsk);
1372 newnp->mcast_oif = inet6_iif(skb);
1373 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1375 /* Clone native IPv6 options from listening socket (if any)
1377 Yes, keeping reference count would be much more clever,
1378 but we make one more one thing there: reattach optmem
1382 newnp->opt = ipv6_dup_options(newsk, opt);
1384 sock_kfree_s(sk, opt, opt->tot_len);
1387 newtp->ext_header_len = 0;
1389 newtp->ext_header_len = newnp->opt->opt_nflen +
1390 newnp->opt->opt_flen;
1392 tcp_sync_mss(newsk, dst_mtu(dst));
1393 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1394 tcp_initialize_rcv_mss(newsk);
1396 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1398 __tcp_v6_hash(newsk);
1399 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1404 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1406 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1407 if (opt && opt != np->opt)
1408 sock_kfree_s(sk, opt, opt->tot_len);
/* Validate or prepare the checksum of an incoming segment: verify a
 * hardware-computed sum immediately (logging failures); verify short
 * packets (<= 76 bytes) in software; otherwise stash the pseudo-header
 * complement in skb->csum for deferred verification.
 * NOTE(review): elided excerpt — the return statements are missing
 * from this view. */
1413 static int tcp_v6_checksum_init(struct sk_buff *skb)
1415 if (skb->ip_summed == CHECKSUM_HW) {
1416 skb->ip_summed = CHECKSUM_UNNECESSARY;
1417 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1418 &skb->nh.ipv6h->daddr,skb->csum))
1420 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1422 if (skb->len <= 76) {
1423 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1424 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1426 skb->ip_summed = CHECKSUM_UNNECESSARY;
1428 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1429 &skb->nh.ipv6h->daddr,0);
1434 /* The socket must have its spinlock held when we get
1437 * We have a potential double-lock case here, so even when
1438 * doing backlog processing we use the BH locking scheme.
1439 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive handler for IPv6 TCP.  Dispatches on socket state:
 * fast path for ESTABLISHED, listener SYN handling, and the generic
 * state machine otherwise.  Also implements IPV6_PKTOPTIONS latching.
 *
 * NOTE(review): several lines (error labels, the tp assignment, some
 * braces) are absent from this excerpt — confirm against the full file.
 */
1442 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1444 struct ipv6_pinfo *np = inet6_sk(sk);
1445 struct tcp_sock *tp;
1446 struct sk_buff *opt_skb = NULL;
1448 /* Imagine: socket is IPv6. IPv4 packet arrives,
1449 goes to IPv4 receive handler and backlogged.
1450 From backlog it always goes here. Kerboom...
1451 Fortunately, tcp_rcv_established and rcv_established
1452 handle them correctly, but it is not case with
1453 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* A v4-mapped packet on a v6 socket: hand it to the IPv4 path. */
1456 if (skb->protocol == htons(ETH_P_IP))
1457 return tcp_v4_do_rcv(sk, skb);
/* Socket filter (BPF) may drop the packet before any TCP work. */
1459 if (sk_filter(sk, skb, 0))
1463 * socket locking is here for SMP purposes as backlog rcv
1464 * is currently called with bh processing disabled.
1467 /* Do Stevens' IPV6_PKTOPTIONS.
1469 Yes, guys, it is the only place in our code, where we
1470 may make it not affecting IPv4.
1471 The rest of code is protocol independent,
1472 and I do not like idea to uglify IPv4.
1474 Actually, all the idea behind IPV6_PKTOPTIONS
1475 looks not very well thought. For now we latch
1476 options, received in the last packet, enqueued
1477 by tcp. Feel free to propose better solution.
/* Clone now so we can latch the options even after the original
 * skb has been consumed by the TCP receive machinery. */
1481 opt_skb = skb_clone(skb, GFP_ATOMIC);
1483 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1484 TCP_CHECK_TIMER(sk);
1485 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1487 TCP_CHECK_TIMER(sk);
1489 goto ipv6_pktoptions;
/* Non-established: sanity-check header length and checksum before
 * touching the state machine. */
1493 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1496 if (sk->sk_state == TCP_LISTEN) {
1497 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1502 * Queue it on the new socket if the new socket is active,
1503 * otherwise we just shortcircuit this and continue with
/* tcp_v6_hnd_req() produced a child socket: let it process the
 * segment; the listener itself keeps no per-packet options. */
1507 if (tcp_child_process(sk, nsk, skb))
1510 __kfree_skb(opt_skb);
/* All remaining states go through the generic state processor. */
1515 TCP_CHECK_TIMER(sk);
1516 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1518 TCP_CHECK_TIMER(sk);
1520 goto ipv6_pktoptions;
/* Reset/discard error paths. */
1524 tcp_v6_send_reset(skb);
1527 __kfree_skb(opt_skb);
1531 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1536 /* Do you ask, what is it?
1538 1. skb was enqueued by tcp.
1539 2. skb is added to tail of read queue, rather than out of order.
1540 3. socket is not in passive state.
1541 4. Finally, it really contains options, which user wants to receive.
1544 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1545 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1546 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1547 np->mcast_oif = inet6_iif(opt_skb);
1548 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1549 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
/* Latch the clone as the "last received" pktoptions; xchg gives us
 * back the previously latched skb so it can be freed below. */
1550 if (ipv6_opt_accepted(sk, opt_skb)) {
1551 skb_set_owner_r(opt_skb, sk);
1552 opt_skb = xchg(&np->pktoptions, opt_skb);
1554 __kfree_skb(opt_skb);
1555 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Protocol entry point for incoming IPv6 TCP segments (registered in
 * tcpv6_protocol below).  Validates the header, looks up the owning
 * socket, and either processes the segment directly, prequeues it, or
 * backlogs it depending on socket lock ownership.
 *
 * NOTE(review): error labels (no_tcp_socket, do_time_wait, etc.) and
 * some intermediate lines are missing from this excerpt.
 */
1564 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1566 struct sk_buff *skb = *pskb;
/* Only packets addressed to this host are processed. */
1571 if (skb->pkt_type != PACKET_HOST)
1575 * Count it even if it's bad.
1577 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
/* Make sure the basic and then the full (doff-sized) TCP header is
 * present in linear skb data. */
1579 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1584 if (th->doff < sizeof(struct tcphdr)/4)
1586 if (!pskb_may_pull(skb, th->doff*4))
1589 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1590 tcp_v6_checksum_init(skb) < 0))
/* Stash parsed sequence numbers / flags in the skb control block for
 * the rest of the TCP stack. */
1594 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1595 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1596 skb->len - th->doff*4);
1597 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1598 TCP_SKB_CB(skb)->when = 0;
1599 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1600 TCP_SKB_CB(skb)->sacked = 0;
/* Find the socket (established or listening) for this 4-tuple. */
1602 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1603 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1610 if (sk->sk_state == TCP_TIME_WAIT)
1613 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1614 goto discard_and_relse;
1616 if (sk_filter(sk, skb, 0))
1617 goto discard_and_relse;
/* If the user owns the socket lock we must backlog; otherwise try the
 * prequeue, falling back to direct processing. */
1623 if (!sock_owned_by_user(sk)) {
1624 if (!tcp_prequeue(sk, skb))
1625 ret = tcp_v6_do_rcv(sk, skb);
1627 sk_add_backlog(sk, skb);
1631 return ret ? -1 : 0;
/* No matching socket: policy-check, validate, then send a RST. */
1634 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1637 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1639 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1641 tcp_v6_send_reset(skb);
/* TIME_WAIT handling: validate, then let the timewait state machine
 * decide whether to restart a connection, ACK, or reset. */
1658 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1659 inet_twsk_put((struct inet_timewait_sock *)sk);
1663 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1664 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1665 inet_twsk_put((struct inet_timewait_sock *)sk);
1669 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
/* TCP_TW_SYN: a new SYN may legitimately reuse the pair if a listener
 * exists; tear down the timewait sock and retry with the listener. */
1675 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1676 &skb->nh.ipv6h->daddr,
1677 ntohs(th->dest), inet6_iif(skb));
1679 struct inet_timewait_sock *tw = inet_twsk(sk);
1680 inet_twsk_deschedule(tw, &tcp_death_row);
1685 /* Fall through to ACK */
1688 tcp_v6_timewait_ack(sk, skb);
1692 case TCP_TW_SUCCESS:;
/*
 * Re-validate (and if necessary re-create) the cached route for a
 * connected IPv6 TCP socket, e.g. after a routing change invalidated
 * the destination cache entry.  Returns 0 on success (per the visible
 * error paths; full body not shown in this excerpt).
 */
1697 static int tcp_v6_rebuild_header(struct sock *sk)
1700 struct dst_entry *dst;
1701 struct ipv6_pinfo *np = inet6_sk(sk);
/* Check the cached dst against our cookie; only relookup if stale. */
1703 dst = __sk_dst_check(sk, np->dst_cookie);
1706 struct inet_sock *inet = inet_sk(sk);
1707 struct in6_addr *final_p = NULL, final;
/* Build a flow descriptor from the socket's connection state. */
1710 memset(&fl, 0, sizeof(fl));
1711 fl.proto = IPPROTO_TCP;
1712 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1713 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1714 fl.fl6_flowlabel = np->flow_label;
1715 fl.oif = sk->sk_bound_dev_if;
1716 fl.fl_ip_dport = inet->dport;
1717 fl.fl_ip_sport = inet->sport;
/* With a routing header, route towards the first hop and remember the
 * real final destination to restore after the lookup. */
1719 if (np->opt && np->opt->srcrt) {
1720 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1721 ipv6_addr_copy(&final, &fl.fl6_dst);
1722 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1726 err = ip6_dst_lookup(sk, &dst, &fl);
1728 sk->sk_route_caps = 0;
1732 ipv6_addr_copy(&fl.fl6_dst, final_p);
/* Pass the route through IPsec policy/transform lookup. */
1734 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1735 sk->sk_err_soft = -err;
/* Cache the fresh route and recompute offload capabilities; IPv6
 * cannot use the device's IPv4-checksum/TSO features. */
1740 ip6_dst_store(sk, dst, NULL);
1741 sk->sk_route_caps = dst->dev->features &
1742 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
/*
 * Transmit one TCP segment over IPv6: build the flow from connection
 * state, obtain (or revalidate) the route, and hand the skb to
 * ip6_xmit() together with any IPv6 extension headers (np->opt).
 */
1748 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1750 struct sock *sk = skb->sk;
1751 struct inet_sock *inet = inet_sk(sk);
1752 struct ipv6_pinfo *np = inet6_sk(sk);
1754 struct dst_entry *dst;
1755 struct in6_addr *final_p = NULL, final;
1757 memset(&fl, 0, sizeof(fl));
1758 fl.proto = IPPROTO_TCP;
1759 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1760 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1761 fl.fl6_flowlabel = np->flow_label;
/* Fold the socket's ECN state into the flowlabel field. */
1762 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1763 fl.oif = sk->sk_bound_dev_if;
1764 fl.fl_ip_sport = inet->sport;
1765 fl.fl_ip_dport = inet->dport;
/* Source-routing: route to the first hop in the routing header,
 * remembering the ultimate destination for after the lookup. */
1767 if (np->opt && np->opt->srcrt) {
1768 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1769 ipv6_addr_copy(&final, &fl.fl6_dst);
1770 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
/* Reuse the cached route if still valid; otherwise look one up. */
1774 dst = __sk_dst_check(sk, np->dst_cookie);
1777 int err = ip6_dst_lookup(sk, &dst, &fl);
1780 sk->sk_err_soft = -err;
1785 ipv6_addr_copy(&fl.fl6_dst, final_p);
1787 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1788 sk->sk_route_caps = 0;
/* Cache route; IPv6 cannot use IPv4 checksum offload or TSO. */
1793 ip6_dst_store(sk, dst, NULL);
1794 sk->sk_route_caps = dst->dev->features &
1795 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1798 skb->dst = dst_clone(dst);
1800 /* Restore final destination back after routing done */
1801 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1803 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * Fill @uaddr (a sockaddr_in6) with the peer address of connected
 * socket @sk, for getpeername()-style callers via tcp_func.
 */
1806 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1808 struct ipv6_pinfo *np = inet6_sk(sk);
1809 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1811 sin6->sin6_family = AF_INET6;
1812 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
/* dport is kept in network byte order, matching sin6_port. */
1813 sin6->sin6_port = inet_sk(sk)->dport;
1814 /* We do not store received flowlabel for TCP */
1815 sin6->sin6_flowinfo = 0;
1816 sin6->sin6_scope_id = 0;
/* Link-local peers need the bound interface as the scope id. */
1817 if (sk->sk_bound_dev_if &&
1818 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1819 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * Timestamp-remembering hook for the tcp_func ops table.  IPv6 has no
 * peer cache yet, so this is a stub (body truncated in this excerpt;
 * presumably returns 0 — confirm against the full file).
 */
1822 static int tcp_v6_remember_stamp(struct sock *sk)
1824 /* Alas, not yet... */
/*
 * Address-family operations for native IPv6 TCP sockets.  Installed as
 * tp->af_specific in tcp_v6_init_sock().
 */
1828 static struct tcp_func ipv6_specific = {
1829 .queue_xmit = tcp_v6_xmit,
1830 .send_check = tcp_v6_send_check,
1831 .rebuild_header = tcp_v6_rebuild_header,
1832 .conn_request = tcp_v6_conn_request,
1833 .syn_recv_sock = tcp_v6_syn_recv_sock,
1834 .remember_stamp = tcp_v6_remember_stamp,
1835 .net_header_len = sizeof(struct ipv6hdr),
1837 .setsockopt = ipv6_setsockopt,
1838 .getsockopt = ipv6_getsockopt,
1839 .addr2sockaddr = v6_addr2sockaddr,
1840 .sockaddr_len = sizeof(struct sockaddr_in6)
1844 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for v4-mapped sockets: IPv4 transport
 * (ip_queue_xmit / tcp_v4_* helpers) behind the IPv6 socket API.
 * Socket options and addr2sockaddr stay on the IPv6 side so userspace
 * still sees sockaddr_in6.
 */
1847 static struct tcp_func ipv6_mapped = {
1848 .queue_xmit = ip_queue_xmit,
1849 .send_check = tcp_v4_send_check,
1850 .rebuild_header = inet_sk_rebuild_header,
1851 .conn_request = tcp_v6_conn_request,
1852 .syn_recv_sock = tcp_v6_syn_recv_sock,
1853 .remember_stamp = tcp_v4_remember_stamp,
1854 .net_header_len = sizeof(struct iphdr),
1856 .setsockopt = ipv6_setsockopt,
1857 .getsockopt = ipv6_getsockopt,
1858 .addr2sockaddr = v6_addr2sockaddr,
1859 .sockaddr_len = sizeof(struct sockaddr_in6)
1864 /* NOTE: A lot of things set to zero explicitly by call to
1865 * sk_alloc() so need not be done here.
/*
 * Initialize a freshly allocated IPv6 TCP socket: timers, queues,
 * default congestion parameters, and the native-v6 af_specific ops.
 * (tcpv6_prot.init; some lines missing from this excerpt.)
 */
1867 static int tcp_v6_init_sock(struct sock *sk)
1869 struct inet_connection_sock *icsk = inet_csk(sk);
1870 struct tcp_sock *tp = tcp_sk(sk);
1872 skb_queue_head_init(&tp->out_of_order_queue);
1873 tcp_init_xmit_timers(sk);
1874 tcp_prequeue_init(tp);
1876 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1877 tp->mdev = TCP_TIMEOUT_INIT;
1879 /* So many TCP implementations out there (incorrectly) count the
1880 * initial SYN frame in their delayed-ACK and congestion control
1881 * algorithms that we must have the following bandaid to talk
1882 * efficiently to them. -DaveM
1886 /* See draft-stevens-tcpca-spec-01 for discussion of the
1887 * initialization of these values.
1889 tp->snd_ssthresh = 0x7fffffff;
1890 tp->snd_cwnd_clamp = ~0;
/* RFC 1122 default MSS until the path is learned. */
1891 tp->mss_cache = 536;
1893 tp->reordering = sysctl_tcp_reordering;
1895 sk->sk_state = TCP_CLOSE;
1897 tp->af_specific = &ipv6_specific;
1898 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1899 sk->sk_write_space = sk_stream_write_space;
1900 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1902 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1903 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1905 atomic_inc(&tcp_sockets_allocated);
/*
 * Destroy hook (tcpv6_prot.destroy): run the shared TCP teardown, then
 * release the IPv6-specific socket state.
 */
1910 static int tcp_v6_destroy_sock(struct sock *sk)
1912 tcp_v4_destroy_sock(sk);
1913 return inet6_destroy_sock(sk);
1916 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Emit one /proc/net/tcp6 row for a pending open request (SYN_RECV)
 * hanging off listener @sk.  Field layout mirrors the established-
 * socket rows produced by get_tcp6_sock().
 */
1917 static void get_openreq6(struct seq_file *seq,
1918 struct sock *sk, struct request_sock *req, int i, int uid)
1920 struct in6_addr *dest, *src;
/* Remaining SYN-ACK retransmit time, in jiffies. */
1921 int ttd = req->expires - jiffies;
1926 src = &tcp6_rsk(req)->loc_addr;
1927 dest = &tcp6_rsk(req)->rmt_addr;
1929 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1930 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1932 src->s6_addr32[0], src->s6_addr32[1],
1933 src->s6_addr32[2], src->s6_addr32[3],
1934 ntohs(inet_sk(sk)->sport),
1935 dest->s6_addr32[0], dest->s6_addr32[1],
1936 dest->s6_addr32[2], dest->s6_addr32[3],
1937 ntohs(inet_rsk(req)->rmt_port),
1939 0,0, /* could print option size, but that is af dependent. */
1940 1, /* timers active (only the expire timer) */
1941 jiffies_to_clock_t(ttd),
1944 0, /* non standard timer */
1945 0, /* open_requests have no inode */
/*
 * Emit one /proc/net/tcp6 row for a full socket (listening or
 * established), including queue depths, active timer and its expiry,
 * retransmit/probe counters and congestion state.
 */
1949 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1951 struct in6_addr *dest, *src;
1954 unsigned long timer_expires;
1955 struct inet_sock *inet = inet_sk(sp);
1956 struct tcp_sock *tp = tcp_sk(sp);
1957 const struct inet_connection_sock *icsk = inet_csk(sp);
1958 struct ipv6_pinfo *np = inet6_sk(sp);
1961 src = &np->rcv_saddr;
1962 destp = ntohs(inet->dport);
1963 srcp = ntohs(inet->sport);
/* Pick which timer is pending (retransmit, zero-window probe, or
 * keepalive) so the row can report its expiry. */
1965 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1967 timer_expires = icsk->icsk_timeout;
1968 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1970 timer_expires = icsk->icsk_timeout;
1971 } else if (timer_pending(&sp->sk_timer)) {
1973 timer_expires = sp->sk_timer.expires;
/* No timer pending: report "now" so the delta below prints 0. */
1976 timer_expires = jiffies;
1980 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1981 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1983 src->s6_addr32[0], src->s6_addr32[1],
1984 src->s6_addr32[2], src->s6_addr32[3], srcp,
1985 dest->s6_addr32[0], dest->s6_addr32[1],
1986 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1988 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1990 jiffies_to_clock_t(timer_expires - jiffies),
1991 icsk->icsk_retransmits,
1993 icsk->icsk_probes_out,
1995 atomic_read(&sp->sk_refcnt), sp,
1998 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
/* ssthresh >= 0xFFFF is treated as "infinite" and shown as -1. */
1999 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Emit one /proc/net/tcp6 row for a TIME_WAIT mini-socket.  Most
 * columns are fixed zeros since a timewait sock has no queues, uid or
 * inode; timer slot 3 reports the remaining timewait duration.
 */
2003 static void get_timewait6_sock(struct seq_file *seq,
2004 struct inet_timewait_sock *tw, int i)
2006 struct in6_addr *dest, *src;
2008 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
/* Remaining time-wait lifetime, in jiffies. */
2009 int ttd = tw->tw_ttd - jiffies;
2014 dest = &tcp6tw->tw_v6_daddr;
2015 src = &tcp6tw->tw_v6_rcv_saddr;
2016 destp = ntohs(tw->tw_dport);
2017 srcp = ntohs(tw->tw_sport);
2020 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2021 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2023 src->s6_addr32[0], src->s6_addr32[1],
2024 src->s6_addr32[2], src->s6_addr32[3], srcp,
2025 dest->s6_addr32[0], dest->s6_addr32[1],
2026 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2027 tw->tw_substate, 0, 0,
2028 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2029 atomic_read(&tw->tw_refcnt), tw);
2032 #ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for /proc/net/tcp6: print the header line for
 * the start token, otherwise dispatch on the iterator state to the
 * matching row formatter above.
 */
2033 static int tcp6_seq_show(struct seq_file *seq, void *v)
2035 struct tcp_iter_state *st;
2037 if (v == SEQ_START_TOKEN) {
2042 "st tx_queue rx_queue tr tm->when retrnsmt"
2043 " uid timeout inode\n");
2048 switch (st->state) {
2049 case TCP_SEQ_STATE_LISTENING:
2050 case TCP_SEQ_STATE_ESTABLISHED:
2051 get_tcp6_sock(seq, v, st->num);
2053 case TCP_SEQ_STATE_OPENREQ:
2054 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2056 case TCP_SEQ_STATE_TIME_WAIT:
2057 get_timewait6_sock(seq, v, st->num);
/* Registration data for /proc/net/tcp6; tcp6_seq_fops is filled in by
 * the generic tcp_proc_register() machinery. */
2064 static struct file_operations tcp6_seq_fops;
2065 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2066 .owner = THIS_MODULE,
2069 .seq_show = tcp6_seq_show,
2070 .seq_fops = &tcp6_seq_fops,
/* Register the /proc/net/tcp6 seq_file entry; returns 0 or -errno. */
2073 int __init tcp6_proc_init(void)
2075 return tcp_proc_register(&tcp6_seq_afinfo);
/* Remove the /proc/net/tcp6 entry registered in tcp6_proc_init(). */
2078 void tcp6_proc_exit(void)
2080 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * The AF_INET6/SOCK_STREAM proto definition.  Most operations are the
 * shared TCP implementations; only init/destroy/backlog_rcv/hash/
 * get_port are IPv6-specific.  Shares accounting state (memory, orphan
 * and socket counters, sysctls) with IPv4 TCP.
 */
2084 struct proto tcpv6_prot = {
2086 .owner = THIS_MODULE,
2088 .connect = tcp_v6_connect,
2089 .disconnect = tcp_disconnect,
2090 .accept = inet_csk_accept,
2092 .init = tcp_v6_init_sock,
2093 .destroy = tcp_v6_destroy_sock,
2094 .shutdown = tcp_shutdown,
2095 .setsockopt = tcp_setsockopt,
2096 .getsockopt = tcp_getsockopt,
2097 .sendmsg = tcp_sendmsg,
2098 .recvmsg = tcp_recvmsg,
2099 .backlog_rcv = tcp_v6_do_rcv,
2100 .hash = tcp_v6_hash,
2101 .unhash = tcp_unhash,
2102 .get_port = tcp_v6_get_port,
2103 .enter_memory_pressure = tcp_enter_memory_pressure,
2104 .sockets_allocated = &tcp_sockets_allocated,
2105 .memory_allocated = &tcp_memory_allocated,
2106 .memory_pressure = &tcp_memory_pressure,
2107 .orphan_count = &tcp_orphan_count,
2108 .sysctl_mem = sysctl_tcp_mem,
2109 .sysctl_wmem = sysctl_tcp_wmem,
2110 .sysctl_rmem = sysctl_tcp_rmem,
2111 .max_header = MAX_TCP_HEADER,
2112 .obj_size = sizeof(struct tcp6_sock),
2113 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2114 .rsk_prot = &tcp6_request_sock_ops,
/* inet6-layer protocol handler: routes IPPROTO_TCP packets and ICMPv6
 * errors to this file.  NOPOLICY/FINAL: xfrm policy is checked inside
 * tcp_v6_rcv(), and no further extension headers follow TCP. */
2117 static struct inet6_protocol tcpv6_protocol = {
2118 .handler = tcp_v6_rcv,
2119 .err_handler = tcp_v6_err,
2120 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-switch entry binding socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP)
 * to tcpv6_prot; PERMANENT means it can never be unregistered. */
2123 static struct inet_protosw tcpv6_protosw = {
2124 .type = SOCK_STREAM,
2125 .protocol = IPPROTO_TCP,
2126 .prot = &tcpv6_prot,
2127 .ops = &inet6_stream_ops,
2130 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time initialization: hook TCP into the IPv6 protocol dispatch
 * table and register the stream socket type.  A registration failure
 * is only logged — boot continues without IPv6 TCP.
 */
2133 void __init tcpv6_init(void)
2135 /* register inet6 protocol */
2136 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2137 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2138 inet6_register_protosw(&tcpv6_protosw);