/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 */
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;	/* skip 0 on wrap-around */
	spin_unlock_bh(&ip6_id_lock);
}
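
/*
 * Illustrative sketch (the ip6_example_* helper is hypothetical, not
 * part of this file): how a caller typically stamps a fragment header
 * using the helper above.  Both the id (htonl() above) and the
 * offset/flags word end up in network byte order, per the
 * "frag id should be in NBO" fix in the changelog.
 */
static inline void ip6_example_stamp_fraghdr(struct sk_buff *skb,
					     struct frag_hdr *fhdr,
					     unsigned int offset, int more)
{
	ipv6_select_ident(skb, fhdr);		/* fresh, never-zero id */
	fhdr->reserved = 0;
	fhdr->frag_off = htons(offset & ~7);	/* low 3 bits carry flags */
	if (more)
		fhdr->frag_off |= htons(IP6_MF); /* more fragments follow */
}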
static inline int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct hh_cache *hh = dst->hh;

	if (hh) {
		int hh_alen;

		/* A cached link-layer header exists: prepend it. */
		read_lock_bh(&hh->hh_lock);
		hh_alen = HH_DATA_ALIGN(hh->hh_len);
		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
		read_unlock_bh(&hh->hh_lock);
		skb_push(skb, hh->hh_len);
		return hh->hh_output(skb);
	} else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	newskb->mac.raw = newskb->data;
	__skb_pull(newskb, newskb->nh.raw - newskb->data);
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
					&skb->nh.ipv6h->saddr)) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (skb->nh.ipv6h->hop_limit == 0) {
				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}
int ip6_output(struct sk_buff *skb)
{
	if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}
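
/*
 * Added commentary (an assumption, not from the original source): the
 * dst_allfrag() case covers paths whose reported MTU is below the
 * IPv6 minimum of 1280 bytes.  A node does not shrink its packets
 * below that minimum but must then include a fragment header in every
 * packet on the path, so the flag is checked independently of skb->len.
 */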
/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit;
	u32 mtu;

	if (opt) {
		int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			kfree_skb(skb);
			skb = skb2;
			if (skb == NULL) {
				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
				return -ENOBUFS;
			}
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));

	/*
	 *	Fill in the IPv6 header
	 */
	*(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;

	/* Hop limit: socket value, then route metric, then device default. */
	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok) {
		IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
	skb->nh.ipv6h = hdr;

	*(u32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}
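
/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * an ND sender reserves link-layer headroom, lets ip6_nd_hdr() append
 * the IPv6 header, and then skb_put()s the ICMPv6 message itself.
 */
static inline void ip6_example_nd_header(struct sock *sk, struct sk_buff *skb,
					 struct net_device *dev,
					 struct in6_addr *saddr,
					 struct in6_addr *daddr, int icmp6len)
{
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, icmp6len);
}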
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel) {
			/* Deliver a clone to every matching socket but
			 * the last one, which gets the original. */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
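
/*
 * Added commentary (not in the original): 'sel' is the 16-bit value
 * of the IPv6 Router Alert hop-by-hop option.  At opt->ra the option
 * is laid out as
 *
 *	ptr[0] = 5	option type (router alert)
 *	ptr[1] = 2	option data length
 *	ptr[2..3]	value, big endian (0 = MLD)
 *
 * which is why ip6_forward() below reconstructs it as
 * (ptr[2]<<8) + ptr[3].
 */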
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = skb->nh.ipv6h;
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb->ip_summed = CHECKSUM_NONE;

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that applications will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb->nh.raw + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source-routed frames.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */
		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
						|IPV6_ADDR_LINKLOCAL)) {
		/* This check is security critical. */
		goto error;
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = skb->nh.ipv6h;

	/* Mangling the hop limit is delayed until after the skb COW */
	hdr->hop_limit--;

	IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
	to->nfmark = from->nfmark;
	/* Connection association is the same as the pre-frag packet */
	to->nfct = from->nfct;
	nf_conntrack_get(to->nfct);
	to->nfctinfo = from->nfctinfo;
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(to->nf_bridge);
	to->nf_bridge = from->nf_bridge;
	nf_bridge_get(to->nf_bridge);
#endif
#endif
}
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
	unsigned int packet_len = skb->tail - skb->nh.raw;
	int found_rhdr = 0;

	*nexthdr = &skb->nh.ipv6h->nexthdr;

	while (offset + 1 <= packet_len) {
		switch (**nexthdr) {
		case NEXTHDR_HOP:
		case NEXTHDR_ROUTING:
		case NEXTHDR_DEST:
			if (**nexthdr == NEXTHDR_ROUTING)
				found_rhdr = 1;
			if (**nexthdr == NEXTHDR_DEST && found_rhdr)
				return offset;
			offset += ipv6_optlen(exthdr);
			*nexthdr = &exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		default:
			return offset;
		}
	}

	return offset;
}
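
/*
 * Added worked example (commentary, not in the original): for a chain
 *
 *	[IPv6][Hop-by-Hop][Routing][Dest opts][TCP]
 *
 * the scan above steps over Hop-by-Hop and Routing; a Destination
 * Options header seen *after* a Routing header belongs to the
 * fragmentable part, so the returned offset points at it, while
 * *nexthdr points at the Routing header's nexthdr byte, where
 * ip6_fragment() will patch in NEXTHDR_FRAGMENT.
 */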
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	u32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	/* Payload budget per fragment: path MTU minus the unfragmentable
	 * part and the fragment header itself. */
	mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;

		/* BUILD HEADER */
		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		*prevhdr = NEXTHDR_FRAGMENT;
		memcpy(tmp_hdr, skb->nh.raw, hlen);
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		skb->nh.raw = __skb_push(skb, hlen);
		memcpy(skb->nh.raw, tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				frag->h.raw = frag->data;
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				frag->nh.raw = __skb_push(frag, hlen);
				memcpy(frag->nh.raw, tmp_hdr, hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		return err;
	}
slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */
	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;

		/* Allocate buffer. */
		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_RESERVED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/* Set up data on packet */
		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		frag->nh.raw = frag->data;
		fh = (struct frag_hdr*)(frag->data + hlen);
		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);

		/* Charge the memory for the fragment to any owner
		   it might possess. */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/* Copy the packet header into the new buffer. */
		memcpy(frag->nh.raw, skb->data, hlen);

		/* Build fragment header. */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/* Copy a block of the IP datagram. */
		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/* Put this fragment into the sending queue. */
		IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);

		err = output(frag);
		if (err)
			goto fail;
	}
	IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	int err = 0;

	*dst = NULL;
	if (sk) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		*dst = sk_dst_check(sk, np->dst_cookie);
		if (*dst) {
			struct rt6_info *rt = (struct rt6_info*)*dst;

			/* Yes, checking route validity in the not-connected
			   case is not very simple. Take into account that
			   we do not support routing by source, TOS,
			   and MSG_DONTROUTE		--ANK (980726)

			   1. If route was host route, check that
			      cached destination is current.
			      If it is network route, we still may
			      check its validity using saved pointer
			      to the last used address: daddr_cache.
			      We do not want to save whole address now,
			      (because main consumer of this service
			      is tcp, which does not have this problem),
			      so the last trick works only on connected
			      sockets.
			   2. oif also should be the same.
			 */
			if (((rt->rt6i_dst.plen != 128 ||
			      !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
			     && (np->daddr_cache == NULL ||
				 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
			    || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
				dst_release(*dst);
				*dst = NULL;
			}
		}
	}

	if (*dst == NULL)
		*dst = ip6_route_output(sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;
	return err;
}
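
/*
 * Illustrative usage sketch (hypothetical helper, not part of this
 * file): resolving a route for a datagram socket.  If fl6_src is left
 * unspecified, ip6_dst_lookup() fills in a source address via
 * ipv6_get_saddr(); on error the dst has already been released.
 */
static inline int ip6_example_route(struct sock *sk, struct in6_addr *daddr,
				    int oif, struct dst_entry **dstp)
{
	struct flowi fl;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_UDP;		/* example protocol */
	fl.oif = oif;
	ipv6_addr_copy(&fl.fl6_dst, daddr);

	return ip6_dst_lookup(sk, dstp, &fl);
}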
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) +
		    IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 */
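
	/*
	 * Added worked example (commentary, not in the original): with
	 * mtu = 1500 and no destination options, fragheaderlen = 40
	 * (the bare IPv6 header), so
	 *
	 *	maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488
	 *
	 * i.e. each non-final fragment skb holds 1448 bytes of
	 * fragmentable payload (a multiple of 8, as required), and the
	 * wire packet is 1496 bytes once the 8-byte fragment header is
	 * inserted.
	 */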
	inet->cork.length += length;

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into the current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/* If remaining data exceeds the mtu,
			   we know we need more fragment(s). */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/* The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one. */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU. */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;

			/* Fill in the control structures */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve space for the fragment header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/* Find where to start putting bytes */
			data = skb_put(skb, fraglen);
			skb->nh.raw = data + exthdrlen;
			data += fragheaderlen;
			skb->h.raw = data + exthdrlen;

			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				skb_trim(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 &&
				   getfrag(from, data + transhdrlen, offset,
					   copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/* Put the packet on the pending queue */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}
		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    page_address(frag->page) + frag->page_offset + frag->size,
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}
	return 0;

error:
	inet->cork.length -= length;
	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb->nh.raw)
		__skb_pull(skb, skb->nh.raw - skb->data);
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb->h.raw - skb->nh.raw);
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));

	*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);

	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;	/* too large for the 16-bit field */
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	inet->cork.flags &= ~IPCORK_OPT;
	if (np->cork.opt) {
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
	return err;
error:
	goto out;
}
void ip6_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	inet->cork.flags &= ~IPCORK_OPT;

	if (np->cork.opt) {
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}
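
/*
 * Illustrative usage sketch (hypothetical caller, not part of this
 * file): how a datagram protocol strings the three calls together.
 * The getfrag callback copies user data into the queued buffers.
 */
static int ip6_example_sendmsg(struct sock *sk,
			       int getfrag(void *from, char *to, int offset,
					   int len, int odd,
					   struct sk_buff *skb),
			       void *data, int len, int hlimit,
			       struct flowi *fl, struct rt6_info *rt)
{
	int err;

	/* Queue the payload; may be called repeatedly while corked. */
	err = ip6_append_data(sk, getfrag, data, len, 0 /* transhdrlen */,
			      hlimit, NULL /* opt */, fl, rt, 0 /* flags */);
	if (err) {
		ip6_flush_pending_frames(sk);	/* drop anything queued */
		return err;
	}

	/* Coalesce the queue, build the IPv6 header, and send. */
	return ip6_push_pending_frames(sk);
}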