2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
73 static inline int ip6_output_finish(struct sk_buff *skb)
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
97 /* dev_loopback_xmit for use with netfilter. */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
111 static int ip6_output2(struct sk_buff *skb)
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
116 skb->protocol = htons(ETH_P_IPV6);
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
133 ip6_dev_loopback_xmit);
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
148 int ip6_output(struct sk_buff *skb)
150 if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
153 return ip6_output2(skb);
157 * xmit an sk_buff (used by TCP)
160 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
161 struct ipv6_txoptions *opt, int ipfragok)
163 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
164 struct in6_addr *first_hop = &fl->fl6_dst;
165 struct dst_entry *dst = skb->dst;
167 u8 proto = fl->proto;
168 int seg_len = skb->len;
175 /* First: exthdrs may take lots of space (~8K for now)
176 MAX_HEADER is not enough.
178 head_room = opt->opt_nflen + opt->opt_flen;
179 seg_len += head_room;
180 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
182 if (skb_headroom(skb) < head_room) {
183 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
187 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
191 skb_set_owner_w(skb, sk);
194 ipv6_push_frag_opts(skb, opt, &proto);
196 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
199 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
202 * Fill in the IPv6 header
205 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
208 hlimit = np->hop_limit;
210 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
212 hlimit = ipv6_get_hoplimit(dst->dev);
214 hdr->payload_len = htons(seg_len);
215 hdr->nexthdr = proto;
216 hdr->hop_limit = hlimit;
218 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
219 ipv6_addr_copy(&hdr->daddr, first_hop);
222 if ((skb->len <= mtu) || ipfragok) {
223 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
224 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
229 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
231 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
232 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
238 * To avoid extra problems ND packets are send through this
239 * routine. It's code duplication but I really want to avoid
240 * extra checks since ipv6_build_header is used by TCP (which
241 * is for us performance critical)
244 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
245 struct in6_addr *saddr, struct in6_addr *daddr,
248 struct ipv6_pinfo *np = inet6_sk(sk);
252 skb->protocol = htons(ETH_P_IPV6);
255 totlen = len + sizeof(struct ipv6hdr);
257 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
260 *(u32*)hdr = htonl(0x60000000);
262 hdr->payload_len = htons(len);
263 hdr->nexthdr = proto;
264 hdr->hop_limit = np->hop_limit;
266 ipv6_addr_copy(&hdr->saddr, saddr);
267 ipv6_addr_copy(&hdr->daddr, daddr);
272 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
274 struct ip6_ra_chain *ra;
275 struct sock *last = NULL;
277 read_lock(&ip6_ra_lock);
278 for (ra = ip6_ra_chain; ra; ra = ra->next) {
279 struct sock *sk = ra->sk;
280 if (sk && ra->sel == sel &&
281 (!sk->sk_bound_dev_if ||
282 sk->sk_bound_dev_if == skb->dev->ifindex)) {
284 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
286 rawv6_rcv(last, skb2);
293 rawv6_rcv(last, skb);
294 read_unlock(&ip6_ra_lock);
297 read_unlock(&ip6_ra_lock);
301 static inline int ip6_forward_finish(struct sk_buff *skb)
303 return dst_output(skb);
306 int ip6_forward(struct sk_buff *skb)
308 struct dst_entry *dst = skb->dst;
309 struct ipv6hdr *hdr = skb->nh.ipv6h;
310 struct inet6_skb_parm *opt = IP6CB(skb);
312 if (ipv6_devconf.forwarding == 0)
315 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
316 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
320 skb->ip_summed = CHECKSUM_NONE;
323 * We DO NOT make any processing on
324 * RA packets, pushing them to user level AS IS
325 * without ane WARRANTY that application will be able
326 * to interpret them. The reason is that we
327 * cannot make anything clever here.
329 * We are not end-node, so that if packet contains
330 * AH/ESP, we cannot make anything.
331 * Defragmentation also would be mistake, RA packets
332 * cannot be fragmented, because there is no warranty
333 * that different fragments will go along one path. --ANK
336 u8 *ptr = skb->nh.raw + opt->ra;
337 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
342 * check and decrement ttl
344 if (hdr->hop_limit <= 1) {
345 /* Force OUTPUT device used as source address */
347 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
354 if (!xfrm6_route_forward(skb)) {
355 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
360 /* IPv6 specs say nothing about it, but it is clear that we cannot
361 send redirects to source routed frames.
363 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
364 struct in6_addr *target = NULL;
366 struct neighbour *n = dst->neighbour;
369 * incoming and outgoing devices are the same
373 rt = (struct rt6_info *) dst;
374 if ((rt->rt6i_flags & RTF_GATEWAY))
375 target = (struct in6_addr*)&n->primary_key;
377 target = &hdr->daddr;
379 /* Limit redirects both by destination (here)
380 and by source (inside ndisc_send_redirect)
382 if (xrlim_allow(dst, 1*HZ))
383 ndisc_send_redirect(skb, n, target);
384 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
385 |IPV6_ADDR_LINKLOCAL)) {
386 /* This check is security critical. */
390 if (skb->len > dst_mtu(dst)) {
391 /* Again, force OUTPUT device used as source address */
393 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
394 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
395 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
400 if (skb_cow(skb, dst->dev->hard_header_len)) {
401 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
407 /* Mangling hops number delayed to point after skb COW */
411 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
412 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
415 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
421 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
423 to->pkt_type = from->pkt_type;
424 to->priority = from->priority;
425 to->protocol = from->protocol;
426 dst_release(to->dst);
427 to->dst = dst_clone(from->dst);
430 #ifdef CONFIG_NET_SCHED
431 to->tc_index = from->tc_index;
433 #ifdef CONFIG_NETFILTER
434 to->nfmark = from->nfmark;
435 /* Connection association is same as pre-frag packet */
436 to->nfct = from->nfct;
437 nf_conntrack_get(to->nfct);
438 to->nfctinfo = from->nfctinfo;
439 #ifdef CONFIG_BRIDGE_NETFILTER
440 nf_bridge_put(to->nf_bridge);
441 to->nf_bridge = from->nf_bridge;
442 nf_bridge_get(to->nf_bridge);
447 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
449 u16 offset = sizeof(struct ipv6hdr);
450 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
451 unsigned int packet_len = skb->tail - skb->nh.raw;
453 *nexthdr = &skb->nh.ipv6h->nexthdr;
455 while (offset + 1 <= packet_len) {
460 case NEXTHDR_ROUTING:
462 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
463 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
464 offset += ipv6_optlen(exthdr);
465 *nexthdr = &exthdr->nexthdr;
466 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
476 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
478 struct net_device *dev;
479 struct sk_buff *frag;
480 struct rt6_info *rt = (struct rt6_info*)skb->dst;
481 struct ipv6hdr *tmp_hdr;
483 unsigned int mtu, hlen, left, len;
485 int ptr, offset = 0, err=0;
486 u8 *prevhdr, nexthdr = 0;
489 hlen = ip6_find_1stfragopt(skb, &prevhdr);
492 mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
494 if (skb_shinfo(skb)->frag_list) {
495 int first_len = skb_pagelen(skb);
497 if (first_len - hlen > mtu ||
498 ((first_len - hlen) & 7) ||
502 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
503 /* Correct geometry. */
504 if (frag->len > mtu ||
505 ((frag->len & 7) && frag->next) ||
506 skb_headroom(frag) < hlen)
509 /* Partially cloned skb? */
510 if (skb_shared(frag))
517 frag->destructor = sock_wfree;
518 skb->truesize -= frag->truesize;
524 frag = skb_shinfo(skb)->frag_list;
525 skb_shinfo(skb)->frag_list = NULL;
528 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
530 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
534 *prevhdr = NEXTHDR_FRAGMENT;
535 memcpy(tmp_hdr, skb->nh.raw, hlen);
536 __skb_pull(skb, hlen);
537 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
538 skb->nh.raw = __skb_push(skb, hlen);
539 memcpy(skb->nh.raw, tmp_hdr, hlen);
541 ipv6_select_ident(skb, fh);
542 fh->nexthdr = nexthdr;
544 fh->frag_off = htons(IP6_MF);
545 frag_id = fh->identification;
547 first_len = skb_pagelen(skb);
548 skb->data_len = first_len - skb_headlen(skb);
549 skb->len = first_len;
550 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
554 /* Prepare header of the next frame,
555 * before previous one went down. */
557 frag->ip_summed = CHECKSUM_NONE;
558 frag->h.raw = frag->data;
559 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
560 frag->nh.raw = __skb_push(frag, hlen);
561 memcpy(frag->nh.raw, tmp_hdr, hlen);
562 offset += skb->len - hlen - sizeof(struct frag_hdr);
563 fh->nexthdr = nexthdr;
565 fh->frag_off = htons(offset);
566 if (frag->next != NULL)
567 fh->frag_off |= htons(IP6_MF);
568 fh->identification = frag_id;
569 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
570 ip6_copy_metadata(frag, skb);
586 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
596 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
601 left = skb->len - hlen; /* Space per frame */
602 ptr = hlen; /* Where to start from */
605 * Fragment the datagram.
608 *prevhdr = NEXTHDR_FRAGMENT;
611 * Keep copying data until we run out.
615 /* IF: it doesn't fit, use 'mtu' - the data space left */
618 /* IF: we are not sending upto and including the packet end
619 then align the next start on an eight byte boundary */
627 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
628 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
629 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
635 * Set up data on packet
638 ip6_copy_metadata(frag, skb);
639 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
640 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
641 frag->nh.raw = frag->data;
642 fh = (struct frag_hdr*)(frag->data + hlen);
643 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
646 * Charge the memory for the fragment to any owner
650 skb_set_owner_w(frag, skb->sk);
653 * Copy the packet header into the new buffer.
655 memcpy(frag->nh.raw, skb->data, hlen);
658 * Build fragment header.
660 fh->nexthdr = nexthdr;
663 ipv6_select_ident(skb, fh);
664 frag_id = fh->identification;
666 fh->identification = frag_id;
669 * Copy a block of the IP datagram.
671 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
675 fh->frag_off = htons(offset);
677 fh->frag_off |= htons(IP6_MF);
678 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
684 * Put this fragment into the sending queue.
687 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
694 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
699 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
703 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
709 struct ipv6_pinfo *np = inet6_sk(sk);
711 *dst = sk_dst_check(sk, np->dst_cookie);
713 struct rt6_info *rt = (struct rt6_info*)*dst;
715 /* Yes, checking route validity in not connected
716 case is not very simple. Take into account,
717 that we do not support routing by source, TOS,
718 and MSG_DONTROUTE --ANK (980726)
720 1. If route was host route, check that
721 cached destination is current.
722 If it is network route, we still may
723 check its validity using saved pointer
724 to the last used address: daddr_cache.
725 We do not want to save whole address now,
726 (because main consumer of this service
727 is tcp, which has not this problem),
728 so that the last trick works only on connected
730 2. oif also should be the same.
733 if (((rt->rt6i_dst.plen != 128 ||
734 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
735 && (np->daddr_cache == NULL ||
736 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
737 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
745 *dst = ip6_route_output(sk, fl);
747 if ((err = (*dst)->error))
748 goto out_err_release;
750 if (ipv6_addr_any(&fl->fl6_src)) {
751 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
754 goto out_err_release;
765 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
766 void *from, int length, int transhdrlen,
767 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
770 struct inet_sock *inet = inet_sk(sk);
771 struct ipv6_pinfo *np = inet6_sk(sk);
773 unsigned int maxfraglen, fragheaderlen;
780 int csummode = CHECKSUM_NONE;
784 if (skb_queue_empty(&sk->sk_write_queue)) {
789 if (np->cork.opt == NULL) {
790 np->cork.opt = kmalloc(opt->tot_len,
792 if (unlikely(np->cork.opt == NULL))
794 } else if (np->cork.opt->tot_len < opt->tot_len) {
795 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
798 memcpy(np->cork.opt, opt, opt->tot_len);
799 inet->cork.flags |= IPCORK_OPT;
800 /* need source address above miyazawa*/
802 dst_hold(&rt->u.dst);
805 np->cork.hop_limit = hlimit;
806 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
807 if (dst_allfrag(rt->u.dst.path))
808 inet->cork.flags |= IPCORK_ALLFRAG;
809 inet->cork.length = 0;
810 sk->sk_sndmsg_page = NULL;
811 sk->sk_sndmsg_off = 0;
812 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
814 transhdrlen += exthdrlen;
818 if (inet->cork.flags & IPCORK_OPT)
822 mtu = inet->cork.fragsize;
825 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
827 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
828 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
830 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
831 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
832 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
838 * Let's try using as much space as possible.
839 * Use MTU if total length of the message fits into the MTU.
840 * Otherwise, we need to reserve fragment header and
841 * fragment alignment (= 8-15 octects, in total).
843 * Note that we may need to "move" the data from the tail of
844 * of the buffer to the new fragment when we split
847 * FIXME: It may be fragmented into multiple chunks
848 * at once if non-fragmentable extension headers
853 inet->cork.length += length;
855 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
859 /* Check if the remaining data fits into current packet. */
860 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
862 copy = maxfraglen - skb->len;
866 unsigned int datalen;
867 unsigned int fraglen;
868 unsigned int fraggap;
869 unsigned int alloclen;
870 struct sk_buff *skb_prev;
874 /* There's no room in the current skb */
876 fraggap = skb_prev->len - maxfraglen;
881 * If remaining data exceeds the mtu,
882 * we know we need more fragment(s).
884 datalen = length + fraggap;
885 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
886 datalen = maxfraglen - fragheaderlen;
888 fraglen = datalen + fragheaderlen;
889 if ((flags & MSG_MORE) &&
890 !(rt->u.dst.dev->features&NETIF_F_SG))
893 alloclen = datalen + fragheaderlen;
896 * The last fragment gets additional space at tail.
897 * Note: we overallocate on fragments with MSG_MODE
898 * because we have no idea if we're the last one.
900 if (datalen == length + fraggap)
901 alloclen += rt->u.dst.trailer_len;
904 * We just reserve space for fragment header.
905 * Note: this may be overallocation if the message
906 * (without MSG_MORE) fits into the MTU.
908 alloclen += sizeof(struct frag_hdr);
911 skb = sock_alloc_send_skb(sk,
913 (flags & MSG_DONTWAIT), &err);
916 if (atomic_read(&sk->sk_wmem_alloc) <=
918 skb = sock_wmalloc(sk,
919 alloclen + hh_len, 1,
921 if (unlikely(skb == NULL))
927 * Fill in the control structures
929 skb->ip_summed = csummode;
931 /* reserve for fragmentation */
932 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
935 * Find where to start putting bytes
937 data = skb_put(skb, fraglen);
938 skb->nh.raw = data + exthdrlen;
939 data += fragheaderlen;
940 skb->h.raw = data + exthdrlen;
943 skb->csum = skb_copy_and_csum_bits(
944 skb_prev, maxfraglen,
945 data + transhdrlen, fraggap, 0);
946 skb_prev->csum = csum_sub(skb_prev->csum,
949 skb_trim(skb_prev, maxfraglen);
951 copy = datalen - transhdrlen - fraggap;
956 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
963 length -= datalen - fraggap;
966 csummode = CHECKSUM_NONE;
969 * Put the packet on the pending queue
971 __skb_queue_tail(&sk->sk_write_queue, skb);
978 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
982 if (getfrag(from, skb_put(skb, copy),
983 offset, copy, off, skb) < 0) {
984 __skb_trim(skb, off);
989 int i = skb_shinfo(skb)->nr_frags;
990 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
991 struct page *page = sk->sk_sndmsg_page;
992 int off = sk->sk_sndmsg_off;
995 if (page && (left = PAGE_SIZE - off) > 0) {
998 if (page != frag->page) {
999 if (i == MAX_SKB_FRAGS) {
1004 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1005 frag = &skb_shinfo(skb)->frags[i];
1007 } else if(i < MAX_SKB_FRAGS) {
1008 if (copy > PAGE_SIZE)
1010 page = alloc_pages(sk->sk_allocation, 0);
1015 sk->sk_sndmsg_page = page;
1016 sk->sk_sndmsg_off = 0;
1018 skb_fill_page_desc(skb, i, page, 0, 0);
1019 frag = &skb_shinfo(skb)->frags[i];
1020 skb->truesize += PAGE_SIZE;
1021 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1026 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1030 sk->sk_sndmsg_off += copy;
1033 skb->data_len += copy;
1040 inet->cork.length -= length;
1041 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1045 int ip6_push_pending_frames(struct sock *sk)
1047 struct sk_buff *skb, *tmp_skb;
1048 struct sk_buff **tail_skb;
1049 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1050 struct inet_sock *inet = inet_sk(sk);
1051 struct ipv6_pinfo *np = inet6_sk(sk);
1052 struct ipv6hdr *hdr;
1053 struct ipv6_txoptions *opt = np->cork.opt;
1054 struct rt6_info *rt = np->cork.rt;
1055 struct flowi *fl = &inet->cork.fl;
1056 unsigned char proto = fl->proto;
1059 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1061 tail_skb = &(skb_shinfo(skb)->frag_list);
1063 /* move skb->data to ip header from ext header */
1064 if (skb->data < skb->nh.raw)
1065 __skb_pull(skb, skb->nh.raw - skb->data);
1066 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1067 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1068 *tail_skb = tmp_skb;
1069 tail_skb = &(tmp_skb->next);
1070 skb->len += tmp_skb->len;
1071 skb->data_len += tmp_skb->len;
1072 skb->truesize += tmp_skb->truesize;
1073 __sock_put(tmp_skb->sk);
1074 tmp_skb->destructor = NULL;
1078 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1079 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1080 if (opt && opt->opt_flen)
1081 ipv6_push_frag_opts(skb, opt, &proto);
1082 if (opt && opt->opt_nflen)
1083 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1085 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1087 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1089 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1090 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1092 hdr->payload_len = 0;
1093 hdr->hop_limit = np->cork.hop_limit;
1094 hdr->nexthdr = proto;
1095 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1096 ipv6_addr_copy(&hdr->daddr, final_dst);
1098 skb->dst = dst_clone(&rt->u.dst);
1099 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1100 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1103 err = np->recverr ? net_xmit_errno(err) : 0;
1109 inet->cork.flags &= ~IPCORK_OPT;
1111 kfree(np->cork.opt);
1112 np->cork.opt = NULL;
1115 dst_release(&np->cork.rt->u.dst);
1117 inet->cork.flags &= ~IPCORK_ALLFRAG;
1119 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1125 void ip6_flush_pending_frames(struct sock *sk)
1127 struct inet_sock *inet = inet_sk(sk);
1128 struct ipv6_pinfo *np = inet6_sk(sk);
1129 struct sk_buff *skb;
1131 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1132 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1136 inet->cork.flags &= ~IPCORK_OPT;
1139 kfree(np->cork.opt);
1140 np->cork.opt = NULL;
1143 dst_release(&np->cork.rt->u.dst);
1145 inet->cork.flags &= ~IPCORK_ALLFRAG;
1147 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));