]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv6/ip6_output.c
net: TX timestamps for IPv6 UDP packets
[karo-tx-linux.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61 int __ip6_local_out(struct sk_buff *skb)
62 {
63         int len;
64
65         len = skb->len - sizeof(struct ipv6hdr);
66         if (len > IPV6_MAXPLEN)
67                 len = 0;
68         ipv6_hdr(skb)->payload_len = htons(len);
69
70         return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71                        skb_dst(skb)->dev, dst_output);
72 }
73
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, hand the packet to dst_output().  Any other value from
 * __ip6_local_out() is returned to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int verdict = __ip6_local_out(skb);

	if (unlikely(verdict != 1))
		return verdict;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85
86 /* dev_loopback_xmit for use with netfilter. */
87 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88 {
89         skb_reset_mac_header(newskb);
90         __skb_pull(newskb, skb_network_offset(newskb));
91         newskb->pkt_type = PACKET_LOOPBACK;
92         newskb->ip_summed = CHECKSUM_UNNECESSARY;
93         WARN_ON(!skb_dst(newskb));
94
95         netif_rx_ni(newskb);
96         return 0;
97 }
98
99 static int ip6_finish_output2(struct sk_buff *skb)
100 {
101         struct dst_entry *dst = skb_dst(skb);
102         struct net_device *dev = dst->dev;
103
104         skb->protocol = htons(ETH_P_IPV6);
105         skb->dev = dev;
106
107         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
108                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
109
110                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
111                     ((mroute6_socket(dev_net(dev), skb) &&
112                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
113                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
114                                          &ipv6_hdr(skb)->saddr))) {
115                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
116
117                         /* Do not check for IFF_ALLMULTI; multicast routing
118                            is not supported in any case.
119                          */
120                         if (newskb)
121                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
122                                         newskb, NULL, newskb->dev,
123                                         ip6_dev_loopback_xmit);
124
125                         if (ipv6_hdr(skb)->hop_limit == 0) {
126                                 IP6_INC_STATS(dev_net(dev), idev,
127                                               IPSTATS_MIB_OUTDISCARDS);
128                                 kfree_skb(skb);
129                                 return 0;
130                         }
131                 }
132
133                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
134                                 skb->len);
135         }
136
137         if (dst->hh)
138                 return neigh_hh_output(dst->hh, skb);
139         else if (dst->neighbour)
140                 return dst->neighbour->output(skb);
141
142         IP6_INC_STATS_BH(dev_net(dst->dev),
143                          ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
144         kfree_skb(skb);
145         return -EINVAL;
146 }
147
148 static int ip6_finish_output(struct sk_buff *skb)
149 {
150         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151             dst_allfrag(skb_dst(skb)))
152                 return ip6_fragment(skb, ip6_finish_output2);
153         else
154                 return ip6_finish_output2(skb);
155 }
156
157 int ip6_output(struct sk_buff *skb)
158 {
159         struct net_device *dev = skb_dst(skb)->dev;
160         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
161         if (unlikely(idev->cnf.disable_ipv6)) {
162                 IP6_INC_STATS(dev_net(dev), idev,
163                               IPSTATS_MIB_OUTDISCARDS);
164                 kfree_skb(skb);
165                 return 0;
166         }
167
168         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
169                             ip6_finish_output,
170                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
171 }
172
173 /*
174  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
175  */
176
177 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
178              struct ipv6_txoptions *opt)
179 {
180         struct net *net = sock_net(sk);
181         struct ipv6_pinfo *np = inet6_sk(sk);
182         struct in6_addr *first_hop = &fl->fl6_dst;
183         struct dst_entry *dst = skb_dst(skb);
184         struct ipv6hdr *hdr;
185         u8  proto = fl->proto;
186         int seg_len = skb->len;
187         int hlimit = -1;
188         int tclass = 0;
189         u32 mtu;
190
191         if (opt) {
192                 unsigned int head_room;
193
194                 /* First: exthdrs may take lots of space (~8K for now)
195                    MAX_HEADER is not enough.
196                  */
197                 head_room = opt->opt_nflen + opt->opt_flen;
198                 seg_len += head_room;
199                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
200
201                 if (skb_headroom(skb) < head_room) {
202                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
203                         if (skb2 == NULL) {
204                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
205                                               IPSTATS_MIB_OUTDISCARDS);
206                                 kfree_skb(skb);
207                                 return -ENOBUFS;
208                         }
209                         kfree_skb(skb);
210                         skb = skb2;
211                         skb_set_owner_w(skb, sk);
212                 }
213                 if (opt->opt_flen)
214                         ipv6_push_frag_opts(skb, opt, &proto);
215                 if (opt->opt_nflen)
216                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
217         }
218
219         skb_push(skb, sizeof(struct ipv6hdr));
220         skb_reset_network_header(skb);
221         hdr = ipv6_hdr(skb);
222
223         /*
224          *      Fill in the IPv6 header
225          */
226         if (np) {
227                 tclass = np->tclass;
228                 hlimit = np->hop_limit;
229         }
230         if (hlimit < 0)
231                 hlimit = ip6_dst_hoplimit(dst);
232
233         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
234
235         hdr->payload_len = htons(seg_len);
236         hdr->nexthdr = proto;
237         hdr->hop_limit = hlimit;
238
239         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
240         ipv6_addr_copy(&hdr->daddr, first_hop);
241
242         skb->priority = sk->sk_priority;
243         skb->mark = sk->sk_mark;
244
245         mtu = dst_mtu(dst);
246         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
247                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
248                               IPSTATS_MIB_OUT, skb->len);
249                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
250                                dst->dev, dst_output);
251         }
252
253         if (net_ratelimit())
254                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
255         skb->dev = dst->dev;
256         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
257         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
258         kfree_skb(skb);
259         return -EMSGSIZE;
260 }
261
262 EXPORT_SYMBOL(ip6_xmit);
263
264 /*
265  *      To avoid extra problems ND packets are send through this
266  *      routine. It's code duplication but I really want to avoid
267  *      extra checks since ipv6_build_header is used by TCP (which
268  *      is for us performance critical)
269  */
270
271 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
272                const struct in6_addr *saddr, const struct in6_addr *daddr,
273                int proto, int len)
274 {
275         struct ipv6_pinfo *np = inet6_sk(sk);
276         struct ipv6hdr *hdr;
277
278         skb->protocol = htons(ETH_P_IPV6);
279         skb->dev = dev;
280
281         skb_reset_network_header(skb);
282         skb_put(skb, sizeof(struct ipv6hdr));
283         hdr = ipv6_hdr(skb);
284
285         *(__be32*)hdr = htonl(0x60000000);
286
287         hdr->payload_len = htons(len);
288         hdr->nexthdr = proto;
289         hdr->hop_limit = np->hop_limit;
290
291         ipv6_addr_copy(&hdr->saddr, saddr);
292         ipv6_addr_copy(&hdr->daddr, daddr);
293
294         return 0;
295 }
296
297 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
298 {
299         struct ip6_ra_chain *ra;
300         struct sock *last = NULL;
301
302         read_lock(&ip6_ra_lock);
303         for (ra = ip6_ra_chain; ra; ra = ra->next) {
304                 struct sock *sk = ra->sk;
305                 if (sk && ra->sel == sel &&
306                     (!sk->sk_bound_dev_if ||
307                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
308                         if (last) {
309                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
310                                 if (skb2)
311                                         rawv6_rcv(last, skb2);
312                         }
313                         last = sk;
314                 }
315         }
316
317         if (last) {
318                 rawv6_rcv(last, skb);
319                 read_unlock(&ip6_ra_lock);
320                 return 1;
321         }
322         read_unlock(&ip6_ra_lock);
323         return 0;
324 }
325
326 static int ip6_forward_proxy_check(struct sk_buff *skb)
327 {
328         struct ipv6hdr *hdr = ipv6_hdr(skb);
329         u8 nexthdr = hdr->nexthdr;
330         int offset;
331
332         if (ipv6_ext_hdr(nexthdr)) {
333                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
334                 if (offset < 0)
335                         return 0;
336         } else
337                 offset = sizeof(struct ipv6hdr);
338
339         if (nexthdr == IPPROTO_ICMPV6) {
340                 struct icmp6hdr *icmp6;
341
342                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
343                                          offset + 1 - skb->data)))
344                         return 0;
345
346                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
347
348                 switch (icmp6->icmp6_type) {
349                 case NDISC_ROUTER_SOLICITATION:
350                 case NDISC_ROUTER_ADVERTISEMENT:
351                 case NDISC_NEIGHBOUR_SOLICITATION:
352                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
353                 case NDISC_REDIRECT:
354                         /* For reaction involving unicast neighbor discovery
355                          * message destined to the proxied address, pass it to
356                          * input function.
357                          */
358                         return 1;
359                 default:
360                         break;
361                 }
362         }
363
364         /*
365          * The proxying router can't forward traffic sent to a link-local
366          * address, so signal the sender and discard the packet. This
367          * behavior is clarified by the MIPv6 specification.
368          */
369         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
370                 dst_link_failure(skb);
371                 return -1;
372         }
373
374         return 0;
375 }
376
/*
 * Continuation run after the NF_INET_FORWARD hook: pass the packet to
 * the output routine of its route and report that routine's result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
381
382 int ip6_forward(struct sk_buff *skb)
383 {
384         struct dst_entry *dst = skb_dst(skb);
385         struct ipv6hdr *hdr = ipv6_hdr(skb);
386         struct inet6_skb_parm *opt = IP6CB(skb);
387         struct net *net = dev_net(dst->dev);
388         u32 mtu;
389
390         if (net->ipv6.devconf_all->forwarding == 0)
391                 goto error;
392
393         if (skb_warn_if_lro(skb))
394                 goto drop;
395
396         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
397                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
398                 goto drop;
399         }
400
401         if (skb->pkt_type != PACKET_HOST)
402                 goto drop;
403
404         skb_forward_csum(skb);
405
406         /*
407          *      We DO NOT make any processing on
408          *      RA packets, pushing them to user level AS IS
409          *      without ane WARRANTY that application will be able
410          *      to interpret them. The reason is that we
411          *      cannot make anything clever here.
412          *
413          *      We are not end-node, so that if packet contains
414          *      AH/ESP, we cannot make anything.
415          *      Defragmentation also would be mistake, RA packets
416          *      cannot be fragmented, because there is no warranty
417          *      that different fragments will go along one path. --ANK
418          */
419         if (opt->ra) {
420                 u8 *ptr = skb_network_header(skb) + opt->ra;
421                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
422                         return 0;
423         }
424
425         /*
426          *      check and decrement ttl
427          */
428         if (hdr->hop_limit <= 1) {
429                 /* Force OUTPUT device used as source address */
430                 skb->dev = dst->dev;
431                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
432                 IP6_INC_STATS_BH(net,
433                                  ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
434
435                 kfree_skb(skb);
436                 return -ETIMEDOUT;
437         }
438
439         /* XXX: idev->cnf.proxy_ndp? */
440         if (net->ipv6.devconf_all->proxy_ndp &&
441             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
442                 int proxied = ip6_forward_proxy_check(skb);
443                 if (proxied > 0)
444                         return ip6_input(skb);
445                 else if (proxied < 0) {
446                         IP6_INC_STATS(net, ip6_dst_idev(dst),
447                                       IPSTATS_MIB_INDISCARDS);
448                         goto drop;
449                 }
450         }
451
452         if (!xfrm6_route_forward(skb)) {
453                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
454                 goto drop;
455         }
456         dst = skb_dst(skb);
457
458         /* IPv6 specs say nothing about it, but it is clear that we cannot
459            send redirects to source routed frames.
460            We don't send redirects to frames decapsulated from IPsec.
461          */
462         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
463             !skb_sec_path(skb)) {
464                 struct in6_addr *target = NULL;
465                 struct rt6_info *rt;
466                 struct neighbour *n = dst->neighbour;
467
468                 /*
469                  *      incoming and outgoing devices are the same
470                  *      send a redirect.
471                  */
472
473                 rt = (struct rt6_info *) dst;
474                 if ((rt->rt6i_flags & RTF_GATEWAY))
475                         target = (struct in6_addr*)&n->primary_key;
476                 else
477                         target = &hdr->daddr;
478
479                 if (!rt->rt6i_peer)
480                         rt6_bind_peer(rt, 1);
481
482                 /* Limit redirects both by destination (here)
483                    and by source (inside ndisc_send_redirect)
484                  */
485                 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
486                         ndisc_send_redirect(skb, n, target);
487         } else {
488                 int addrtype = ipv6_addr_type(&hdr->saddr);
489
490                 /* This check is security critical. */
491                 if (addrtype == IPV6_ADDR_ANY ||
492                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
493                         goto error;
494                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
495                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
496                                     ICMPV6_NOT_NEIGHBOUR, 0);
497                         goto error;
498                 }
499         }
500
501         mtu = dst_mtu(dst);
502         if (mtu < IPV6_MIN_MTU)
503                 mtu = IPV6_MIN_MTU;
504
505         if (skb->len > mtu && !skb_is_gso(skb)) {
506                 /* Again, force OUTPUT device used as source address */
507                 skb->dev = dst->dev;
508                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
509                 IP6_INC_STATS_BH(net,
510                                  ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
511                 IP6_INC_STATS_BH(net,
512                                  ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
513                 kfree_skb(skb);
514                 return -EMSGSIZE;
515         }
516
517         if (skb_cow(skb, dst->dev->hard_header_len)) {
518                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
519                 goto drop;
520         }
521
522         hdr = ipv6_hdr(skb);
523
524         /* Mangling hops number delayed to point after skb COW */
525
526         hdr->hop_limit--;
527
528         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
529         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
530                        ip6_forward_finish);
531
532 error:
533         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
534 drop:
535         kfree_skb(skb);
536         return -EINVAL;
537 }
538
539 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
540 {
541         to->pkt_type = from->pkt_type;
542         to->priority = from->priority;
543         to->protocol = from->protocol;
544         skb_dst_drop(to);
545         skb_dst_set(to, dst_clone(skb_dst(from)));
546         to->dev = from->dev;
547         to->mark = from->mark;
548
549 #ifdef CONFIG_NET_SCHED
550         to->tc_index = from->tc_index;
551 #endif
552         nf_copy(to, from);
553 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
554     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
555         to->nf_trace = from->nf_trace;
556 #endif
557         skb_copy_secmark(to, from);
558 }
559
560 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
561 {
562         u16 offset = sizeof(struct ipv6hdr);
563         struct ipv6_opt_hdr *exthdr =
564                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
565         unsigned int packet_len = skb->tail - skb->network_header;
566         int found_rhdr = 0;
567         *nexthdr = &ipv6_hdr(skb)->nexthdr;
568
569         while (offset + 1 <= packet_len) {
570
571                 switch (**nexthdr) {
572
573                 case NEXTHDR_HOP:
574                         break;
575                 case NEXTHDR_ROUTING:
576                         found_rhdr = 1;
577                         break;
578                 case NEXTHDR_DEST:
579 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
580                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
581                                 break;
582 #endif
583                         if (found_rhdr)
584                                 return offset;
585                         break;
586                 default :
587                         return offset;
588                 }
589
590                 offset += ipv6_optlen(exthdr);
591                 *nexthdr = &exthdr->nexthdr;
592                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
593                                                  offset);
594         }
595
596         return offset;
597 }
598
599 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
600 {
601         struct sk_buff *frag;
602         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
603         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
604         struct ipv6hdr *tmp_hdr;
605         struct frag_hdr *fh;
606         unsigned int mtu, hlen, left, len;
607         __be32 frag_id = 0;
608         int ptr, offset = 0, err=0;
609         u8 *prevhdr, nexthdr = 0;
610         struct net *net = dev_net(skb_dst(skb)->dev);
611
612         hlen = ip6_find_1stfragopt(skb, &prevhdr);
613         nexthdr = *prevhdr;
614
615         mtu = ip6_skb_dst_mtu(skb);
616
617         /* We must not fragment if the socket is set to force MTU discovery
618          * or if the skb it not generated by a local socket.
619          */
620         if (!skb->local_df && skb->len > mtu) {
621                 skb->dev = skb_dst(skb)->dev;
622                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
623                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
624                               IPSTATS_MIB_FRAGFAILS);
625                 kfree_skb(skb);
626                 return -EMSGSIZE;
627         }
628
629         if (np && np->frag_size < mtu) {
630                 if (np->frag_size)
631                         mtu = np->frag_size;
632         }
633         mtu -= hlen + sizeof(struct frag_hdr);
634
635         if (skb_has_frag_list(skb)) {
636                 int first_len = skb_pagelen(skb);
637                 struct sk_buff *frag2;
638
639                 if (first_len - hlen > mtu ||
640                     ((first_len - hlen) & 7) ||
641                     skb_cloned(skb))
642                         goto slow_path;
643
644                 skb_walk_frags(skb, frag) {
645                         /* Correct geometry. */
646                         if (frag->len > mtu ||
647                             ((frag->len & 7) && frag->next) ||
648                             skb_headroom(frag) < hlen)
649                                 goto slow_path_clean;
650
651                         /* Partially cloned skb? */
652                         if (skb_shared(frag))
653                                 goto slow_path_clean;
654
655                         BUG_ON(frag->sk);
656                         if (skb->sk) {
657                                 frag->sk = skb->sk;
658                                 frag->destructor = sock_wfree;
659                         }
660                         skb->truesize -= frag->truesize;
661                 }
662
663                 err = 0;
664                 offset = 0;
665                 frag = skb_shinfo(skb)->frag_list;
666                 skb_frag_list_init(skb);
667                 /* BUILD HEADER */
668
669                 *prevhdr = NEXTHDR_FRAGMENT;
670                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
671                 if (!tmp_hdr) {
672                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
673                                       IPSTATS_MIB_FRAGFAILS);
674                         return -ENOMEM;
675                 }
676
677                 __skb_pull(skb, hlen);
678                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
679                 __skb_push(skb, hlen);
680                 skb_reset_network_header(skb);
681                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
682
683                 ipv6_select_ident(fh);
684                 fh->nexthdr = nexthdr;
685                 fh->reserved = 0;
686                 fh->frag_off = htons(IP6_MF);
687                 frag_id = fh->identification;
688
689                 first_len = skb_pagelen(skb);
690                 skb->data_len = first_len - skb_headlen(skb);
691                 skb->len = first_len;
692                 ipv6_hdr(skb)->payload_len = htons(first_len -
693                                                    sizeof(struct ipv6hdr));
694
695                 dst_hold(&rt->dst);
696
697                 for (;;) {
698                         /* Prepare header of the next frame,
699                          * before previous one went down. */
700                         if (frag) {
701                                 frag->ip_summed = CHECKSUM_NONE;
702                                 skb_reset_transport_header(frag);
703                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
704                                 __skb_push(frag, hlen);
705                                 skb_reset_network_header(frag);
706                                 memcpy(skb_network_header(frag), tmp_hdr,
707                                        hlen);
708                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
709                                 fh->nexthdr = nexthdr;
710                                 fh->reserved = 0;
711                                 fh->frag_off = htons(offset);
712                                 if (frag->next != NULL)
713                                         fh->frag_off |= htons(IP6_MF);
714                                 fh->identification = frag_id;
715                                 ipv6_hdr(frag)->payload_len =
716                                                 htons(frag->len -
717                                                       sizeof(struct ipv6hdr));
718                                 ip6_copy_metadata(frag, skb);
719                         }
720
721                         err = output(skb);
722                         if(!err)
723                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
724                                               IPSTATS_MIB_FRAGCREATES);
725
726                         if (err || !frag)
727                                 break;
728
729                         skb = frag;
730                         frag = skb->next;
731                         skb->next = NULL;
732                 }
733
734                 kfree(tmp_hdr);
735
736                 if (err == 0) {
737                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
738                                       IPSTATS_MIB_FRAGOKS);
739                         dst_release(&rt->dst);
740                         return 0;
741                 }
742
743                 while (frag) {
744                         skb = frag->next;
745                         kfree_skb(frag);
746                         frag = skb;
747                 }
748
749                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
750                               IPSTATS_MIB_FRAGFAILS);
751                 dst_release(&rt->dst);
752                 return err;
753
754 slow_path_clean:
755                 skb_walk_frags(skb, frag2) {
756                         if (frag2 == frag)
757                                 break;
758                         frag2->sk = NULL;
759                         frag2->destructor = NULL;
760                         skb->truesize += frag2->truesize;
761                 }
762         }
763
764 slow_path:
765         left = skb->len - hlen;         /* Space per frame */
766         ptr = hlen;                     /* Where to start from */
767
768         /*
769          *      Fragment the datagram.
770          */
771
772         *prevhdr = NEXTHDR_FRAGMENT;
773
774         /*
775          *      Keep copying data until we run out.
776          */
777         while(left > 0) {
778                 len = left;
779                 /* IF: it doesn't fit, use 'mtu' - the data space left */
780                 if (len > mtu)
781                         len = mtu;
782                 /* IF: we are not sending upto and including the packet end
783                    then align the next start on an eight byte boundary */
784                 if (len < left) {
785                         len &= ~7;
786                 }
787                 /*
788                  *      Allocate buffer.
789                  */
790
791                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
792                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
793                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
794                                       IPSTATS_MIB_FRAGFAILS);
795                         err = -ENOMEM;
796                         goto fail;
797                 }
798
799                 /*
800                  *      Set up data on packet
801                  */
802
803                 ip6_copy_metadata(frag, skb);
804                 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
805                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
806                 skb_reset_network_header(frag);
807                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
808                 frag->transport_header = (frag->network_header + hlen +
809                                           sizeof(struct frag_hdr));
810
811                 /*
812                  *      Charge the memory for the fragment to any owner
813                  *      it might possess
814                  */
815                 if (skb->sk)
816                         skb_set_owner_w(frag, skb->sk);
817
818                 /*
819                  *      Copy the packet header into the new buffer.
820                  */
821                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
822
823                 /*
824                  *      Build fragment header.
825                  */
826                 fh->nexthdr = nexthdr;
827                 fh->reserved = 0;
828                 if (!frag_id) {
829                         ipv6_select_ident(fh);
830                         frag_id = fh->identification;
831                 } else
832                         fh->identification = frag_id;
833
834                 /*
835                  *      Copy a block of the IP datagram.
836                  */
837                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
838                         BUG();
839                 left -= len;
840
841                 fh->frag_off = htons(offset);
842                 if (left > 0)
843                         fh->frag_off |= htons(IP6_MF);
844                 ipv6_hdr(frag)->payload_len = htons(frag->len -
845                                                     sizeof(struct ipv6hdr));
846
847                 ptr += len;
848                 offset += len;
849
850                 /*
851                  *      Put this fragment into the sending queue.
852                  */
853                 err = output(frag);
854                 if (err)
855                         goto fail;
856
857                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
858                               IPSTATS_MIB_FRAGCREATES);
859         }
860         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
861                       IPSTATS_MIB_FRAGOKS);
862         kfree_skb(skb);
863         return err;
864
865 fail:
866         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
867                       IPSTATS_MIB_FRAGFAILS);
868         kfree_skb(skb);
869         return err;
870 }
871
872 static inline int ip6_rt_check(struct rt6key *rt_key,
873                                struct in6_addr *fl_addr,
874                                struct in6_addr *addr_cache)
875 {
876         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
878 }
879
880 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881                                           struct dst_entry *dst,
882                                           struct flowi *fl)
883 {
884         struct ipv6_pinfo *np = inet6_sk(sk);
885         struct rt6_info *rt = (struct rt6_info *)dst;
886
887         if (!dst)
888                 goto out;
889
890         /* Yes, checking route validity in not connected
891          * case is not very simple. Take into account,
892          * that we do not support routing by source, TOS,
893          * and MSG_DONTROUTE            --ANK (980726)
894          *
895          * 1. ip6_rt_check(): If route was host route,
896          *    check that cached destination is current.
897          *    If it is network route, we still may
898          *    check its validity using saved pointer
899          *    to the last used address: daddr_cache.
900          *    We do not want to save whole address now,
901          *    (because main consumer of this service
902          *    is tcp, which has not this problem),
903          *    so that the last trick works only on connected
904          *    sockets.
905          * 2. oif also should be the same.
906          */
907         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
908 #ifdef CONFIG_IPV6_SUBTREES
909             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
910 #endif
911             (fl->oif && fl->oif != dst->dev->ifindex)) {
912                 dst_release(dst);
913                 dst = NULL;
914         }
915
916 out:
917         return dst;
918 }
919
920 static int ip6_dst_lookup_tail(struct sock *sk,
921                                struct dst_entry **dst, struct flowi *fl)
922 {
923         int err;
924         struct net *net = sock_net(sk);
925
926         if (*dst == NULL)
927                 *dst = ip6_route_output(net, sk, fl);
928
929         if ((err = (*dst)->error))
930                 goto out_err_release;
931
932         if (ipv6_addr_any(&fl->fl6_src)) {
933                 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
934                                          &fl->fl6_dst,
935                                          sk ? inet6_sk(sk)->srcprefs : 0,
936                                          &fl->fl6_src);
937                 if (err)
938                         goto out_err_release;
939         }
940
941 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
942         /*
943          * Here if the dst entry we've looked up
944          * has a neighbour entry that is in the INCOMPLETE
945          * state and the src address from the flow is
946          * marked as OPTIMISTIC, we release the found
947          * dst entry and replace it instead with the
948          * dst entry of the nexthop router
949          */
950         if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
951                 struct inet6_ifaddr *ifp;
952                 struct flowi fl_gw;
953                 int redirect;
954
955                 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
956                                       (*dst)->dev, 1);
957
958                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
959                 if (ifp)
960                         in6_ifa_put(ifp);
961
962                 if (redirect) {
963                         /*
964                          * We need to get the dst entry for the
965                          * default router instead
966                          */
967                         dst_release(*dst);
968                         memcpy(&fl_gw, fl, sizeof(struct flowi));
969                         memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
970                         *dst = ip6_route_output(net, sk, &fl_gw);
971                         if ((err = (*dst)->error))
972                                 goto out_err_release;
973                 }
974         }
975 #endif
976
977         return 0;
978
979 out_err_release:
980         if (err == -ENETUNREACH)
981                 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
982         dst_release(*dst);
983         *dst = NULL;
984         return err;
985 }
986
987 /**
988  *      ip6_dst_lookup - perform route lookup on flow
989  *      @sk: socket which provides route info
990  *      @dst: pointer to dst_entry * for result
991  *      @fl: flow to lookup
992  *
993  *      This function performs a route lookup on the given flow.
994  *
995  *      It returns zero on success, or a standard errno code on error.
996  */
997 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
998 {
999         *dst = NULL;
1000         return ip6_dst_lookup_tail(sk, dst, fl);
1001 }
1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1003
1004 /**
1005  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1006  *      @sk: socket which provides the dst cache and route info
1007  *      @dst: pointer to dst_entry * for result
1008  *      @fl: flow to lookup
1009  *
1010  *      This function performs a route lookup on the given flow with the
1011  *      possibility of using the cached route in the socket if it is valid.
1012  *      It will take the socket dst lock when operating on the dst cache.
1013  *      As a result, this function can only be used in process context.
1014  *
1015  *      It returns zero on success, or a standard errno code on error.
1016  */
1017 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1018 {
1019         *dst = NULL;
1020         if (sk) {
1021                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1022                 *dst = ip6_sk_dst_check(sk, *dst, fl);
1023         }
1024
1025         return ip6_dst_lookup_tail(sk, dst, fl);
1026 }
1027 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1028
1029 static inline int ip6_ufo_append_data(struct sock *sk,
1030                         int getfrag(void *from, char *to, int offset, int len,
1031                         int odd, struct sk_buff *skb),
1032                         void *from, int length, int hh_len, int fragheaderlen,
1033                         int transhdrlen, int mtu,unsigned int flags)
1034
1035 {
1036         struct sk_buff *skb;
1037         int err;
1038
1039         /* There is support for UDP large send offload by network
1040          * device, so create one single skb packet containing complete
1041          * udp datagram
1042          */
1043         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1044                 skb = sock_alloc_send_skb(sk,
1045                         hh_len + fragheaderlen + transhdrlen + 20,
1046                         (flags & MSG_DONTWAIT), &err);
1047                 if (skb == NULL)
1048                         return -ENOMEM;
1049
1050                 /* reserve space for Hardware header */
1051                 skb_reserve(skb, hh_len);
1052
1053                 /* create space for UDP/IP header */
1054                 skb_put(skb,fragheaderlen + transhdrlen);
1055
1056                 /* initialize network header pointer */
1057                 skb_reset_network_header(skb);
1058
1059                 /* initialize protocol header pointer */
1060                 skb->transport_header = skb->network_header + fragheaderlen;
1061
1062                 skb->ip_summed = CHECKSUM_PARTIAL;
1063                 skb->csum = 0;
1064                 sk->sk_sndmsg_off = 0;
1065         }
1066
1067         err = skb_append_datato_frags(sk,skb, getfrag, from,
1068                                       (length - transhdrlen));
1069         if (!err) {
1070                 struct frag_hdr fhdr;
1071
1072                 /* Specify the length of each IPv6 datagram fragment.
1073                  * It has to be a multiple of 8.
1074                  */
1075                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1076                                              sizeof(struct frag_hdr)) & ~7;
1077                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1078                 ipv6_select_ident(&fhdr);
1079                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1080                 __skb_queue_tail(&sk->sk_write_queue, skb);
1081
1082                 return 0;
1083         }
1084         /* There is not enough support do UPD LSO,
1085          * so follow normal path
1086          */
1087         kfree_skb(skb);
1088
1089         return err;
1090 }
1091
1092 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1093                                                gfp_t gfp)
1094 {
1095         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1096 }
1097
1098 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1099                                                 gfp_t gfp)
1100 {
1101         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1102 }
1103
1104 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1105         int offset, int len, int odd, struct sk_buff *skb),
1106         void *from, int length, int transhdrlen,
1107         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1108         struct rt6_info *rt, unsigned int flags, int dontfrag)
1109 {
1110         struct inet_sock *inet = inet_sk(sk);
1111         struct ipv6_pinfo *np = inet6_sk(sk);
1112         struct sk_buff *skb;
1113         unsigned int maxfraglen, fragheaderlen;
1114         int exthdrlen;
1115         int hh_len;
1116         int mtu;
1117         int copy;
1118         int err;
1119         int offset = 0;
1120         int csummode = CHECKSUM_NONE;
1121         __u8 tx_flags = 0;
1122
1123         if (flags&MSG_PROBE)
1124                 return 0;
1125         if (skb_queue_empty(&sk->sk_write_queue)) {
1126                 /*
1127                  * setup for corking
1128                  */
1129                 if (opt) {
1130                         if (WARN_ON(np->cork.opt))
1131                                 return -EINVAL;
1132
1133                         np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1134                         if (unlikely(np->cork.opt == NULL))
1135                                 return -ENOBUFS;
1136
1137                         np->cork.opt->tot_len = opt->tot_len;
1138                         np->cork.opt->opt_flen = opt->opt_flen;
1139                         np->cork.opt->opt_nflen = opt->opt_nflen;
1140
1141                         np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1142                                                             sk->sk_allocation);
1143                         if (opt->dst0opt && !np->cork.opt->dst0opt)
1144                                 return -ENOBUFS;
1145
1146                         np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1147                                                             sk->sk_allocation);
1148                         if (opt->dst1opt && !np->cork.opt->dst1opt)
1149                                 return -ENOBUFS;
1150
1151                         np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1152                                                            sk->sk_allocation);
1153                         if (opt->hopopt && !np->cork.opt->hopopt)
1154                                 return -ENOBUFS;
1155
1156                         np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1157                                                             sk->sk_allocation);
1158                         if (opt->srcrt && !np->cork.opt->srcrt)
1159                                 return -ENOBUFS;
1160
1161                         /* need source address above miyazawa*/
1162                 }
1163                 dst_hold(&rt->dst);
1164                 inet->cork.dst = &rt->dst;
1165                 inet->cork.fl = *fl;
1166                 np->cork.hop_limit = hlimit;
1167                 np->cork.tclass = tclass;
1168                 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1169                       rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1170                 if (np->frag_size < mtu) {
1171                         if (np->frag_size)
1172                                 mtu = np->frag_size;
1173                 }
1174                 inet->cork.fragsize = mtu;
1175                 if (dst_allfrag(rt->dst.path))
1176                         inet->cork.flags |= IPCORK_ALLFRAG;
1177                 inet->cork.length = 0;
1178                 sk->sk_sndmsg_page = NULL;
1179                 sk->sk_sndmsg_off = 0;
1180                 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
1181                             rt->rt6i_nfheader_len;
1182                 length += exthdrlen;
1183                 transhdrlen += exthdrlen;
1184         } else {
1185                 rt = (struct rt6_info *)inet->cork.dst;
1186                 fl = &inet->cork.fl;
1187                 opt = np->cork.opt;
1188                 transhdrlen = 0;
1189                 exthdrlen = 0;
1190                 mtu = inet->cork.fragsize;
1191         }
1192
1193         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1194
1195         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1196                         (opt ? opt->opt_nflen : 0);
1197         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1198
1199         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1200                 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1201                         ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1202                         return -EMSGSIZE;
1203                 }
1204         }
1205
1206         /* For UDP, check if TX timestamp is enabled */
1207         if (sk->sk_type == SOCK_DGRAM) {
1208                 err = sock_tx_timestamp(sk, &tx_flags);
1209                 if (err)
1210                         goto error;
1211         }
1212
1213         /*
1214          * Let's try using as much space as possible.
1215          * Use MTU if total length of the message fits into the MTU.
1216          * Otherwise, we need to reserve fragment header and
1217          * fragment alignment (= 8-15 octects, in total).
1218          *
1219          * Note that we may need to "move" the data from the tail of
1220          * of the buffer to the new fragment when we split
1221          * the message.
1222          *
1223          * FIXME: It may be fragmented into multiple chunks
1224          *        at once if non-fragmentable extension headers
1225          *        are too large.
1226          * --yoshfuji
1227          */
1228
1229         inet->cork.length += length;
1230         if (length > mtu) {
1231                 int proto = sk->sk_protocol;
1232                 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1233                         ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
1234                         return -EMSGSIZE;
1235                 }
1236
1237                 if (proto == IPPROTO_UDP &&
1238                     (rt->dst.dev->features & NETIF_F_UFO)) {
1239
1240                         err = ip6_ufo_append_data(sk, getfrag, from, length,
1241                                                   hh_len, fragheaderlen,
1242                                                   transhdrlen, mtu, flags);
1243                         if (err)
1244                                 goto error;
1245                         return 0;
1246                 }
1247         }
1248
1249         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1250                 goto alloc_new_skb;
1251
1252         while (length > 0) {
1253                 /* Check if the remaining data fits into current packet. */
1254                 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1255                 if (copy < length)
1256                         copy = maxfraglen - skb->len;
1257
1258                 if (copy <= 0) {
1259                         char *data;
1260                         unsigned int datalen;
1261                         unsigned int fraglen;
1262                         unsigned int fraggap;
1263                         unsigned int alloclen;
1264                         struct sk_buff *skb_prev;
1265 alloc_new_skb:
1266                         skb_prev = skb;
1267
1268                         /* There's no room in the current skb */
1269                         if (skb_prev)
1270                                 fraggap = skb_prev->len - maxfraglen;
1271                         else
1272                                 fraggap = 0;
1273
1274                         /*
1275                          * If remaining data exceeds the mtu,
1276                          * we know we need more fragment(s).
1277                          */
1278                         datalen = length + fraggap;
1279                         if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1280                                 datalen = maxfraglen - fragheaderlen;
1281
1282                         fraglen = datalen + fragheaderlen;
1283                         if ((flags & MSG_MORE) &&
1284                             !(rt->dst.dev->features&NETIF_F_SG))
1285                                 alloclen = mtu;
1286                         else
1287                                 alloclen = datalen + fragheaderlen;
1288
1289                         /*
1290                          * The last fragment gets additional space at tail.
1291                          * Note: we overallocate on fragments with MSG_MODE
1292                          * because we have no idea if we're the last one.
1293                          */
1294                         if (datalen == length + fraggap)
1295                                 alloclen += rt->dst.trailer_len;
1296
1297                         /*
1298                          * We just reserve space for fragment header.
1299                          * Note: this may be overallocation if the message
1300                          * (without MSG_MORE) fits into the MTU.
1301                          */
1302                         alloclen += sizeof(struct frag_hdr);
1303
1304                         if (transhdrlen) {
1305                                 skb = sock_alloc_send_skb(sk,
1306                                                 alloclen + hh_len,
1307                                                 (flags & MSG_DONTWAIT), &err);
1308                         } else {
1309                                 skb = NULL;
1310                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1311                                     2 * sk->sk_sndbuf)
1312                                         skb = sock_wmalloc(sk,
1313                                                            alloclen + hh_len, 1,
1314                                                            sk->sk_allocation);
1315                                 if (unlikely(skb == NULL))
1316                                         err = -ENOBUFS;
1317                                 else {
1318                                         /* Only the initial fragment
1319                                          * is time stamped.
1320                                          */
1321                                         tx_flags = 0;
1322                                 }
1323                         }
1324                         if (skb == NULL)
1325                                 goto error;
1326                         /*
1327                          *      Fill in the control structures
1328                          */
1329                         skb->ip_summed = csummode;
1330                         skb->csum = 0;
1331                         /* reserve for fragmentation */
1332                         skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1333
1334                         if (sk->sk_type == SOCK_DGRAM)
1335                                 skb_shinfo(skb)->tx_flags = tx_flags;
1336
1337                         /*
1338                          *      Find where to start putting bytes
1339                          */
1340                         data = skb_put(skb, fraglen);
1341                         skb_set_network_header(skb, exthdrlen);
1342                         data += fragheaderlen;
1343                         skb->transport_header = (skb->network_header +
1344                                                  fragheaderlen);
1345                         if (fraggap) {
1346                                 skb->csum = skb_copy_and_csum_bits(
1347                                         skb_prev, maxfraglen,
1348                                         data + transhdrlen, fraggap, 0);
1349                                 skb_prev->csum = csum_sub(skb_prev->csum,
1350                                                           skb->csum);
1351                                 data += fraggap;
1352                                 pskb_trim_unique(skb_prev, maxfraglen);
1353                         }
1354                         copy = datalen - transhdrlen - fraggap;
1355                         if (copy < 0) {
1356                                 err = -EINVAL;
1357                                 kfree_skb(skb);
1358                                 goto error;
1359                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1360                                 err = -EFAULT;
1361                                 kfree_skb(skb);
1362                                 goto error;
1363                         }
1364
1365                         offset += copy;
1366                         length -= datalen - fraggap;
1367                         transhdrlen = 0;
1368                         exthdrlen = 0;
1369                         csummode = CHECKSUM_NONE;
1370
1371                         /*
1372                          * Put the packet on the pending queue
1373                          */
1374                         __skb_queue_tail(&sk->sk_write_queue, skb);
1375                         continue;
1376                 }
1377
1378                 if (copy > length)
1379                         copy = length;
1380
1381                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1382                         unsigned int off;
1383
1384                         off = skb->len;
1385                         if (getfrag(from, skb_put(skb, copy),
1386                                                 offset, copy, off, skb) < 0) {
1387                                 __skb_trim(skb, off);
1388                                 err = -EFAULT;
1389                                 goto error;
1390                         }
1391                 } else {
1392                         int i = skb_shinfo(skb)->nr_frags;
1393                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1394                         struct page *page = sk->sk_sndmsg_page;
1395                         int off = sk->sk_sndmsg_off;
1396                         unsigned int left;
1397
1398                         if (page && (left = PAGE_SIZE - off) > 0) {
1399                                 if (copy >= left)
1400                                         copy = left;
1401                                 if (page != frag->page) {
1402                                         if (i == MAX_SKB_FRAGS) {
1403                                                 err = -EMSGSIZE;
1404                                                 goto error;
1405                                         }
1406                                         get_page(page);
1407                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1408                                         frag = &skb_shinfo(skb)->frags[i];
1409                                 }
1410                         } else if(i < MAX_SKB_FRAGS) {
1411                                 if (copy > PAGE_SIZE)
1412                                         copy = PAGE_SIZE;
1413                                 page = alloc_pages(sk->sk_allocation, 0);
1414                                 if (page == NULL) {
1415                                         err = -ENOMEM;
1416                                         goto error;
1417                                 }
1418                                 sk->sk_sndmsg_page = page;
1419                                 sk->sk_sndmsg_off = 0;
1420
1421                                 skb_fill_page_desc(skb, i, page, 0, 0);
1422                                 frag = &skb_shinfo(skb)->frags[i];
1423                         } else {
1424                                 err = -EMSGSIZE;
1425                                 goto error;
1426                         }
1427                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1428                                 err = -EFAULT;
1429                                 goto error;
1430                         }
1431                         sk->sk_sndmsg_off += copy;
1432                         frag->size += copy;
1433                         skb->len += copy;
1434                         skb->data_len += copy;
1435                         skb->truesize += copy;
1436                         atomic_add(copy, &sk->sk_wmem_alloc);
1437                 }
1438                 offset += copy;
1439                 length -= copy;
1440         }
1441         return 0;
1442 error:
1443         inet->cork.length -= length;
1444         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1445         return err;
1446 }
1447
1448 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1449 {
1450         if (np->cork.opt) {
1451                 kfree(np->cork.opt->dst0opt);
1452                 kfree(np->cork.opt->dst1opt);
1453                 kfree(np->cork.opt->hopopt);
1454                 kfree(np->cork.opt->srcrt);
1455                 kfree(np->cork.opt);
1456                 np->cork.opt = NULL;
1457         }
1458
1459         if (inet->cork.dst) {
1460                 dst_release(inet->cork.dst);
1461                 inet->cork.dst = NULL;
1462                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1463         }
1464         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1465 }
1466
1467 int ip6_push_pending_frames(struct sock *sk)
1468 {
1469         struct sk_buff *skb, *tmp_skb;
1470         struct sk_buff **tail_skb;
1471         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1472         struct inet_sock *inet = inet_sk(sk);
1473         struct ipv6_pinfo *np = inet6_sk(sk);
1474         struct net *net = sock_net(sk);
1475         struct ipv6hdr *hdr;
1476         struct ipv6_txoptions *opt = np->cork.opt;
1477         struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1478         struct flowi *fl = &inet->cork.fl;
1479         unsigned char proto = fl->proto;
1480         int err = 0;
1481
1482         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1483                 goto out;
1484         tail_skb = &(skb_shinfo(skb)->frag_list);
1485
1486         /* move skb->data to ip header from ext header */
1487         if (skb->data < skb_network_header(skb))
1488                 __skb_pull(skb, skb_network_offset(skb));
1489         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1490                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1491                 *tail_skb = tmp_skb;
1492                 tail_skb = &(tmp_skb->next);
1493                 skb->len += tmp_skb->len;
1494                 skb->data_len += tmp_skb->len;
1495                 skb->truesize += tmp_skb->truesize;
1496                 tmp_skb->destructor = NULL;
1497                 tmp_skb->sk = NULL;
1498         }
1499
1500         /* Allow local fragmentation. */
1501         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1502                 skb->local_df = 1;
1503
1504         ipv6_addr_copy(final_dst, &fl->fl6_dst);
1505         __skb_pull(skb, skb_network_header_len(skb));
1506         if (opt && opt->opt_flen)
1507                 ipv6_push_frag_opts(skb, opt, &proto);
1508         if (opt && opt->opt_nflen)
1509                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1510
1511         skb_push(skb, sizeof(struct ipv6hdr));
1512         skb_reset_network_header(skb);
1513         hdr = ipv6_hdr(skb);
1514
1515         *(__be32*)hdr = fl->fl6_flowlabel |
1516                      htonl(0x60000000 | ((int)np->cork.tclass << 20));
1517
1518         hdr->hop_limit = np->cork.hop_limit;
1519         hdr->nexthdr = proto;
1520         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1521         ipv6_addr_copy(&hdr->daddr, final_dst);
1522
1523         skb->priority = sk->sk_priority;
1524         skb->mark = sk->sk_mark;
1525
1526         skb_dst_set(skb, dst_clone(&rt->dst));
1527         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1528         if (proto == IPPROTO_ICMPV6) {
1529                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1530
1531                 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1532                 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1533         }
1534
1535         err = ip6_local_out(skb);
1536         if (err) {
1537                 if (err > 0)
1538                         err = net_xmit_errno(err);
1539                 if (err)
1540                         goto error;
1541         }
1542
1543 out:
1544         ip6_cork_release(inet, np);
1545         return err;
1546 error:
1547         IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1548         goto out;
1549 }
1550
1551 void ip6_flush_pending_frames(struct sock *sk)
1552 {
1553         struct sk_buff *skb;
1554
1555         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1556                 if (skb_dst(skb))
1557                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1558                                       IPSTATS_MIB_OUTDISCARDS);
1559                 kfree_skb(skb);
1560         }
1561
1562         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1563 }