]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv6/ip6_output.c
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wirel...
[karo-tx-linux.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	/* Finalize the IPv6 payload length now that all headers are in
	 * place: payload_len covers everything after the fixed 40-byte
	 * IPv6 header.
	 */
	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;	/* too large for the 16-bit field; write 0 */
	ipv6_hdr(skb)->payload_len = htons(len);

	/* Run the LOCAL_OUT netfilter hook; if the hook accepts the
	 * packet synchronously it is passed on to dst_output().
	 */
	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}
73
/* Send a locally generated packet: run the LOCAL_OUT hook via
 * __ip6_local_out() and, when it returns 1 (hook accepted without
 * queueing), continue transmission through dst_output().
 */
int ip6_local_out(struct sk_buff *skb)
{
	int err = __ip6_local_out(skb);

	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85
/* dev_loopback_xmit for use with netfilter: reinject a cloned multicast
 * packet into the local receive path so local listeners see it.
 */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	/* Drop everything before the network header so the packet looks
	 * like a freshly received one.
	 */
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;	/* locally generated */
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}
98
/* Final transmit step: resolve the L2 neighbour and emit the frame.
 * For multicast destinations, loop a copy back to interested local
 * listeners and account multicast output statistics.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop the packet back locally when the socket wants
		 * multicast loopback and either a multicast-router socket
		 * exists (and the packet was not already forwarded) or a
		 * local socket has joined this group on @dev.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			/* hop_limit 0: deliver only the looped-back copy,
			 * never put such a packet on the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
	}

	/* Prefer the cached hardware header, then the neighbour output
	 * method; with neither present there is no usable route.
	 */
	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
147
148 static int ip6_finish_output(struct sk_buff *skb)
149 {
150         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151             dst_allfrag(skb_dst(skb)))
152                 return ip6_fragment(skb, ip6_finish_output2);
153         else
154                 return ip6_finish_output2(skb);
155 }
156
/* Output entry point installed on IPv6 routes: drop if IPv6 is disabled
 * on the device, otherwise run POST_ROUTING and continue in
 * ip6_finish_output() (skipped for packets flagged IP6SKB_REROUTED).
 */
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
172
/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 *
 *	Prepends extension headers (from @opt) and the IPv6 header, then
 *	hands the packet to the LOCAL_OUT hook.  If the packet exceeds the
 *	path MTU and may not be fragmented, an ICMPV6_PKT_TOOBIG is sent
 *	back to the local sender and -EMSGSIZE returned.  On headroom
 *	reallocation the original skb is freed and replaced.
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			/* Not enough headroom: reallocate, free the old
			 * skb and charge the new one to the socket.
			 */
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		/* Push fragmentable then non-fragmentable extension
		 * headers; these update @proto and may redirect
		 * @first_hop (routing header).
		 */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	/* First 32 bits: version 6, traffic class, flow label. */
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	/* Fits the MTU, fragmentation allowed, or GSO (segmented later):
	 * send it through LOCAL_OUT.
	 */
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	/* Too big and must not fragment: tell our own stack via ICMPv6. */
	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
263
264 /*
265  *      To avoid extra problems ND packets are send through this
266  *      routine. It's code duplication but I really want to avoid
267  *      extra checks since ipv6_build_header is used by TCP (which
268  *      is for us performance critical)
269  */
270
271 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
272                const struct in6_addr *saddr, const struct in6_addr *daddr,
273                int proto, int len)
274 {
275         struct ipv6_pinfo *np = inet6_sk(sk);
276         struct ipv6hdr *hdr;
277         int totlen;
278
279         skb->protocol = htons(ETH_P_IPV6);
280         skb->dev = dev;
281
282         totlen = len + sizeof(struct ipv6hdr);
283
284         skb_reset_network_header(skb);
285         skb_put(skb, sizeof(struct ipv6hdr));
286         hdr = ipv6_hdr(skb);
287
288         *(__be32*)hdr = htonl(0x60000000);
289
290         hdr->payload_len = htons(len);
291         hdr->nexthdr = proto;
292         hdr->hop_limit = np->hop_limit;
293
294         ipv6_addr_copy(&hdr->saddr, saddr);
295         ipv6_addr_copy(&hdr->daddr, daddr);
296
297         return 0;
298 }
299
/* Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain with matching @sel (and, if bound, matching device).
 * All matches but the last get a clone; the last gets the original skb.
 * Returns 1 if the packet was consumed by at least one socket, else 0.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Hand a clone to the previously found socket;
			 * defer the original for the last match.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
328
/* Classify a packet destined to a proxied (pneigh) address:
 * returns 1 when it is an NDISC message that must be handed to the
 * local input path, -1 when it targets a link-local address and must
 * be discarded (after signalling link failure), 0 to forward normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	/* Locate the upper-layer header, skipping extension headers. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure at least the icmp6_type byte is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
379
/* NF_INET_FORWARD okfn: pass the accepted packet on to the output path. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
384
/* Forward a received IPv6 packet: policy checks, Router Alert handling,
 * hop-limit decrement, proxy-ND dispatch, redirect generation, MTU
 * check, then the NF_INET_FORWARD hook.  Consumes @skb on every path.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* LRO-merged skbs must not be forwarded (warns and drops). */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Only forward packets actually addressed to us at L2. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		/* Destination is proxied: NDISC messages go to local
		 * input, link-local destinations are discarded.
		 */
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have swapped in a new route — re-read it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	/* Never advertise an MTU below the IPv6 minimum (1280). */
	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
541
/* Copy per-packet metadata from @from to a freshly built fragment @to:
 * packet type, priority, protocol, route (cloned dst), device, mark,
 * plus traffic-control, netfilter and security annotations.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
562
/* Find where the Fragment header must be inserted: walk the extension
 * header chain and return the byte offset (from the network header) of
 * the first header that may not precede fragmentation.  On return,
 * *nexthdr points at the nexthdr field that will be overwritten with
 * NEXTHDR_FRAGMENT by the caller.
 */
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			/* A destination options header carrying a Home
			 * Address option stays in the unfragmentable part.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			/* Dest-opts after a routing header end the
			 * unfragmentable part.
			 */
			if (found_rhdr)
				return offset;
			break;
		default :
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
601
/* Split @skb into MTU-sized IPv6 fragments and emit each via @output.
 * Fast path: if the skb already carries a well-shaped frag list, the
 * existing sub-skbs are turned into fragments in place.  Slow path:
 * fresh skbs are allocated and the payload copied.  Consumes @skb.
 */
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	/* hlen = length of the unfragmentable part; prevhdr points at
	 * the nexthdr field to be rewritten to NEXTHDR_FRAGMENT.
	 */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Honour a smaller per-socket fragment size, if configured. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* mtu is now the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		/* Fast path only when every piece fits the MTU, all but
		 * the last are 8-byte aligned, and nothing is shared.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		/* Insert the Fragment header between the unfragmentable
		 * part and the payload of the first fragment.
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if(!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		/* Output failed part-way: free the unsent fragments. */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done above before falling
		 * back to the slow path.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
874
875 static inline int ip6_rt_check(struct rt6key *rt_key,
876                                struct in6_addr *fl_addr,
877                                struct in6_addr *addr_cache)
878 {
879         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
880                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
881 }
882
/*
 *	Validate a socket's cached dst entry against the flow about to be
 *	sent.  Returns the dst if it is still usable for this flow; otherwise
 *	drops the caller's reference and returns NULL so a fresh route
 *	lookup is performed.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		/* Cached route no longer matches this flow: release our
		 * reference and signal the caller to look up a new one. */
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
922
/*
 *	Resolve a route for @fl into *@dst.  When *@dst is NULL a fresh
 *	routing lookup is performed; a source address is then selected if
 *	the flow does not already carry one.  On failure the dst reference
 *	is dropped, *@dst is reset to NULL, and a negative errno returned.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	/* Lookup failures are reported through the dst's embedded error
	 * field (the code relies on ip6_route_output() always returning
	 * a dereferenceable entry). */
	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		/* No source address given: pick one suited to the outgoing
		 * device, honouring the socket's srcprefs when present. */
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* Re-do the lookup with an unspecified destination,
			 * which routes via the default gateway. */
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
989
990 /**
991  *      ip6_dst_lookup - perform route lookup on flow
992  *      @sk: socket which provides route info
993  *      @dst: pointer to dst_entry * for result
994  *      @fl: flow to lookup
995  *
996  *      This function performs a route lookup on the given flow.
997  *
998  *      It returns zero on success, or a standard errno code on error.
999  */
1000 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1001 {
1002         *dst = NULL;
1003         return ip6_dst_lookup_tail(sk, dst, fl);
1004 }
1005 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1006
1007 /**
1008  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1009  *      @sk: socket which provides the dst cache and route info
1010  *      @dst: pointer to dst_entry * for result
1011  *      @fl: flow to lookup
1012  *
1013  *      This function performs a route lookup on the given flow with the
1014  *      possibility of using the cached route in the socket if it is valid.
1015  *      It will take the socket dst lock when operating on the dst cache.
1016  *      As a result, this function can only be used in process context.
1017  *
1018  *      It returns zero on success, or a standard errno code on error.
1019  */
1020 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1021 {
1022         *dst = NULL;
1023         if (sk) {
1024                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1025                 *dst = ip6_sk_dst_check(sk, *dst, fl);
1026         }
1027
1028         return ip6_dst_lookup_tail(sk, dst, fl);
1029 }
1030 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1031
1032 static inline int ip6_ufo_append_data(struct sock *sk,
1033                         int getfrag(void *from, char *to, int offset, int len,
1034                         int odd, struct sk_buff *skb),
1035                         void *from, int length, int hh_len, int fragheaderlen,
1036                         int transhdrlen, int mtu,unsigned int flags)
1037
1038 {
1039         struct sk_buff *skb;
1040         int err;
1041
1042         /* There is support for UDP large send offload by network
1043          * device, so create one single skb packet containing complete
1044          * udp datagram
1045          */
1046         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1047                 skb = sock_alloc_send_skb(sk,
1048                         hh_len + fragheaderlen + transhdrlen + 20,
1049                         (flags & MSG_DONTWAIT), &err);
1050                 if (skb == NULL)
1051                         return -ENOMEM;
1052
1053                 /* reserve space for Hardware header */
1054                 skb_reserve(skb, hh_len);
1055
1056                 /* create space for UDP/IP header */
1057                 skb_put(skb,fragheaderlen + transhdrlen);
1058
1059                 /* initialize network header pointer */
1060                 skb_reset_network_header(skb);
1061
1062                 /* initialize protocol header pointer */
1063                 skb->transport_header = skb->network_header + fragheaderlen;
1064
1065                 skb->ip_summed = CHECKSUM_PARTIAL;
1066                 skb->csum = 0;
1067                 sk->sk_sndmsg_off = 0;
1068         }
1069
1070         err = skb_append_datato_frags(sk,skb, getfrag, from,
1071                                       (length - transhdrlen));
1072         if (!err) {
1073                 struct frag_hdr fhdr;
1074
1075                 /* Specify the length of each IPv6 datagram fragment.
1076                  * It has to be a multiple of 8.
1077                  */
1078                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1079                                              sizeof(struct frag_hdr)) & ~7;
1080                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1081                 ipv6_select_ident(&fhdr);
1082                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1083                 __skb_queue_tail(&sk->sk_write_queue, skb);
1084
1085                 return 0;
1086         }
1087         /* There is not enough support do UPD LSO,
1088          * so follow normal path
1089          */
1090         kfree_skb(skb);
1091
1092         return err;
1093 }
1094
1095 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1096                                                gfp_t gfp)
1097 {
1098         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1099 }
1100
1101 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1102                                                 gfp_t gfp)
1103 {
1104         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1105 }
1106
/*
 *	ip6_append_data - append data to the pending (corked) output queue
 *
 *	Copies caller data, pulled in via @getfrag, into a chain of skbs on
 *	sk->sk_write_queue, each sized to become one IPv6 fragment (or a
 *	single UFO super-packet).  The first call on an empty queue sets up
 *	the cork state (duplicated options, route, mtu, ...); subsequent
 *	calls reuse that state and ignore the corresponding arguments.
 *	The queue is later transmitted by ip6_push_pending_frames() or
 *	discarded by ip6_flush_pending_frames().
 *
 *	Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* NOTE(review): on the -ENOBUFS returns below,
			 * np->cork.opt and any already-duplicated sub-options
			 * remain allocated until ip6_cork_release() runs —
			 * confirm every caller eventually flushes the cork. */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		/* Pin the route and flow for the lifetime of the cork. */
		dst_hold(&rt->dst);
		inet->cork.dst = &rt->dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* Effective mtu: device mtu when probing PMTU, else the
		 * path mtu, optionally capped by the socket's frag_size. */
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* Extension headers are charged only to the first chunk. */
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Queue already corked: reuse the stored route, flow and
		 * options; the per-call arguments are ignored. */
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header overhead; maxfraglen is the largest skb
	 * length that still leaves room for a fragment header, with the
	 * payload 8-byte aligned as fragmentation requires. */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		/* Datagram too big and the caller asked not to fragment:
		 * report the path mtu instead of queueing anything. */
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		/* Hand the whole datagram to the device's UDP
		 * fragmentation offload when available. */
		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb of the datagram: may block
				 * waiting for send buffer space. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-up fragment: non-blocking alloc,
				 * bounded to 2x sndbuf of in-flight data. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the unaligned tail of the previous
				 * skb into this one, keeping its checksum
				 * consistent. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Transport header and checksum offload apply only
			 * to the first skb of the datagram. */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* Device can't do scatter/gather: copy straight
			 * into the skb's linear data. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter/gather: append into page fragments,
			 * reusing the socket's current send page. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* Uncharge the portion that was never queued. */
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1433
1434 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1435 {
1436         if (np->cork.opt) {
1437                 kfree(np->cork.opt->dst0opt);
1438                 kfree(np->cork.opt->dst1opt);
1439                 kfree(np->cork.opt->hopopt);
1440                 kfree(np->cork.opt->srcrt);
1441                 kfree(np->cork.opt);
1442                 np->cork.opt = NULL;
1443         }
1444
1445         if (inet->cork.dst) {
1446                 dst_release(inet->cork.dst);
1447                 inet->cork.dst = NULL;
1448                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1449         }
1450         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1451 }
1452
/*
 *	ip6_push_pending_frames - transmit the corked queue as one packet
 *
 *	Coalesces every skb on sk->sk_write_queue into a single packet
 *	(extra skbs become the frag_list of the first), pushes the queued
 *	extension headers and the IPv6 header, and hands the result to
 *	ip6_local_out().  The cork state is released in all cases.
 *
 *	Returns 0 on success (or empty queue), or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the first one's frag_list,
	 * transferring their length/truesize accounting and detaching
	 * them from the socket. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* Push extension headers in front of the payload; a routing
	 * header may rewrite the wire destination, so keep the final
	 * destination aside for the IPv6 header below. */
	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Version 6, traffic class and flow label in the first word. */
	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive values are congestion notifications; map them
		 * through net_xmit_errno() before treating as failure. */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
1536
1537 void ip6_flush_pending_frames(struct sock *sk)
1538 {
1539         struct sk_buff *skb;
1540
1541         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1542                 if (skb_dst(skb))
1543                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1544                                       IPSTATS_MIB_OUTDISCARDS);
1545                 kfree_skb(skb);
1546         }
1547
1548         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1549 }