2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/errno.h>
32 #include <linux/kernel.h>
33 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/net.h>
36 #include <linux/netdevice.h>
37 #include <linux/if_arp.h>
38 #include <linux/in6.h>
39 #include <linux/tcp.h>
40 #include <linux/route.h>
41 #include <linux/module.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
/*
 * ipv6_select_ident - hand out the next IPv6 fragment Identification value.
 * Writes the id into @fhdr in network byte order under ip6_id_lock; the
 * global counter wraps past 0 so a zero id is never issued.
 * NOTE(review): this extract omits some original lines (braces); comments
 * describe only the code visible here.
 */
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id); /* id must be in NBO */
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1; /* skip 0 on wrap-around */
70 spin_unlock_bh(&ip6_id_lock);
/*
 * __ip6_local_out - finalize a locally generated packet and pass it to the
 * NF_INET_LOCAL_OUT netfilter hook.  Fills in payload_len from skb->len
 * (minus the fixed IPv6 header).
 * NOTE(review): the branch taken when len > IPV6_MAXPLEN is not visible in
 * this extract — presumably the jumbogram case; confirm against full source.
 */
73 int __ip6_local_out(struct sk_buff *skb)
77 len = skb->len - sizeof(struct ipv6hdr);
78 if (len > IPV6_MAXPLEN)
80 ipv6_hdr(skb)->payload_len = htons(len);
82 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
/*
 * ip6_local_out - run __ip6_local_out() and, on success (per the visible
 * flow), continue output via dst_output().
 * NOTE(review): the condition guarding the dst_output() call is missing
 * from this extract.
 */
86 int ip6_local_out(struct sk_buff *skb)
90 err = __ip6_local_out(skb);
92 err = dst_output(skb);
96 EXPORT_SYMBOL_GPL(ip6_local_out);
/*
 * ip6_output_finish - final transmit step: use the cached hardware header
 * if present, otherwise go through the neighbour's output function; with
 * neither available, count the packet as OUTNOROUTES.
 * NOTE(review): the hh-test condition and the drop path are not visible in
 * this extract.
 */
98 static int ip6_output_finish(struct sk_buff *skb)
100 struct dst_entry *dst = skb->dst;
103 return neigh_hh_output(dst->hh, skb);
104 else if (dst->neighbour)
105 return dst->neighbour->output(skb);
107 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
113 /* dev_loopback_xmit for use with netfilter. */
/*
 * Loop a cloned multicast packet back to the local stack: reset the MAC
 * header, strip to the network header, and mark it PACKET_LOOPBACK with
 * checksum already verified.
 */
114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
116 skb_reset_mac_header(newskb);
117 __skb_pull(newskb, skb_network_offset(newskb));
118 newskb->pkt_type = PACKET_LOOPBACK;
119 newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 BUG_TRAP(newskb->dst); /* a loopback skb must carry a dst */
/*
 * ip6_output2 - device-level output.  For multicast destinations: loop a
 * clone back to the local stack (via POST_ROUTING + ip6_dev_loopback_xmit)
 * when the group is joined locally and mc_loop allows it, and discard
 * packets whose hop limit is already 0.  All traffic then passes the
 * NF_INET_POST_ROUTING hook.
 * NOTE(review): extract omits several lines (braces, clone NULL check);
 * comments cover visible code only.
 */
127 static int ip6_output2(struct sk_buff *skb)
129 struct dst_entry *dst = skb->dst;
130 struct net_device *dev = dst->dev;
132 skb->protocol = htons(ETH_P_IPV6);
135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
137 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
/* loop back only if not a loopback device, the socket wants mc_loop, and
 * the group is joined on this interface */
139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 &ipv6_hdr(skb)->saddr)) {
142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
144 /* Do not check for IFF_ALLMULTI; multicast routing
145 is not supported in any case.
148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
150 ip6_dev_loopback_xmit);
152 if (ipv6_hdr(skb)->hop_limit == 0) {
153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
/*
 * ip6_skb_dst_mtu - effective MTU for this skb's route: the raw device MTU
 * when the socket probes PMTU itself (IPV6_PMTUDISC_PROBE), otherwise the
 * dst's cached path MTU.
 */
166 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
168 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
170 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171 skb->dst->dev->mtu : dst_mtu(skb->dst);
/*
 * ip6_output - dst_output entry point: fragment when the (non-GSO) packet
 * exceeds the route MTU or the route demands fragmentation of everything
 * (dst_allfrag); otherwise transmit directly via ip6_output2().
 */
174 int ip6_output(struct sk_buff *skb)
176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177 dst_allfrag(skb->dst))
178 return ip6_fragment(skb, ip6_output2);
180 return ip6_output2(skb);
184 * xmit an sk_buff (used by TCP)
/*
 * ip6_xmit - build the IPv6 header (plus any extension headers from @opt)
 * on an sk_buff and send it through NF_INET_LOCAL_OUT, or generate a local
 * PKT_TOOBIG error when it does not fit the MTU and @ipfragok/GSO do not
 * apply.
 * NOTE(review): this extract is missing many lines (hdr/mtu declarations,
 * several branches); comments describe only what is visible.
 */
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
190 struct ipv6_pinfo *np = inet6_sk(sk);
191 struct in6_addr *first_hop = &fl->fl6_dst;
192 struct dst_entry *dst = skb->dst;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
200 unsigned int head_room;
202 /* First: exthdrs may take lots of space (~8K for now)
203 MAX_HEADER is not enough.
205 head_room = opt->opt_nflen + opt->opt_flen;
206 seg_len += head_room;
207 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
/* re-allocate headroom when the existing skb cannot hold all headers */
209 if (skb_headroom(skb) < head_room) {
210 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
212 IP6_INC_STATS(ip6_dst_idev(skb->dst),
213 IPSTATS_MIB_OUTDISCARDS);
220 skb_set_owner_w(skb, sk);
223 ipv6_push_frag_opts(skb, opt, &proto);
225 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
228 skb_push(skb, sizeof(struct ipv6hdr));
229 skb_reset_network_header(skb);
233 * Fill in the IPv6 header
/* hop limit: socket setting, then route metric, then device default */
238 hlimit = np->hop_limit;
240 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
242 hlimit = ipv6_get_hoplimit(dst->dev);
/* version 6 | traffic class | flow label in the first 32 bits */
250 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
252 hdr->payload_len = htons(seg_len);
253 hdr->nexthdr = proto;
254 hdr->hop_limit = hlimit;
256 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
257 ipv6_addr_copy(&hdr->daddr, first_hop);
259 skb->priority = sk->sk_priority;
260 skb->mark = sk->sk_mark;
263 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
264 IP6_INC_STATS(ip6_dst_idev(skb->dst),
265 IPSTATS_MIB_OUTREQUESTS);
266 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
/* too big and not allowed to fragment: bounce an ICMPv6 error locally */
271 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
273 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
274 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
279 EXPORT_SYMBOL(ip6_xmit);
282 * To avoid extra problems ND packets are sent through this
283 * routine. It's code duplication but I really want to avoid
284 * extra checks since ipv6_build_header is used by TCP (which
285 * is for us performance critical)
/*
 * ip6_nd_hdr - build a minimal IPv6 header for a neighbour-discovery
 * packet: fixed traffic class/flow label of 0, hop limit from the socket,
 * addresses supplied by the caller.
 * NOTE(review): extract omits some lines (hdr assignment, totlen use).
 */
288 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
289 struct in6_addr *saddr, struct in6_addr *daddr,
292 struct ipv6_pinfo *np = inet6_sk(sk);
296 skb->protocol = htons(ETH_P_IPV6);
299 totlen = len + sizeof(struct ipv6hdr);
301 skb_reset_network_header(skb);
302 skb_put(skb, sizeof(struct ipv6hdr));
305 *(__be32*)hdr = htonl(0x60000000); /* version 6, tclass/flowlabel 0 */
307 hdr->payload_len = htons(len);
308 hdr->nexthdr = proto;
309 hdr->hop_limit = np->hop_limit;
311 ipv6_addr_copy(&hdr->saddr, saddr);
312 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * ip6_call_ra_chain - deliver a Router-Alert packet to every raw socket
 * registered on ip6_ra_chain with a matching @sel value (and matching
 * bound device, if any).  Clones are delivered to all but the last
 * matching socket; the original skb goes to the last one.
 * NOTE(review): extract omits lines between the clone and final delivery
 * (the "last" bookkeeping); return values are also not visible.
 */
317 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
319 struct ip6_ra_chain *ra;
320 struct sock *last = NULL;
322 read_lock(&ip6_ra_lock);
323 for (ra = ip6_ra_chain; ra; ra = ra->next) {
324 struct sock *sk = ra->sk;
325 if (sk && ra->sel == sel &&
326 (!sk->sk_bound_dev_if ||
327 sk->sk_bound_dev_if == skb->dev->ifindex)) {
329 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
331 rawv6_rcv(last, skb2);
338 rawv6_rcv(last, skb);
339 read_unlock(&ip6_ra_lock);
342 read_unlock(&ip6_ra_lock);
/*
 * ip6_forward_proxy_check - decide how a packet destined to a proxied
 * address must be handled: skip extension headers, and for ICMPv6 ND
 * messages let the local stack process them; link-local destinations
 * cannot be proxied and trigger a link failure.
 * NOTE(review): return values and several branches are missing from this
 * extract.
 */
346 static int ip6_forward_proxy_check(struct sk_buff *skb)
348 struct ipv6hdr *hdr = ipv6_hdr(skb);
349 u8 nexthdr = hdr->nexthdr;
352 if (ipv6_ext_hdr(nexthdr)) {
353 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
357 offset = sizeof(struct ipv6hdr);
359 if (nexthdr == IPPROTO_ICMPV6) {
360 struct icmp6hdr *icmp6;
/* make sure at least the icmp6 type byte is in the linear area */
362 if (!pskb_may_pull(skb, (skb_network_header(skb) +
363 offset + 1 - skb->data)))
366 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
368 switch (icmp6->icmp6_type) {
369 case NDISC_ROUTER_SOLICITATION:
370 case NDISC_ROUTER_ADVERTISEMENT:
371 case NDISC_NEIGHBOUR_SOLICITATION:
372 case NDISC_NEIGHBOUR_ADVERTISEMENT:
374 /* For reaction involving unicast neighbor discovery
375 * message destined to the proxied address, pass it to
385 * The proxying router can't forward traffic sent to a link-local
386 * address, so signal the sender and discard the packet. This
387 * behavior is clarified by the MIPv6 specification.
389 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
390 dst_link_failure(skb);
/* NF_INET_FORWARD okfn: hand the forwarded packet on to dst_output(). */
397 static inline int ip6_forward_finish(struct sk_buff *skb)
399 return dst_output(skb);
/*
 * ip6_forward - forward-path processing for an IPv6 packet: policy checks,
 * Router-Alert delivery, hop-limit decrement, NDP proxying, redirect
 * generation, MTU check, and finally the NF_INET_FORWARD hook into
 * ip6_forward_finish().
 * NOTE(review): this extract is missing many lines (error labels, several
 * branch bodies, the hop-limit decrement itself); comments describe only
 * the visible code.
 */
402 int ip6_forward(struct sk_buff *skb)
404 struct dst_entry *dst = skb->dst;
405 struct ipv6hdr *hdr = ipv6_hdr(skb);
406 struct inet6_skb_parm *opt = IP6CB(skb);
/* forwarding disabled system-wide: bail out */
408 if (ipv6_devconf.forwarding == 0)
411 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
412 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
416 skb_forward_csum(skb);
419 * We DO NOT make any processing on
420 * RA packets, pushing them to user level AS IS
421 * without any WARRANTY that application will be able
422 * to interpret them. The reason is that we
423 * cannot make anything clever here.
425 * We are not end-node, so that if packet contains
426 * AH/ESP, we cannot make anything.
427 * Defragmentation also would be mistake, RA packets
428 * cannot be fragmented, because there is no warranty
429 * that different fragments will go along one path. --ANK
432 u8 *ptr = skb_network_header(skb) + opt->ra;
433 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
438 * check and decrement ttl
440 if (hdr->hop_limit <= 1) {
441 /* Force OUTPUT device used as source address */
443 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
445 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
451 /* XXX: idev->cnf.proxy_ndp? */
/* destination proxied on this box: maybe deliver locally instead */
452 if (ipv6_devconf.proxy_ndp &&
453 pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
454 int proxied = ip6_forward_proxy_check(skb);
456 return ip6_input(skb);
457 else if (proxied < 0) {
458 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
463 if (!xfrm6_route_forward(skb)) {
464 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
469 /* IPv6 specs say nothing about it, but it is clear that we cannot
470 send redirects to source routed frames.
471 We don't send redirects to frames decapsulated from IPsec.
473 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
475 struct in6_addr *target = NULL;
477 struct neighbour *n = dst->neighbour;
480 * incoming and outgoing devices are the same
484 rt = (struct rt6_info *) dst;
485 if ((rt->rt6i_flags & RTF_GATEWAY))
486 target = (struct in6_addr*)&n->primary_key;
488 target = &hdr->daddr;
490 /* Limit redirects both by destination (here)
491 and by source (inside ndisc_send_redirect)
493 if (xrlim_allow(dst, 1*HZ))
494 ndisc_send_redirect(skb, n, target);
496 int addrtype = ipv6_addr_type(&hdr->saddr);
498 /* This check is security critical. */
499 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
501 if (addrtype & IPV6_ADDR_LINKLOCAL) {
502 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
503 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
508 if (skb->len > dst_mtu(dst)) {
509 /* Again, force OUTPUT device used as source address */
511 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
512 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
513 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
/* make the header writable before decrementing hop_limit */
518 if (skb_cow(skb, dst->dev->hard_header_len)) {
519 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
525 /* Mangling hops number delayed to point after skb COW */
529 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
530 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
534 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
/*
 * ip6_copy_metadata - copy per-packet metadata (type, priority, protocol,
 * dst reference, mark, tc index, nf trace flag, secmark) from @from onto a
 * freshly built fragment @to.
 */
540 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
542 to->pkt_type = from->pkt_type;
543 to->priority = from->priority;
544 to->protocol = from->protocol;
/* drop any dst the new skb carried, then take a reference on from's */
545 dst_release(to->dst);
546 to->dst = dst_clone(from->dst);
548 to->mark = from->mark;
550 #ifdef CONFIG_NET_SCHED
551 to->tc_index = from->tc_index;
554 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
555 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
556 to->nf_trace = from->nf_trace;
558 skb_copy_secmark(to, from);
/*
 * ip6_find_1stfragopt - walk the extension-header chain to find the offset
 * at which a Fragment header must be inserted; *nexthdr is left pointing
 * at the nexthdr byte to be patched.
 * NOTE(review): the switch cases / loop exit conditions are largely
 * missing from this extract; only the traversal skeleton is visible.
 */
561 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
563 u16 offset = sizeof(struct ipv6hdr);
564 struct ipv6_opt_hdr *exthdr =
565 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
566 unsigned int packet_len = skb->tail - skb->network_header;
568 *nexthdr = &ipv6_hdr(skb)->nexthdr;
570 while (offset + 1 <= packet_len) {
576 case NEXTHDR_ROUTING:
580 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/* a Home Address option forces the fragment header before dst opts */
581 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
591 offset += ipv6_optlen(exthdr);
592 *nexthdr = &exthdr->nexthdr;
593 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
/*
 * ip6_fragment - split an oversized skb into MTU-sized IPv6 fragments and
 * emit each one through @output.  Two paths are visible: a fast path that
 * reuses an existing frag_list when its geometry already fits, and a slow
 * path that allocates and copies each fragment.
 * NOTE(review): this extract is missing many lines (error labels, loop
 * headers, several assignments); comments describe only visible code.
 */
600 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
602 struct net_device *dev;
603 struct sk_buff *frag;
604 struct rt6_info *rt = (struct rt6_info*)skb->dst;
605 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
606 struct ipv6hdr *tmp_hdr;
608 unsigned int mtu, hlen, left, len;
610 int ptr, offset = 0, err=0;
611 u8 *prevhdr, nexthdr = 0;
614 hlen = ip6_find_1stfragopt(skb, &prevhdr);
617 mtu = ip6_skb_dst_mtu(skb);
619 /* We must not fragment if the socket is set to force MTU discovery
620 * or if the skb is not generated by a local socket. (This last
621 * check should be redundant, but it's free.)
623 if (!skb->local_df) {
624 skb->dev = skb->dst->dev;
625 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
626 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
/* honour a smaller per-socket fragment size if configured */
631 if (np && np->frag_size < mtu) {
/* payload space per fragment, after unfragmentable part + frag header */
635 mtu -= hlen + sizeof(struct frag_hdr);
/* Fast path: the skb already carries a frag_list with usable geometry */
637 if (skb_shinfo(skb)->frag_list) {
638 int first_len = skb_pagelen(skb);
641 if (first_len - hlen > mtu ||
642 ((first_len - hlen) & 7) ||
646 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
647 /* Correct geometry. */
648 if (frag->len > mtu ||
649 ((frag->len & 7) && frag->next) ||
650 skb_headroom(frag) < hlen)
653 /* Partially cloned skb? */
654 if (skb_shared(frag))
661 frag->destructor = sock_wfree;
662 truesizes += frag->truesize;
/* detach the frag_list; each member becomes its own fragment */
668 frag = skb_shinfo(skb)->frag_list;
669 skb_shinfo(skb)->frag_list = NULL;
672 *prevhdr = NEXTHDR_FRAGMENT;
673 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
675 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
/* open a gap for the fragment header in the first fragment */
679 __skb_pull(skb, hlen);
680 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
681 __skb_push(skb, hlen);
682 skb_reset_network_header(skb);
683 memcpy(skb_network_header(skb), tmp_hdr, hlen);
685 ipv6_select_ident(skb, fh);
686 fh->nexthdr = nexthdr;
688 fh->frag_off = htons(IP6_MF);
689 frag_id = fh->identification;
691 first_len = skb_pagelen(skb);
692 skb->data_len = first_len - skb_headlen(skb);
693 skb->truesize -= truesizes;
694 skb->len = first_len;
695 ipv6_hdr(skb)->payload_len = htons(first_len -
696 sizeof(struct ipv6hdr));
698 dst_hold(&rt->u.dst);
701 /* Prepare header of the next frame,
702 * before previous one went down. */
704 frag->ip_summed = CHECKSUM_NONE;
705 skb_reset_transport_header(frag);
706 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
707 __skb_push(frag, hlen);
708 skb_reset_network_header(frag);
709 memcpy(skb_network_header(frag), tmp_hdr,
711 offset += skb->len - hlen - sizeof(struct frag_hdr);
712 fh->nexthdr = nexthdr;
714 fh->frag_off = htons(offset);
715 if (frag->next != NULL)
716 fh->frag_off |= htons(IP6_MF); /* more fragments follow */
717 fh->identification = frag_id;
718 ipv6_hdr(frag)->payload_len =
720 sizeof(struct ipv6hdr));
721 ip6_copy_metadata(frag, skb);
726 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
739 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
740 dst_release(&rt->u.dst);
750 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
751 dst_release(&rt->u.dst);
/* Slow path: allocate a new skb for every fragment and copy data in */
756 left = skb->len - hlen; /* Space per frame */
757 ptr = hlen; /* Where to start from */
760 * Fragment the datagram.
763 *prevhdr = NEXTHDR_FRAGMENT;
766 * Keep copying data until we run out.
770 /* IF: it doesn't fit, use 'mtu' - the data space left */
773 /* IF: we are not sending upto and including the packet end
774 then align the next start on an eight byte boundary */
782 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
783 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
784 IP6_INC_STATS(ip6_dst_idev(skb->dst),
785 IPSTATS_MIB_FRAGFAILS);
791 * Set up data on packet
794 ip6_copy_metadata(frag, skb);
795 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
796 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
797 skb_reset_network_header(frag);
798 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
799 frag->transport_header = (frag->network_header + hlen +
800 sizeof(struct frag_hdr));
803 * Charge the memory for the fragment to any owner
807 skb_set_owner_w(frag, skb->sk);
810 * Copy the packet header into the new buffer.
812 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
815 * Build fragment header.
817 fh->nexthdr = nexthdr;
820 ipv6_select_ident(skb, fh);
821 frag_id = fh->identification;
823 fh->identification = frag_id;
826 * Copy a block of the IP datagram.
828 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
832 fh->frag_off = htons(offset);
834 fh->frag_off |= htons(IP6_MF);
835 ipv6_hdr(frag)->payload_len = htons(frag->len -
836 sizeof(struct ipv6hdr));
842 * Put this fragment into the sending queue.
848 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
850 IP6_INC_STATS(ip6_dst_idev(skb->dst),
851 IPSTATS_MIB_FRAGOKS);
856 IP6_INC_STATS(ip6_dst_idev(skb->dst),
857 IPSTATS_MIB_FRAGFAILS);
/*
 * ip6_rt_check - return non-zero (route mismatch) when the cached route's
 * key neither matches the flow address as a /128 host route nor matches
 * the cached last-used address.
 */
862 static inline int ip6_rt_check(struct rt6key *rt_key,
863 struct in6_addr *fl_addr,
864 struct in6_addr *addr_cache)
866 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
867 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
/*
 * ip6_sk_dst_check - validate a socket-cached dst against the flow:
 * destination (and, with subtrees, source) route keys and the outgoing
 * interface must still match, otherwise the cached dst is unusable.
 * NOTE(review): the release/return statements inside the mismatch branch
 * are missing from this extract.
 */
870 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
871 struct dst_entry *dst,
874 struct ipv6_pinfo *np = inet6_sk(sk);
875 struct rt6_info *rt = (struct rt6_info *)dst;
880 /* Yes, checking route validity in not connected
881 * case is not very simple. Take into account,
882 * that we do not support routing by source, TOS,
883 * and MSG_DONTROUTE --ANK (980726)
885 * 1. ip6_rt_check(): If route was host route,
886 * check that cached destination is current.
887 * If it is network route, we still may
888 * check its validity using saved pointer
889 * to the last used address: daddr_cache.
890 * We do not want to save whole address now,
891 * (because main consumer of this service
892 * is tcp, which has not this problem),
893 * so that the last trick works only on connected
895 * 2. oif also should be the same.
897 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
898 #ifdef CONFIG_IPV6_SUBTREES
899 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
901 (fl->oif && fl->oif != dst->dev->ifindex)) {
/*
 * ip6_dst_lookup_tail - perform the actual route lookup for @fl, pick a
 * source address if the flow has none, and (with optimistic DAD) redo the
 * lookup towards the default router when the chosen optimistic source has
 * an INCOMPLETE neighbour on the direct route.
 * NOTE(review): extract omits several lines (ifp release, error label
 * body); comments cover visible code only.
 */
910 static int ip6_dst_lookup_tail(struct sock *sk,
911 struct dst_entry **dst, struct flowi *fl)
916 *dst = ip6_route_output(sk, fl);
918 if ((err = (*dst)->error))
919 goto out_err_release;
/* no source address chosen yet: derive one from the found route */
921 if (ipv6_addr_any(&fl->fl6_src)) {
922 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
924 goto out_err_release;
927 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
929 * Here if the dst entry we've looked up
930 * has a neighbour entry that is in the INCOMPLETE
931 * state and the src address from the flow is
932 * marked as OPTIMISTIC, we release the found
933 * dst entry and replace it instead with the
934 * dst entry of the nexthop router
936 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
937 struct inet6_ifaddr *ifp;
941 ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
944 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
950 * We need to get the dst entry for the
951 * default router instead
954 memcpy(&fl_gw, fl, sizeof(struct flowi));
955 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
956 *dst = ip6_route_output(sk, &fl_gw);
957 if ((err = (*dst)->error))
958 goto out_err_release;
966 if (err == -ENETUNREACH)
967 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
974 * ip6_dst_lookup - perform route lookup on flow
975 * @sk: socket which provides route info
976 * @dst: pointer to dst_entry * for result
977 * @fl: flow to lookup
979 * This function performs a route lookup on the given flow.
981 * It returns zero on success, or a standard errno code on error.
/* thin public wrapper around ip6_dst_lookup_tail() */
983 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
986 return ip6_dst_lookup_tail(sk, dst, fl);
988 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
991 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
992 * @sk: socket which provides the dst cache and route info
993 * @dst: pointer to dst_entry * for result
994 * @fl: flow to lookup
996 * This function performs a route lookup on the given flow with the
997 * possibility of using the cached route in the socket if it is valid.
998 * It will take the socket dst lock when operating on the dst cache.
999 * As a result, this function can only be used in process context.
1001 * It returns zero on success, or a standard errno code on error.
/* try the socket's cached dst first, then fall back to a fresh lookup */
1003 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1007 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1008 *dst = ip6_sk_dst_check(sk, *dst, fl);
1011 return ip6_dst_lookup_tail(sk, dst, fl);
1013 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
/*
 * ip6_ufo_append_data - build a single large skb for UDP fragmentation
 * offload (UFO): the data goes into page frags, gso_size is set to the
 * per-fragment payload, and a fragmentation id is pre-selected so the
 * device can segment the datagram itself.
 * NOTE(review): extract omits some lines (error path after alloc, return
 * statements); comments cover visible code only.
 */
1015 static inline int ip6_ufo_append_data(struct sock *sk,
1016 int getfrag(void *from, char *to, int offset, int len,
1017 int odd, struct sk_buff *skb),
1018 void *from, int length, int hh_len, int fragheaderlen,
1019 int transhdrlen, int mtu,unsigned int flags)
1022 struct sk_buff *skb;
1025 /* There is support for UDP large send offload by network
1026 * device, so create one single skb packet containing complete
1029 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1030 skb = sock_alloc_send_skb(sk,
1031 hh_len + fragheaderlen + transhdrlen + 20,
1032 (flags & MSG_DONTWAIT), &err);
1036 /* reserve space for Hardware header */
1037 skb_reserve(skb, hh_len);
1039 /* create space for UDP/IP header */
1040 skb_put(skb,fragheaderlen + transhdrlen);
1042 /* initialize network header pointer */
1043 skb_reset_network_header(skb);
1045 /* initialize protocol header pointer */
1046 skb->transport_header = skb->network_header + fragheaderlen;
1048 skb->ip_summed = CHECKSUM_PARTIAL;
1050 sk->sk_sndmsg_off = 0;
1053 err = skb_append_datato_frags(sk,skb, getfrag, from,
1054 (length - transhdrlen));
1056 struct frag_hdr fhdr;
1058 /* specify the length of each IP datagram fragment*/
1059 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1060 sizeof(struct frag_hdr);
1061 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1062 ipv6_select_ident(skb, &fhdr);
1063 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1064 __skb_queue_tail(&sk->sk_write_queue, skb);
1068 /* There is not enough support to do UDP LSO,
1069 * so follow normal path
/*
 * ip6_append_data - append user data to the socket's corked write queue,
 * growing existing skbs or allocating MTU-sized ones, so that
 * ip6_push_pending_frames() can later build and send the datagram(s).
 * On the first call for a cork it snapshots options, route, hop limit,
 * tclass and fragment size into the cork state.
 * NOTE(review): this extract is missing many lines (labels, several
 * declarations, else branches); comments describe only visible code.
 */
1076 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1077 int offset, int len, int odd, struct sk_buff *skb),
1078 void *from, int length, int transhdrlen,
1079 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1080 struct rt6_info *rt, unsigned int flags)
1082 struct inet_sock *inet = inet_sk(sk);
1083 struct ipv6_pinfo *np = inet6_sk(sk);
1084 struct sk_buff *skb;
1085 unsigned int maxfraglen, fragheaderlen;
1092 int csummode = CHECKSUM_NONE;
1094 if (flags&MSG_PROBE)
/* first append for this cork: record options/route/limits in cork state */
1096 if (skb_queue_empty(&sk->sk_write_queue)) {
1101 if (np->cork.opt == NULL) {
1102 np->cork.opt = kmalloc(opt->tot_len,
1104 if (unlikely(np->cork.opt == NULL))
1106 } else if (np->cork.opt->tot_len < opt->tot_len) {
1107 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1110 memcpy(np->cork.opt, opt, opt->tot_len);
1111 inet->cork.flags |= IPCORK_OPT;
1112 /* need source address above miyazawa*/
1114 dst_hold(&rt->u.dst);
1116 inet->cork.fl = *fl;
1117 np->cork.hop_limit = hlimit;
1118 np->cork.tclass = tclass;
1119 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1120 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1121 if (np->frag_size < mtu) {
1123 mtu = np->frag_size;
1125 inet->cork.fragsize = mtu;
1126 if (dst_allfrag(rt->u.dst.path))
1127 inet->cork.flags |= IPCORK_ALLFRAG;
1128 inet->cork.length = 0;
1129 sk->sk_sndmsg_page = NULL;
1130 sk->sk_sndmsg_off = 0;
1131 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1132 rt->rt6i_nfheader_len;
1133 length += exthdrlen;
1134 transhdrlen += exthdrlen;
/* subsequent appends reuse the flow/options stored in the cork */
1137 fl = &inet->cork.fl;
1138 if (inet->cork.flags & IPCORK_OPT)
1142 mtu = inet->cork.fragsize;
1145 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1147 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1148 (opt ? opt->opt_nflen : 0);
1149 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
/* reject messages that would exceed the maximum IPv6 payload */
1151 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1152 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1153 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1159 * Let's try using as much space as possible.
1160 * Use MTU if total length of the message fits into the MTU.
1161 * Otherwise, we need to reserve fragment header and
1162 * fragment alignment (= 8-15 octets, in total).
1164 * Note that we may need to "move" the data from the tail of
1165 * of the buffer to the new fragment when we split
1168 * FIXME: It may be fragmented into multiple chunks
1169 * at once if non-fragmentable extension headers
1174 inet->cork.length += length;
/* large UDP sends on UFO-capable devices take the offload path */
1175 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1176 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1178 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1179 fragheaderlen, transhdrlen, mtu,
1186 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1189 while (length > 0) {
1190 /* Check if the remaining data fits into current packet. */
1191 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1193 copy = maxfraglen - skb->len;
1197 unsigned int datalen;
1198 unsigned int fraglen;
1199 unsigned int fraggap;
1200 unsigned int alloclen;
1201 struct sk_buff *skb_prev;
1205 /* There's no room in the current skb */
1207 fraggap = skb_prev->len - maxfraglen;
1212 * If remaining data exceeds the mtu,
1213 * we know we need more fragment(s).
1215 datalen = length + fraggap;
1216 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1217 datalen = maxfraglen - fragheaderlen;
1219 fraglen = datalen + fragheaderlen;
1220 if ((flags & MSG_MORE) &&
1221 !(rt->u.dst.dev->features&NETIF_F_SG))
1224 alloclen = datalen + fragheaderlen;
1227 * The last fragment gets additional space at tail.
1228 * Note: we overallocate on fragments with MSG_MORE
1229 * because we have no idea if we're the last one.
1231 if (datalen == length + fraggap)
1232 alloclen += rt->u.dst.trailer_len;
1235 * We just reserve space for fragment header.
1236 * Note: this may be overallocation if the message
1237 * (without MSG_MORE) fits into the MTU.
1239 alloclen += sizeof(struct frag_hdr);
1242 skb = sock_alloc_send_skb(sk,
1244 (flags & MSG_DONTWAIT), &err);
1247 if (atomic_read(&sk->sk_wmem_alloc) <=
1249 skb = sock_wmalloc(sk,
1250 alloclen + hh_len, 1,
1252 if (unlikely(skb == NULL))
1258 * Fill in the control structures
1260 skb->ip_summed = csummode;
1262 /* reserve for fragmentation */
1263 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1266 * Find where to start putting bytes
1268 data = skb_put(skb, fraglen);
1269 skb_set_network_header(skb, exthdrlen);
1270 data += fragheaderlen;
1271 skb->transport_header = (skb->network_header +
/* move the overhanging tail of the previous skb into this one */
1274 skb->csum = skb_copy_and_csum_bits(
1275 skb_prev, maxfraglen,
1276 data + transhdrlen, fraggap, 0);
1277 skb_prev->csum = csum_sub(skb_prev->csum,
1280 pskb_trim_unique(skb_prev, maxfraglen);
1282 copy = datalen - transhdrlen - fraggap;
1287 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1294 length -= datalen - fraggap;
/* only the first skb of a datagram can use hardware checksumming */
1297 csummode = CHECKSUM_NONE;
1300 * Put the packet on the pending queue
1302 __skb_queue_tail(&sk->sk_write_queue, skb);
/* no scatter-gather: copy straight into the skb's linear area */
1309 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1313 if (getfrag(from, skb_put(skb, copy),
1314 offset, copy, off, skb) < 0) {
1315 __skb_trim(skb, off);
/* scatter-gather: append data into page fragments */
1320 int i = skb_shinfo(skb)->nr_frags;
1321 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1322 struct page *page = sk->sk_sndmsg_page;
1323 int off = sk->sk_sndmsg_off;
1326 if (page && (left = PAGE_SIZE - off) > 0) {
1329 if (page != frag->page) {
1330 if (i == MAX_SKB_FRAGS) {
1335 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1336 frag = &skb_shinfo(skb)->frags[i];
1338 } else if(i < MAX_SKB_FRAGS) {
1339 if (copy > PAGE_SIZE)
1341 page = alloc_pages(sk->sk_allocation, 0);
1346 sk->sk_sndmsg_page = page;
1347 sk->sk_sndmsg_off = 0;
1349 skb_fill_page_desc(skb, i, page, 0, 0);
1350 frag = &skb_shinfo(skb)->frags[i];
1355 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1359 sk->sk_sndmsg_off += copy;
1362 skb->data_len += copy;
1363 skb->truesize += copy;
1364 atomic_add(copy, &sk->sk_wmem_alloc);
/* error path: undo the length accounting and count the discard */
1371 inet->cork.length -= length;
1372 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_cork_release - free all cork state: cached options, the held route
 * reference, the ALLFRAG flag, and the saved flow.
 */
1376 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1378 inet->cork.flags &= ~IPCORK_OPT;
1379 kfree(np->cork.opt);
1380 np->cork.opt = NULL;
1382 dst_release(&np->cork.rt->u.dst);
1384 inet->cork.flags &= ~IPCORK_ALLFRAG;
1386 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
/*
 * ip6_push_pending_frames - coalesce the corked write queue into one skb
 * (head + frag_list), prepend extension headers and the IPv6 header from
 * the cork state, update stats, and send it via ip6_local_out().
 * NOTE(review): extract omits some lines (labels, payload_len setup for
 * the jumbo/normal case); comments cover visible code only.
 */
1389 int ip6_push_pending_frames(struct sock *sk)
1391 struct sk_buff *skb, *tmp_skb;
1392 struct sk_buff **tail_skb;
1393 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1394 struct inet_sock *inet = inet_sk(sk);
1395 struct ipv6_pinfo *np = inet6_sk(sk);
1396 struct ipv6hdr *hdr;
1397 struct ipv6_txoptions *opt = np->cork.opt;
1398 struct rt6_info *rt = np->cork.rt;
1399 struct flowi *fl = &inet->cork.fl;
1400 unsigned char proto = fl->proto;
1403 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1405 tail_skb = &(skb_shinfo(skb)->frag_list);
1407 /* move skb->data to ip header from ext header */
1408 if (skb->data < skb_network_header(skb))
1409 __skb_pull(skb, skb_network_offset(skb));
/* chain the remaining queued skbs onto the head's frag_list */
1410 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1411 __skb_pull(tmp_skb, skb_network_header_len(skb));
1412 *tail_skb = tmp_skb;
1413 tail_skb = &(tmp_skb->next);
1414 skb->len += tmp_skb->len;
1415 skb->data_len += tmp_skb->len;
1416 skb->truesize += tmp_skb->truesize;
1417 __sock_put(tmp_skb->sk);
1418 tmp_skb->destructor = NULL;
1422 /* Allow local fragmentation. */
1423 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1426 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1427 __skb_pull(skb, skb_network_header_len(skb));
1428 if (opt && opt->opt_flen)
1429 ipv6_push_frag_opts(skb, opt, &proto);
1430 if (opt && opt->opt_nflen)
1431 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1433 skb_push(skb, sizeof(struct ipv6hdr));
1434 skb_reset_network_header(skb);
1435 hdr = ipv6_hdr(skb);
/* version 6 | cork tclass | flow label */
1437 *(__be32*)hdr = fl->fl6_flowlabel |
1438 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1440 hdr->hop_limit = np->cork.hop_limit;
1441 hdr->nexthdr = proto;
1442 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1443 ipv6_addr_copy(&hdr->daddr, final_dst);
1445 skb->priority = sk->sk_priority;
1446 skb->mark = sk->sk_mark;
1448 skb->dst = dst_clone(&rt->u.dst);
1449 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1450 if (proto == IPPROTO_ICMPV6) {
1451 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1453 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1454 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1457 err = ip6_local_out(skb);
1460 err = np->recverr ? net_xmit_errno(err) : 0;
1466 ip6_cork_release(inet, np);
/*
 * ip6_flush_pending_frames - drop every queued skb on the socket's write
 * queue (counting each as an OUTDISCARDS) and release the cork state.
 */
1472 void ip6_flush_pending_frames(struct sock *sk)
1474 struct sk_buff *skb;
1476 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1478 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1479 IPSTATS_MIB_OUTDISCARDS);
1483 ip6_cork_release(inet_sk(sk), inet6_sk(sk));