]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv4/ip_tunnel.c
ip_tunnel: Initialize the fallback device properly
[karo-tx-linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Replace the cached dst in @idst with @dst.
 *
 * Takes a reference on @dst before publishing it, and releases the
 * reference held on the previously cached entry.  The swap uses xchg()
 * so a concurrent lockless reader never sees a half-updated pointer.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		/* Uncacheable entries must not be stored; cache NULL instead. */
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
}
85
86 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87 {
88         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89 }
90
91 static void tunnel_dst_reset(struct ip_tunnel *t)
92 {
93         tunnel_dst_set(t, NULL);
94 }
95
96 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
/* Get a referenced copy of the current CPU's cached route for @t.
 *
 * Returns NULL when nothing usable is cached.  If the cached entry has
 * become obsolete (per its dst_ops->check callback with @cookie), this
 * CPU's cache slot is cleared so the caller falls back to a fresh
 * route lookup.
 *
 * NOTE(review): dst_hold() happens after the RCU-protected load; this
 * relies on dst entries not being freed while readers are inside an
 * RCU read-side section -- confirm against this tree's dst GC rules.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			/* Stale entry: clear this CPU's slot and re-route. */
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
136 /* Fallback tunnel: no source, no destination, no key, no options
137
138    Tunnel hash table:
139    We require exact key match i.e. if a key is present in packet
140    it will match only tunnel with the same key; if it is not present,
141    it will match only keyless tunnel.
142
143    All keysless packets, if not matched configured keyless tunnels
144    will match fallback tunnel.
145    Given src, dst and key, find appropriate for input tunnel.
146 */
147 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148                                    int link, __be16 flags,
149                                    __be32 remote, __be32 local,
150                                    __be32 key)
151 {
152         unsigned int hash;
153         struct ip_tunnel *t, *cand = NULL;
154         struct hlist_head *head;
155
156         hash = ip_tunnel_hash(key, remote);
157         head = &itn->tunnels[hash];
158
159         hlist_for_each_entry_rcu(t, head, hash_node) {
160                 if (local != t->parms.iph.saddr ||
161                     remote != t->parms.iph.daddr ||
162                     !(t->dev->flags & IFF_UP))
163                         continue;
164
165                 if (!ip_tunnel_key_match(&t->parms, flags, key))
166                         continue;
167
168                 if (t->parms.link == link)
169                         return t;
170                 else
171                         cand = t;
172         }
173
174         hlist_for_each_entry_rcu(t, head, hash_node) {
175                 if (remote != t->parms.iph.daddr ||
176                     !(t->dev->flags & IFF_UP))
177                         continue;
178
179                 if (!ip_tunnel_key_match(&t->parms, flags, key))
180                         continue;
181
182                 if (t->parms.link == link)
183                         return t;
184                 else if (!cand)
185                         cand = t;
186         }
187
188         hash = ip_tunnel_hash(key, 0);
189         head = &itn->tunnels[hash];
190
191         hlist_for_each_entry_rcu(t, head, hash_node) {
192                 if ((local != t->parms.iph.saddr &&
193                      (local != t->parms.iph.daddr ||
194                       !ipv4_is_multicast(local))) ||
195                     !(t->dev->flags & IFF_UP))
196                         continue;
197
198                 if (!ip_tunnel_key_match(&t->parms, flags, key))
199                         continue;
200
201                 if (t->parms.link == link)
202                         return t;
203                 else if (!cand)
204                         cand = t;
205         }
206
207         if (flags & TUNNEL_NO_KEY)
208                 goto skip_key_lookup;
209
210         hlist_for_each_entry_rcu(t, head, hash_node) {
211                 if (t->parms.i_key != key ||
212                     !(t->dev->flags & IFF_UP))
213                         continue;
214
215                 if (t->parms.link == link)
216                         return t;
217                 else if (!cand)
218                         cand = t;
219         }
220
221 skip_key_lookup:
222         if (cand)
223                 return cand;
224
225         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226                 return netdev_priv(itn->fb_tunnel_dev);
227
228
229         return NULL;
230 }
231 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234                                     struct ip_tunnel_parm *parms)
235 {
236         unsigned int h;
237         __be32 remote;
238
239         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
240                 remote = parms->iph.daddr;
241         else
242                 remote = 0;
243
244         h = ip_tunnel_hash(parms->i_key, remote);
245         return &itn->tunnels[h];
246 }
247
248 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
249 {
250         struct hlist_head *head = ip_bucket(itn, &t->parms);
251
252         hlist_add_head_rcu(&t->hash_node, head);
253 }
254
255 static void ip_tunnel_del(struct ip_tunnel *t)
256 {
257         hlist_del_init_rcu(&t->hash_node);
258 }
259
260 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
261                                         struct ip_tunnel_parm *parms,
262                                         int type)
263 {
264         __be32 remote = parms->iph.daddr;
265         __be32 local = parms->iph.saddr;
266         __be32 key = parms->i_key;
267         int link = parms->link;
268         struct ip_tunnel *t = NULL;
269         struct hlist_head *head = ip_bucket(itn, parms);
270
271         hlist_for_each_entry_rcu(t, head, hash_node) {
272                 if (local == t->parms.iph.saddr &&
273                     remote == t->parms.iph.daddr &&
274                     key == t->parms.i_key &&
275                     link == t->parms.link &&
276                     type == t->dev->type)
277                         break;
278         }
279         return t;
280 }
281
/* Allocate and register a new tunnel net_device in @net.
 *
 * If @parms->name is empty, the template "<kind>%d" is used and the
 * kernel assigns the next free index at registration time.  The new
 * device's ip_tunnel private area is seeded from @parms.
 *
 * Returns the registered device or an ERR_PTR() on failure.
 * Caller must hold RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the "%d" template plus the trailing NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
327
328 static inline void init_tunnel_flow(struct flowi4 *fl4,
329                                     int proto,
330                                     __be32 daddr, __be32 saddr,
331                                     __be32 key, __u8 tos, int oif)
332 {
333         memset(fl4, 0, sizeof(*fl4));
334         fl4->flowi4_oif = oif;
335         fl4->daddr = daddr;
336         fl4->saddr = saddr;
337         fl4->flowi4_tos = tos;
338         fl4->flowi4_proto = proto;
339         fl4->fl4_gre_key = key;
340 }
341
/* Guess the underlay device for tunnel @dev and derive a suitable MTU.
 *
 * If the tunnel has a fixed destination, the outer route is looked up
 * (and cached in the per-cpu dst cache); otherwise the configured link
 * index is used.  Also updates dev->needed_headroom, dev->iflink and,
 * for non-Ethernet tunnels with a destination, sets IFF_POINTOPOINT.
 *
 * Returns the suggested device MTU (clamped to a minimum of 68).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Seed the per-cpu dst cache with this route. */
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found: fall back to the explicitly configured link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* 68 is the minimum IPv4 MTU (RFC 791). */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
389
390 static struct ip_tunnel *ip_tunnel_create(struct net *net,
391                                           struct ip_tunnel_net *itn,
392                                           struct ip_tunnel_parm *parms)
393 {
394         struct ip_tunnel *nt, *fbt;
395         struct net_device *dev;
396
397         BUG_ON(!itn->fb_tunnel_dev);
398         fbt = netdev_priv(itn->fb_tunnel_dev);
399         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
400         if (IS_ERR(dev))
401                 return NULL;
402
403         dev->mtu = ip_tunnel_bind_dev(dev);
404
405         nt = netdev_priv(dev);
406         ip_tunnel_add(itn, nt);
407         return nt;
408 }
409
/* Common receive path for IPv4 tunnels.
 *
 * Validates the parsed tunnel header @tpi against the tunnel's
 * configured i_flags (checksum and sequence-number policy), undoes ECN
 * encapsulation, updates rx statistics and hands the packet to the
 * tunnel's GRO cell.
 *
 * Consumes @skb in all cases and always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence in the packet must match the tunnel's
	 * TUNNEL_CSUM configuration, in both directions.
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		/* Drop packets with a missing or backwards sequence number
		 * (signed wraparound-safe comparison).
		 */
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means an invalid ECN combination: the packet must go. */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub skb state when the packet crosses a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
478
/* Enforce path MTU for a packet about to be tunnelled over @rt.
 *
 * Propagates the tunnel path MTU to the inner dst and, when the packet
 * is too big and must not be fragmented, sends the appropriate ICMP
 * (v4 frag-needed / v6 packet-too-big) error back to the sender.
 *
 * Returns 0 to continue transmission or -E2BIG if the caller must
 * drop the packet.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		/* DF set: inner MTU is the outer path MTU minus tunnel
		 * overhead (outer IP header + tunnel header).
		 */
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or when the tunnel
		 * has a fixed unicast destination) so later lookups see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
526
/* Common transmit path for IPv4 tunnels.
 *
 * @tnl_params: outer IPv4 header template; a zero daddr means NBMA
 *	mode and the outer destination is derived from the inner
 *	packet's routing state.
 * @protocol:   IP protocol number for the outer header.
 *
 * Routes the outer packet (via the per-cpu dst cache when the flow is
 * "connected", i.e. fully determined by the tunnel configuration),
 * handles PMTU, then pushes the outer header and sends.  Consumes
 * @skb on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible v6 addresses carry a usable
			 * IPv4 next hop in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		/* Destination varies per packet: bypass the dst cache. */
		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet". */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	/* Routing back out of the tunnel device itself would loop. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Back off transmission for a while after ICMP errors arrived
	 * for this tunnel (err_count set by the error handler).
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 in the template means "inherit from inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
687
/* Apply new parameters @p to tunnel @t (device @dev).
 *
 * The tunnel is unhashed and rehashed because changing the endpoint
 * addresses or i_key can move it to a different hash bucket.  Caller
 * must hold RTNL.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the IPv4 endpoints as the
		 * device hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		/* A different underlay link may imply a different MTU. */
		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* Cached routes may no longer match the new endpoints. */
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
720
/* Legacy ioctl-based tunnel configuration (SIOCGETTUNNEL,
 * SIOCADDTUNNEL, SIOCCHGTUNNEL, SIOCDELTUNNEL).
 *
 * @dev is either the per-netns fallback device (operate on the tunnel
 * described by @p) or a specific tunnel device.  @p is input and, for
 * SIOCGETTUNNEL, also output.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL requires forbidding fragmentation. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Normalize: clear keys that are not flagged as present. */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* @p matches a different existing device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Switching broadcast/point-to-point mode on
				 * an existing device is not supported.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself can never be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
814
815 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
816 {
817         struct ip_tunnel *tunnel = netdev_priv(dev);
818         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
819
820         if (new_mtu < 68 ||
821             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
822                 return -EINVAL;
823         dev->mtu = new_mtu;
824         return 0;
825 }
826 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
827
828 static void ip_tunnel_dev_free(struct net_device *dev)
829 {
830         struct ip_tunnel *tunnel = netdev_priv(dev);
831
832         gro_cells_destroy(&tunnel->gro_cells);
833         free_percpu(tunnel->dst_cache);
834         free_percpu(dev->tstats);
835         free_netdev(dev);
836 }
837
838 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
839 {
840         struct ip_tunnel *tunnel = netdev_priv(dev);
841         struct ip_tunnel_net *itn;
842
843         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
844
845         if (itn->fb_tunnel_dev != dev) {
846                 ip_tunnel_del(netdev_priv(dev));
847                 unregister_netdevice_queue(dev, head);
848         }
849 }
850 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
851
/* Per-netns initialization for one tunnel type.
 *
 * Initializes the hash table and, when @ops is given, creates the
 * per-netns fallback device named @devname, binds it and hashes it in.
 * The fallback device is marked NETNS_LOCAL: moving it to another
 * netns would be unsafe, so it is forbidden.
 *
 * Returns 0 on success or a negative errno.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	/* A NULL @ops means this tunnel type has no fallback device. */
	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
886
887 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
888                               struct rtnl_link_ops *ops)
889 {
890         struct net *net = dev_net(itn->fb_tunnel_dev);
891         struct net_device *dev, *aux;
892         int h;
893
894         for_each_netdev_safe(net, dev, aux)
895                 if (dev->rtnl_link_ops == ops)
896                         unregister_netdevice_queue(dev, head);
897
898         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
899                 struct ip_tunnel *t;
900                 struct hlist_node *n;
901                 struct hlist_head *thead = &itn->tunnels[h];
902
903                 hlist_for_each_entry_safe(t, n, thead, hash_node)
904                         /* If dev is in the same netns, it has already
905                          * been added to the list by the previous loop.
906                          */
907                         if (!net_eq(dev_net(t->dev), net))
908                                 unregister_netdevice_queue(t->dev, head);
909         }
910 }
911
/* Per-netns exit helper: tear down all tunnel devices managed by
 * @itn / @ops, batching them into a single unregister_netdevice_many()
 * call under one rtnl_lock() to keep netns teardown cheap.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
922
/* rtnl newlink handler shared by the tunnel modules.
 *
 * @dev is a freshly allocated (not yet registered) tunnel netdev;
 * register it and hash it under the parameters in @p.  Called with
 * RTNL held.
 *
 * Returns 0 on success, -EEXIST if a tunnel with the same parameters
 * already exists, or the register_netdevice() error.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	/* Ethernet-style tunnels get a random MAC unless userspace
	 * supplied one via IFLA_ADDRESS.
	 */
	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	/* Derive the MTU from the underlying device, but honour an
	 * explicit IFLA_MTU from userspace.
	 */
	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	/* Hash-insert last, once the device is fully set up. */
	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
957
/* rtnl changelink handler shared by the tunnel modules.
 *
 * Validate the new parameters in @p for the existing tunnel @dev and
 * commit them via ip_tunnel_update().  Called with RTNL held.
 *
 * Returns 0 on success, -EINVAL when @dev is the per-netns fallback
 * device (which must not be reconfigured) or when the new parameters
 * would change the device's broadcast/point-to-point nature, and
 * -EEXIST when @p collides with a different existing tunnel.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		/* Parameters match an existing tunnel: fine if it is
		 * this very device, collision otherwise.
		 */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			/* Multicast destination implies a broadcast
			 * device, a unicast one point-to-point; these
			 * flags cannot change after registration.
			 */
			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	/* Sync the MTU unless userspace pinned one via IFLA_MTU. */
	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
995
996 int ip_tunnel_init(struct net_device *dev)
997 {
998         struct ip_tunnel *tunnel = netdev_priv(dev);
999         struct iphdr *iph = &tunnel->parms.iph;
1000         int i, err;
1001
1002         dev->destructor = ip_tunnel_dev_free;
1003         dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
1004         if (!dev->tstats)
1005                 return -ENOMEM;
1006
1007         for_each_possible_cpu(i) {
1008                 struct pcpu_sw_netstats *ipt_stats;
1009                 ipt_stats = per_cpu_ptr(dev->tstats, i);
1010                 u64_stats_init(&ipt_stats->syncp);
1011         }
1012
1013         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1014         if (!tunnel->dst_cache) {
1015                 free_percpu(dev->tstats);
1016                 return -ENOMEM;
1017         }
1018
1019         err = gro_cells_init(&tunnel->gro_cells, dev);
1020         if (err) {
1021                 free_percpu(tunnel->dst_cache);
1022                 free_percpu(dev->tstats);
1023                 return err;
1024         }
1025
1026         tunnel->dev = dev;
1027         tunnel->net = dev_net(dev);
1028         strcpy(tunnel->parms.name, dev->name);
1029         iph->version            = 4;
1030         iph->ihl                = 5;
1031
1032         return 0;
1033 }
1034 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1035
1036 void ip_tunnel_uninit(struct net_device *dev)
1037 {
1038         struct ip_tunnel *tunnel = netdev_priv(dev);
1039         struct net *net = tunnel->net;
1040         struct ip_tunnel_net *itn;
1041
1042         itn = net_generic(net, tunnel->ip_tnl_net_id);
1043         /* fb_tunnel_dev will be unregisted in net-exit call. */
1044         if (itn->fb_tunnel_dev != dev)
1045                 ip_tunnel_del(netdev_priv(dev));
1046
1047         ip_tunnel_dst_reset_all(tunnel);
1048 }
1049 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1050
1051 /* Do least required initialization, rest of init is done in tunnel_init call */
1052 void ip_tunnel_setup(struct net_device *dev, int net_id)
1053 {
1054         struct ip_tunnel *tunnel = netdev_priv(dev);
1055         tunnel->ip_tnl_net_id = net_id;
1056 }
1057 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1058
1059 MODULE_LICENSE("GPL");