]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv4/ip_gre.c
gre: strict physical device binding
[karo-tx-linux.git] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/etherdevice.h>
31 #include <linux/if_ether.h>
32
33 #include <net/sock.h>
34 #include <net/ip.h>
35 #include <net/icmp.h>
36 #include <net/protocol.h>
37 #include <net/ipip.h>
38 #include <net/arp.h>
39 #include <net/checksum.h>
40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h>
43 #include <net/net_namespace.h>
44 #include <net/netns/generic.h>
45 #include <net/rtnetlink.h>
46
47 #ifdef CONFIG_IPV6
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52
53 /*
54    Problems & solutions
55    --------------------
56
57    1. The most important issue is detecting local dead loops.
58    They would cause complete host lockup in transmit, which
59    would be "resolved" by stack overflow or, if queueing is enabled,
60    with infinite looping in net_bh.
61
62    We cannot track such dead loops during route installation,
63    it is infeasible task. The most general solutions would be
64    to keep skb->encapsulation counter (sort of local ttl),
65    and silently drop packet when it expires. It is the best
66  *      solution, but it supposes maintaining a new variable in ALL
67    skb, even if no tunneling is used.
68
69    Current solution: t->recursion lock breaks dead loops. It looks
70    like dev->tbusy flag, but I preferred new variable, because
71    the semantics is different. One day, when hard_start_xmit
72    will be multithreaded we will have to use skb->encapsulation.
73
74
75
76    2. Networking dead loops would not kill routers, but would really
77    kill network. IP hop limit plays role of "t->recursion" in this case,
78    if we copy it from packet being encapsulated to upper header.
79    It is very good solution, but it introduces two problems:
80
81    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82      do not work over tunnels.
83    - traceroute does not work. I planned to relay ICMP from tunnel,
84      so that this problem would be solved and traceroute output
85      would even more informative. This idea appeared to be wrong:
86      only Linux complies to rfc1812 now (yes, guys, Linux is the only
87      true router now :-)), all routers (at least, in neighbourhood of mine)
88      return only 8 bytes of payload. It is the end.
89
90    Hence, if we want that OSPF worked or traceroute said something reasonable,
91    we should search for another solution.
92
93    One of them is to parse packet trying to detect inner encapsulation
94    made by our node. It is difficult or even impossible, especially,
95    taking into account fragmentation. To be short, it is not a solution at all.
96
97    Current solution: The solution was UNEXPECTEDLY SIMPLE.
98    We force DF flag on tunnels with preconfigured hop limit,
99    that is ALL. :-) Well, it does not remove the problem completely,
100    but exponential growth of network traffic is changed to linear
101    (branches, that exceed pmtu are pruned) and tunnel mtu
102    quickly degrades to a value <68, where looping stops.
103    Yes, it is not good if there exists a router in the loop,
104    which does not force DF, even when encapsulating packets have DF set.
105    But it is not our problem! Nobody could accuse us, we made
106    all that we could make. Even if it is your gated who injected
107    fatal route to network, even if it were you who configured
108    fatal static route: you are innocent. :-)
109
110
111
112    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113    practically identical code. It would be good to glue them
114    together, but it is not very evident, how to make them modular.
115    sit is integral part of IPv6, ipip and gre are naturally modular.
116    We could extract common parts (hash table, ioctl etc)
117    to a separate module (ip_tunnel.c).
118
119    Alexey Kuznetsov.
120  */
121
122 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
123 static int ipgre_tunnel_init(struct net_device *dev);
124 static void ipgre_tunnel_setup(struct net_device *dev);
125 static int ipgre_tunnel_bind_dev(struct net_device *dev);
126
/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE  16

/* Per-namespace id, used with net_generic() to find our struct ipgre_net. */
static int ipgre_net_id;
struct ipgre_net {
	/* Four tables of HASH_SIZE chains, indexed by match specificity;
	 * see the "Tunnel hash table" comment below: tunnels[prio][hash]. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Catch-all device that receives keyless packets matching no
	 * configured tunnel (see ipgre_tunnel_lookup()). */
	struct net_device *fb_tunnel_dev;
};
137
138 /* Tunnel hash table */
139
140 /*
141    4 hash tables:
142
143    3: (remote,local)
144    2: (remote,*)
145    1: (*,local)
146    0: (*,*)
147
148    We require exact key match i.e. if a key is present in packet
149    it will match only tunnel with the same key; if it is not present,
150    it will match only keyless tunnel.
151
152    All keyless packets, if not matched to configured keyless tunnels,
153    will match the fallback tunnel.
154  */
155
/* Fold an address or key into a 4-bit table index.  Only the low byte
 * of the (big-endian) value contributes to the result. */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* Aliases for the four specificity classes described above. */
#define tunnels_r_l     tunnels[3]
#define tunnels_r       tunnels[2]
#define tunnels_l       tunnels[1]
#define tunnels_wc      tunnels[0]

/* Guards the hash chains: readers on the receive/ICMP paths, writers
 * when tunnels are added or removed. */
static DEFINE_RWLOCK(ipgre_lock);
164
/*
 * Given src, dst and key, find the appropriate tunnel for an incoming
 * packet.
 *
 * @dev:       device the packet arrived on (its ifindex is the preferred
 *             physical binding)
 * @remote:    outer source address of the packet (the peer)
 * @local:     outer destination address of the packet
 * @key:       GRE key from the packet, 0 when absent (exact match required)
 * @gre_proto: GRE protocol field; ETH_P_TEB selects ARPHRD_ETHER (gretap)
 *             devices, anything else plain ARPHRD_IPGRE devices
 *
 * The four tables are scanned from most specific (remote,local) down to
 * (*,*).  Within a chain a tunnel matching both the arrival device
 * (parms.link) and the device type wins immediately.  Imperfect
 * candidates are remembered in sel[], indexed by a badness mask
 * (bit 0: link mismatch, bit 1: device-type mismatch); if no perfect
 * match exists, the least-bad candidate over all tables is returned,
 * and finally the fallback device if it is up.
 */
static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
					      __be32 remote, __be32 local,
					      __be32 key, __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t, *sel[4] = { NULL, NULL, NULL, NULL };
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int idx;

	/* (remote,local) chains */
	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		/* Only ARPHRD_IPGRE devices are considered regardless of
		 * payload type; others must match dev_type exactly. */
		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		idx = 0;
		if (t->parms.link != link)
			idx |= 1;
		if (t->dev->type != dev_type)
			idx |= 2;
		if (idx == 0)
			return t;
		if (sel[idx] == NULL)
			sel[idx] = t;
	}

	/* (remote,*) chains */
	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
		if (remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		idx = 0;
		if (t->parms.link != link)
			idx |= 1;
		if (t->dev->type != dev_type)
			idx |= 2;
		if (idx == 0)
			return t;
		if (sel[idx] == NULL)
			sel[idx] = t;
	}

	/* (*,local) chains; a multicast destination may also match a
	 * tunnel configured with that multicast address as its remote. */
	for (t = ign->tunnels_l[h1]; t; t = t->next) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		idx = 0;
		if (t->parms.link != link)
			idx |= 1;
		if (t->dev->type != dev_type)
			idx |= 2;
		if (idx == 0)
			return t;
		if (sel[idx] == NULL)
			sel[idx] = t;
	}

	/* (*,*) chains: wildcard tunnels, matched by key only */
	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		idx = 0;
		if (t->parms.link != link)
			idx |= 1;
		if (t->dev->type != dev_type)
			idx |= 2;
		if (idx == 0)
			return t;
		if (sel[idx] == NULL)
			sel[idx] = t;
	}

	/* No perfect match anywhere: take the candidate with the lowest
	 * badness mask (wrong link beats wrong type beats both). */
	for (idx = 1; idx < ARRAY_SIZE(sel); idx++)
		if (sel[idx] != NULL)
			return sel[idx];

	if (ign->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(ign->fb_tunnel_dev);

	return NULL;
}
276
277 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
278                 struct ip_tunnel_parm *parms)
279 {
280         __be32 remote = parms->iph.daddr;
281         __be32 local = parms->iph.saddr;
282         __be32 key = parms->i_key;
283         unsigned h = HASH(key);
284         int prio = 0;
285
286         if (local)
287                 prio |= 1;
288         if (remote && !ipv4_is_multicast(remote)) {
289                 prio |= 2;
290                 h ^= HASH(remote);
291         }
292
293         return &ign->tunnels[prio][h];
294 }
295
296 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
297                 struct ip_tunnel *t)
298 {
299         return __ipgre_bucket(ign, &t->parms);
300 }
301
/*
 * Insert tunnel @t at the head of its hash bucket.
 *
 * The successor pointer is prepared before the bucket head is updated;
 * only the single store that publishes @t to readers is performed under
 * the write lock.
 */
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}
311
312 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
313 {
314         struct ip_tunnel **tp;
315
316         for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
317                 if (t == *tp) {
318                         write_lock_bh(&ipgre_lock);
319                         *tp = t->next;
320                         write_unlock_bh(&ipgre_lock);
321                         break;
322                 }
323         }
324 }
325
326 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
327                                            struct ip_tunnel_parm *parms,
328                                            int type)
329 {
330         __be32 remote = parms->iph.daddr;
331         __be32 local = parms->iph.saddr;
332         __be32 key = parms->i_key;
333         int link = parms->link;
334         struct ip_tunnel *t, **tp;
335         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
336
337         for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
338                 if (local == t->parms.iph.saddr &&
339                     remote == t->parms.iph.daddr &&
340                     key == t->parms.i_key &&
341                     link == t->parms.link &&
342                     type == t->dev->type)
343                         break;
344
345         return t;
346 }
347
348 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
349                 struct ip_tunnel_parm *parms, int create)
350 {
351         struct ip_tunnel *t, *nt;
352         struct net_device *dev;
353         char name[IFNAMSIZ];
354         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
355
356         t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
357         if (t || !create)
358                 return t;
359
360         if (parms->name[0])
361                 strlcpy(name, parms->name, IFNAMSIZ);
362         else
363                 sprintf(name, "gre%%d");
364
365         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
366         if (!dev)
367           return NULL;
368
369         dev_net_set(dev, net);
370
371         if (strchr(name, '%')) {
372                 if (dev_alloc_name(dev, name) < 0)
373                         goto failed_free;
374         }
375
376         nt = netdev_priv(dev);
377         nt->parms = *parms;
378         dev->rtnl_link_ops = &ipgre_link_ops;
379
380         dev->mtu = ipgre_tunnel_bind_dev(dev);
381
382         if (register_netdevice(dev) < 0)
383                 goto failed_free;
384
385         dev_hold(dev);
386         ipgre_tunnel_link(ign, nt);
387         return nt;
388
389 failed_free:
390         free_netdev(dev);
391         return NULL;
392 }
393
394 static void ipgre_tunnel_uninit(struct net_device *dev)
395 {
396         struct net *net = dev_net(dev);
397         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
398
399         ipgre_tunnel_unlink(ign, netdev_priv(dev));
400         dev_put(dev);
401 }
402
403
/*
 * ICMP error handler for IPPROTO_GRE: when an ICMP error quoting one of
 * our encapsulated packets arrives, find the originating tunnel and
 * record the error so the transmit path can report link failure.
 */
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	/* skb->data points at the quoted (inner) IP header we sent. */
	struct iphdr *iph = (struct iphdr *)skb->data;
	__be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		/* Account for the optional checksum word preceding the key
		 * so the key can be read as the last word below. */
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	/* The quoted header is outbound, so its daddr is the tunnel's
	 * remote and its saddr the tunnel's local address. */
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	/* ttl == 0 means the tunnel inherits the inner TTL (see the
	 * transmit path), so TTL-exceeded is expected — ignore it. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors within the timeout window; reset otherwise. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
}
492
493 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
494 {
495         if (INET_ECN_is_ce(iph->tos)) {
496                 if (skb->protocol == htons(ETH_P_IP)) {
497                         IP_ECN_set_ce(ip_hdr(skb));
498                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
499                         IP6_ECN_set_ce(ipv6_hdr(skb));
500                 }
501         }
502 }
503
504 static inline u8
505 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
506 {
507         u8 inner = 0;
508         if (skb->protocol == htons(ETH_P_IP))
509                 inner = old_iph->tos;
510         else if (skb->protocol == htons(ETH_P_IPV6))
511                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
512         return INET_ECN_encapsulate(tos, inner);
513 }
514
/*
 * Receive handler for IPPROTO_GRE packets.
 *
 * Parses the GRE header (flags plus optional checksum, key and sequence
 * words), looks up the matching tunnel and, if one is found, strips the
 * encapsulation and re-injects the inner packet via netif_rx().
 * Unmatched packets are answered with ICMP port-unreachable and dropped.
 * Always returns 0.
 */
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;
	__be16 gre_proto;
	unsigned int len;

	/* 16 = base header (4) plus worst-case checksum, key and
	 * sequence words (4 each). */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				/* Hardware sum available: accept if it
				 * folds to zero. */
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		/* Strip the outer IP + GRE headers. */
		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb->rtable->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* The packet must carry a checksum iff the tunnel expects
		 * one, and a carried checksum must verify (csum == 0). */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			/* Reject packets without a sequence number or whose
			 * number went backwards (signed wraparound compare). */
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		len = skb->len;

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			/* Re-read after the pull: header may have moved. */
			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		stats->rx_packets++;
		stats->rx_bytes += len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);

		skb_reset_network_header(skb);
		/* Propagate the outer CE mark to the inner header. */
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return(0);
	}
	/* No tunnel matched: tell the sender GRE is not handled here. */
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return(0);
}
654
/*
 * Transmit path: encapsulate @skb in an outer IP + GRE header and hand
 * it to the IP layer via IPTUNNEL_XMIT().
 *
 * Handles NBMA (no configured destination) tunnels, TOS/TTL inheritance,
 * DF/path-MTU enforcement for IPv4 and IPv6 payloads, headroom
 * expansion, and the optional GRE key/checksum/sequence words.
 * Always returns 0; failures bump the device stats and free the skb.
 */
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->dev->stats;
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *tiph;
	u8     tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	/* Break local dead loops — see the discussion at the top of
	 * this file. */
	if (tunnel->recursion++) {
		stats->collisions++;
		goto tx_error;
	}

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		/* Outer header was already built by header_ops; it sits
		 * at skb->data and no GRE header needs to be pushed. */
		gre_hlen = 0;
		tiph = (struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb->dst == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb->rtable;
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb->dst->neighbour;

			if (neigh == NULL)
				goto tx_error;

			/* Derive the IPv4 destination from an
			 * IPv4-compatible IPv6 neighbour/destination. */
			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos&1) {
		/* Low TOS bit set means "inherit TOS from the inner
		 * IPv4 packet". */
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		tos &= ~1;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		/* Route points straight back at us: immediate loop. */
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		/* DF set: the inner packet must fit after encapsulation. */
		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb->dst;

		/* Record the reduced MTU on the IPv6 route when it is
		 * host-specific or the tunnel has a fixed unicast peer. */
		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb->dst->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		/* Recent ICMP errors recorded by ipgre_err(): report link
		 * failure to the sender while the error budget lasts. */
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

	/* Make sure there is room to push the new headers and that the
	 * buffer is exclusively ours; copy if not. */
	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb_reset_transport_header(skb);
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	/* ttl == 0 in the tunnel config means "inherit from payload". */
	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	/* GRE header: flags word, then protocol. */
	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

	/* Optional words, filled back-to-front from the end of the GRE
	 * header: sequence (last), key, checksum (first). */
	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
892
893 static int ipgre_tunnel_bind_dev(struct net_device *dev)
894 {
895         struct net_device *tdev = NULL;
896         struct ip_tunnel *tunnel;
897         struct iphdr *iph;
898         int hlen = LL_MAX_HEADER;
899         int mtu = ETH_DATA_LEN;
900         int addend = sizeof(struct iphdr) + 4;
901
902         tunnel = netdev_priv(dev);
903         iph = &tunnel->parms.iph;
904
905         /* Guess output device to choose reasonable mtu and needed_headroom */
906
907         if (iph->daddr) {
908                 struct flowi fl = { .oif = tunnel->parms.link,
909                                     .nl_u = { .ip4_u =
910                                               { .daddr = iph->daddr,
911                                                 .saddr = iph->saddr,
912                                                 .tos = RT_TOS(iph->tos) } },
913                                     .proto = IPPROTO_GRE };
914                 struct rtable *rt;
915                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
916                         tdev = rt->u.dst.dev;
917                         ip_rt_put(rt);
918                 }
919
920                 if (dev->type != ARPHRD_ETHER)
921                         dev->flags |= IFF_POINTOPOINT;
922         }
923
924         if (!tdev && tunnel->parms.link)
925                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
926
927         if (tdev) {
928                 hlen = tdev->hard_header_len + tdev->needed_headroom;
929                 mtu = tdev->mtu;
930         }
931         dev->iflink = tunnel->parms.link;
932
933         /* Precalculate GRE options length */
934         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
935                 if (tunnel->parms.o_flags&GRE_CSUM)
936                         addend += 4;
937                 if (tunnel->parms.o_flags&GRE_KEY)
938                         addend += 4;
939                 if (tunnel->parms.o_flags&GRE_SEQ)
940                         addend += 4;
941         }
942         dev->needed_headroom = addend + hlen;
943         mtu -= dev->hard_header_len - addend;
944
945         if (mtu < 68)
946                 mtu = 68;
947
948         tunnel->hlen = addend;
949
950         return mtu;
951 }
952
/*
 * ipgre_tunnel_ioctl - legacy SIOC{GET,ADD,CHG,DEL}TUNNEL interface.
 *
 * On the fallback device ("gre0") GET/DEL operate on the tunnel named in
 * the user-supplied ip_tunnel_parm; on a specific tunnel device they act
 * on that device itself.  ADD/CHG/DEL require CAP_NET_ADMIN.
 * Returns 0 or a negative errno; the (possibly updated) parameters are
 * copied back to userspace for GET/ADD/CHG.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		/* Fall back to this device's own parameters. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity: outer header must be plain IPv4/GRE without
		 * options, and we implement neither GRE routing nor any
		 * non-zero GRE version. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Keys are only meaningful when the corresponding flag
		 * is set; zero them otherwise so lookups are canonical. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters match another device: refuse. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Cannot morph a device between broadcast
				 * and point-to-point operation on the fly. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash under the new endpoints/keys. */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				/* Rebinding the physical device changes the
				 * headroom and usable MTU. */
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1082
1083 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1084 {
1085         struct ip_tunnel *tunnel = netdev_priv(dev);
1086         if (new_mtu < 68 ||
1087             new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1088                 return -EINVAL;
1089         dev->mtu = new_mtu;
1090         return 0;
1091 }
1092
1093 /* Nice toy. Unfortunately, useless in real life :-)
1094    It allows to construct virtual multiprotocol broadcast "LAN"
1095    over the Internet, provided multicast routing is tuned.
1096
1097
1098    I have no idea whether this bicycle was invented before me,
1099    so that I had to set ARPHRD_IPGRE to a random value.
1100    I have an impression, that Cisco could make something similar,
1101    but this feature is apparently missing in IOS<=11.2(8).
1102
1103    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1104    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1105
1106    ping -t 255 224.66.66.66
1107
1108    If nobody answers, mbone does not work.
1109
1110    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1111    ip addr add 10.66.66.<somewhat>/24 dev Universe
1112    ifconfig Universe up
1113    ifconfig Universe add fe80::<Your_real_addr>/10
1114    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1115    ftp 10.66.66.66
1116    ...
1117    ftp fec0:6666:6666::193.233.7.65
1118    ...
1119
1120  */
1121
/*
 * ipgre_header - build the outer IP + GRE header for a broadcast/NBMA
 * tunnel packet (header_ops->create).
 *
 * Returns the header length pushed when the destination is resolved,
 * or minus that length when address resolution is still required.
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16*)(iph+1);

	/* Start from the configured outer header template, then the GRE
	 * base header: flags word followed by the protocol type. */
	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);

	if (daddr) {
		memcpy(&iph->daddr, daddr, 4);
		return t->hlen;
	}
	/* NOTE(review): a configured unicast daddr counts as resolved, but a
	 * multicast daddr deliberately falls through to the negative return
	 * (caller must resolve) -- confirm this is the intended NBMA
	 * behaviour before changing. */
	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
		return t->hlen;

	return -t->hlen;
}
1150
1151 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1152 {
1153         struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1154         memcpy(haddr, &iph->saddr, 4);
1155         return 4;
1156 }
1157
/* Hard-header ops used by broadcast/NBMA tunnels (installed from
 * ipgre_tunnel_init() when appropriate). */
static const struct header_ops ipgre_header_ops = {
	.create = ipgre_header,
	.parse	= ipgre_header_parse,
};
1162
1163 #ifdef CONFIG_NET_IPGRE_BROADCAST
1164 static int ipgre_open(struct net_device *dev)
1165 {
1166         struct ip_tunnel *t = netdev_priv(dev);
1167
1168         if (ipv4_is_multicast(t->parms.iph.daddr)) {
1169                 struct flowi fl = { .oif = t->parms.link,
1170                                     .nl_u = { .ip4_u =
1171                                               { .daddr = t->parms.iph.daddr,
1172                                                 .saddr = t->parms.iph.saddr,
1173                                                 .tos = RT_TOS(t->parms.iph.tos) } },
1174                                     .proto = IPPROTO_GRE };
1175                 struct rtable *rt;
1176                 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1177                         return -EADDRNOTAVAIL;
1178                 dev = rt->u.dst.dev;
1179                 ip_rt_put(rt);
1180                 if (__in_dev_get_rtnl(dev) == NULL)
1181                         return -EADDRNOTAVAIL;
1182                 t->mlink = dev->ifindex;
1183                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1184         }
1185         return 0;
1186 }
1187
1188 static int ipgre_close(struct net_device *dev)
1189 {
1190         struct ip_tunnel *t = netdev_priv(dev);
1191
1192         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1193                 struct in_device *in_dev;
1194                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1195                 if (in_dev) {
1196                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1197                         in_dev_put(in_dev);
1198                 }
1199         }
1200         return 0;
1201 }
1202
1203 #endif
1204
/* Device operations for ARPHRD_IPGRE ("gre") tunnel devices.  The
 * open/stop hooks only exist for the broadcast ("virtual LAN over
 * multicast") configuration. */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};
1216
/*
 * Default setup for an ARPHRD_IPGRE tunnel netdevice, used both for the
 * fallback device and for rtnl-created "gre" links (rebound later by
 * ipgre_tunnel_bind_dev()).
 */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor 	= free_netdev;

	dev->type		= ARPHRD_IPGRE;
	/* Conservative defaults: room/MTU for outer IP + base GRE header. */
	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	/* "Hardware" addresses are the 4-byte IPv4 tunnel endpoints. */
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
1230
/*
 * ndo_init for "gre" tunnel devices: record identity, seed dev_addr /
 * broadcast from the tunnel endpoints, and install header_ops for the
 * broadcast and NBMA configurations.
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* addr_len is 4: the device addresses are the IPv4 endpoints. */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Broadcast mode requires a fixed local address to
			 * join the group.  Note the plain assignment below
			 * replaces the IFF_NOARP default from setup(). */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		/* NBMA mode (no fixed remote): header built per packet. */
		dev->header_ops = &ipgre_header_ops;

	return 0;
}
1259
1260 static void ipgre_fb_tunnel_init(struct net_device *dev)
1261 {
1262         struct ip_tunnel *tunnel = netdev_priv(dev);
1263         struct iphdr *iph = &tunnel->parms.iph;
1264         struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1265
1266         tunnel->dev = dev;
1267         strcpy(tunnel->parms.name, dev->name);
1268
1269         iph->version            = 4;
1270         iph->protocol           = IPPROTO_GRE;
1271         iph->ihl                = 5;
1272         tunnel->hlen            = sizeof(struct iphdr) + 4;
1273
1274         dev_hold(dev);
1275         ign->tunnels_wc[0]      = tunnel;
1276 }
1277
1278
/* Receive and ICMP-error hooks registered with inet for IPPROTO_GRE. */
static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};
1284
1285 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1286 {
1287         int prio;
1288
1289         for (prio = 0; prio < 4; prio++) {
1290                 int h;
1291                 for (h = 0; h < HASH_SIZE; h++) {
1292                         struct ip_tunnel *t;
1293                         while ((t = ign->tunnels[prio][h]) != NULL)
1294                                 unregister_netdevice(t->dev);
1295                 }
1296         }
1297 }
1298
/*
 * Per-namespace init: allocate the ipgre_net state, attach it to the
 * namespace, and register the fallback "gre0" device.  The unwind
 * labels mirror the setup steps in reverse; free_netdev() is only
 * valid before register_netdev() has succeeded.
 */
static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}
1338
1339 static void ipgre_exit_net(struct net *net)
1340 {
1341         struct ipgre_net *ign;
1342
1343         ign = net_generic(net, ipgre_net_id);
1344         rtnl_lock();
1345         ipgre_destroy_tunnels(ign);
1346         rtnl_unlock();
1347         kfree(ign);
1348 }
1349
/* Per-network-namespace lifecycle hooks. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};
1354
1355 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1356 {
1357         __be16 flags;
1358
1359         if (!data)
1360                 return 0;
1361
1362         flags = 0;
1363         if (data[IFLA_GRE_IFLAGS])
1364                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1365         if (data[IFLA_GRE_OFLAGS])
1366                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367         if (flags & (GRE_VERSION|GRE_ROUTING))
1368                 return -EINVAL;
1369
1370         return 0;
1371 }
1372
1373 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1374 {
1375         __be32 daddr;
1376
1377         if (tb[IFLA_ADDRESS]) {
1378                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1379                         return -EINVAL;
1380                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1381                         return -EADDRNOTAVAIL;
1382         }
1383
1384         if (!data)
1385                 goto out;
1386
1387         if (data[IFLA_GRE_REMOTE]) {
1388                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1389                 if (!daddr)
1390                         return -EINVAL;
1391         }
1392
1393 out:
1394         return ipgre_tunnel_validate(tb, data);
1395 }
1396
/*
 * Translate IFLA_GRE_* netlink attributes into ip_tunnel_parm.  Starts
 * from a zeroed structure, so absent attributes default to zero --
 * except PMTU discovery, which defaults to on (DF set) unless the
 * attribute is present and explicitly zero.
 */
static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* PMTU discovery is opt-out: only a present, zero attribute
	 * leaves DF clear. */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
1437
1438 static int ipgre_tap_init(struct net_device *dev)
1439 {
1440         struct ip_tunnel *tunnel;
1441
1442         tunnel = netdev_priv(dev);
1443
1444         tunnel->dev = dev;
1445         strcpy(tunnel->parms.name, dev->name);
1446
1447         ipgre_tunnel_bind_dev(dev);
1448
1449         return 0;
1450 }
1451
/* Device operations for ARPHRD_ETHER ("gretap") devices.  These are
 * managed through rtnetlink, so no tunnel ioctl hook; Ethernet MAC
 * handling comes from the eth_* helpers. */
static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address 	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};
1460
1461 static void ipgre_tap_setup(struct net_device *dev)
1462 {
1463
1464         ether_setup(dev);
1465
1466         dev->netdev_ops         = &ipgre_netdev_ops;
1467         dev->destructor         = free_netdev;
1468
1469         dev->iflink             = 0;
1470         dev->features           |= NETIF_F_NETNS_LOCAL;
1471 }
1472
/*
 * rtnl newlink handler shared by "gre" and "gretap".  Parses the
 * attributes into the device's private area, refuses duplicates,
 * binds to the underlying device for MTU/headroom, then registers
 * and hashes the tunnel.
 */
static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	/* The dev->type argument keeps "gre" and "gretap" namespaces
	 * of identical parameters from colliding. */
	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		random_ether_addr(dev->dev_addr);

	/* An explicit IFLA_MTU from userspace wins over the computed one. */
	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* The hash table holds its own reference to the device. */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}
1505
/*
 * rtnl changelink handler: apply new parameters to an existing tunnel.
 * Endpoint/key changes require re-hashing; TTL/TOS/DF and the output
 * key can change in place.  The fallback device is immutable.
 */
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		/* New parameters already belong to another device. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		unsigned nflags = 0;

		t = nt;

		if (ipv4_is_multicast(p.iph.daddr))
			nflags = IFF_BROADCAST;
		else if (p.iph.daddr)
			nflags = IFF_POINTOPOINT;

		/* Cannot morph between broadcast and point-to-point. */
		if ((dev->flags ^ nflags) &
		    (IFF_POINTOPOINT | IFF_BROADCAST))
			return -EINVAL;

		/* Re-hash under the new endpoints and input key. */
		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		memcpy(dev->dev_addr, &p.iph.saddr, 4);
		memcpy(dev->broadcast, &p.iph.daddr, 4);
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	/* NOTE(review): i_flags/o_flags from netlink are not applied here;
	 * an o_flags change would also require recomputing tunnel->hlen --
	 * confirm this restriction is intentional. */
	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		/* Rebinding changes headroom/MTU; honour explicit IFLA_MTU. */
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
1565
/* Worst-case netlink payload for ipgre_fill_info(), one term per
 * attribute it emits. */
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}
1591
/*
 * Dump the tunnel configuration as IFLA_GRE_* attributes.  The NLA_PUT*
 * macros jump to nla_put_failure when the skb runs out of room, which
 * ipgre_get_size() is meant to prevent.
 */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
	/* PMTU discovery reported as a boolean derived from the DF bit. */
	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
1613
/* Validation policy for IFLA_GRE_* attributes from userspace. */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
1626
/* rtnetlink ops for plain "gre" (ARPHRD_IPGRE) links. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1639
/* rtnetlink ops for Ethernet-over-GRE "gretap" links; shares the
 * attribute policy and most handlers with "gre". */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1652
1653 /*
1654  *      And now the modules code and kernel interface.
1655  */
1656
1657 static int __init ipgre_init(void)
1658 {
1659         int err;
1660
1661         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1662
1663         if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1664                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1665                 return -EAGAIN;
1666         }
1667
1668         err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1669         if (err < 0)
1670                 goto gen_device_failed;
1671
1672         err = rtnl_link_register(&ipgre_link_ops);
1673         if (err < 0)
1674                 goto rtnl_link_failed;
1675
1676         err = rtnl_link_register(&ipgre_tap_ops);
1677         if (err < 0)
1678                 goto tap_ops_failed;
1679
1680 out:
1681         return err;
1682
1683 tap_ops_failed:
1684         rtnl_link_unregister(&ipgre_link_ops);
1685 rtnl_link_failed:
1686         unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1687 gen_device_failed:
1688         inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1689         goto out;
1690 }
1691
1692 static void __exit ipgre_fini(void)
1693 {
1694         rtnl_link_unregister(&ipgre_tap_ops);
1695         rtnl_link_unregister(&ipgre_link_ops);
1696         unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1697         if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1698                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1699 }
1700
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Let "ip link add ... type gre|gretap" autoload this module. */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");