]> git.karo-electronics.de Git - mv-sheeva.git/blob - net/ipv6/route.c
mtd: add "platform:" prefix for platform modalias
[mv-sheeva.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
/*
 * RDBG() and RT6_TRACE() expand to printk calls only when RT6_DEBUG >= 3.
 * At lower levels RDBG() expands to nothing and RT6_TRACE() to an empty
 * do/while statement, so their arguments are not evaluated.
 */
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
/*
 * Compile-time switch read by ip6_pol_route(): when non-zero, a route that
 * already has a nexthop is cloned with rt6_alloc_clone(); when 0 such a
 * route is returned as-is.
 */
75 #define CLONE_OFFLINK_ROUTE 0
76
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
80 static unsigned int      ip6_default_mtu(const struct dst_entry *dst);
81 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82 static void             ip6_dst_destroy(struct dst_entry *);
83 static void             ip6_dst_ifdown(struct dst_entry *,
84                                        struct net_device *dev, int how);
85 static int               ip6_dst_gc(struct dst_ops *ops);
86
87 static int              ip6_pkt_discard(struct sk_buff *skb);
88 static int              ip6_pkt_discard_out(struct sk_buff *skb);
89 static void             ip6_link_failure(struct sk_buff *skb);
90 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
92 #ifdef CONFIG_IPV6_ROUTE_INFO
93 static struct rt6_info *rt6_add_route_info(struct net *net,
94                                            struct in6_addr *prefix, int prefixlen,
95                                            struct in6_addr *gwaddr, int ifindex,
96                                            unsigned pref);
97 static struct rt6_info *rt6_get_route_info(struct net *net,
98                                            struct in6_addr *prefix, int prefixlen,
99                                            struct in6_addr *gwaddr, int ifindex);
100 #endif
101
102 static struct dst_ops ip6_dst_ops_template = {
103         .family                 =       AF_INET6,
104         .protocol               =       cpu_to_be16(ETH_P_IPV6),
105         .gc                     =       ip6_dst_gc,
106         .gc_thresh              =       1024,
107         .check                  =       ip6_dst_check,
108         .default_advmss         =       ip6_default_advmss,
109         .default_mtu            =       ip6_default_mtu,
110         .destroy                =       ip6_dst_destroy,
111         .ifdown                 =       ip6_dst_ifdown,
112         .negative_advice        =       ip6_negative_advice,
113         .link_failure           =       ip6_link_failure,
114         .update_pmtu            =       ip6_rt_update_pmtu,
115         .local_out              =       __ip6_local_out,
116 };
117
118 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
119 {
120 }
121
122 static struct dst_ops ip6_dst_blackhole_ops = {
123         .family                 =       AF_INET6,
124         .protocol               =       cpu_to_be16(ETH_P_IPV6),
125         .destroy                =       ip6_dst_destroy,
126         .check                  =       ip6_dst_check,
127         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
128 };
129
130 static struct rt6_info ip6_null_entry_template = {
131         .dst = {
132                 .__refcnt       = ATOMIC_INIT(1),
133                 .__use          = 1,
134                 .obsolete       = -1,
135                 .error          = -ENETUNREACH,
136                 .input          = ip6_pkt_discard,
137                 .output         = ip6_pkt_discard_out,
138         },
139         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
140         .rt6i_protocol  = RTPROT_KERNEL,
141         .rt6i_metric    = ~(u32) 0,
142         .rt6i_ref       = ATOMIC_INIT(1),
143 };
144
145 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
146
147 static int ip6_pkt_prohibit(struct sk_buff *skb);
148 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
149
150 static struct rt6_info ip6_prohibit_entry_template = {
151         .dst = {
152                 .__refcnt       = ATOMIC_INIT(1),
153                 .__use          = 1,
154                 .obsolete       = -1,
155                 .error          = -EACCES,
156                 .input          = ip6_pkt_prohibit,
157                 .output         = ip6_pkt_prohibit_out,
158         },
159         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
160         .rt6i_protocol  = RTPROT_KERNEL,
161         .rt6i_metric    = ~(u32) 0,
162         .rt6i_ref       = ATOMIC_INIT(1),
163 };
164
165 static struct rt6_info ip6_blk_hole_entry_template = {
166         .dst = {
167                 .__refcnt       = ATOMIC_INIT(1),
168                 .__use          = 1,
169                 .obsolete       = -1,
170                 .error          = -EINVAL,
171                 .input          = dst_discard,
172                 .output         = dst_discard,
173         },
174         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
175         .rt6i_protocol  = RTPROT_KERNEL,
176         .rt6i_metric    = ~(u32) 0,
177         .rt6i_ref       = ATOMIC_INIT(1),
178 };
179
180 #endif
181
/*
 * Allocate a dst entry from @ops and hand it back viewed as an rt6_info.
 * Whatever dst_alloc() returns (including a failure value) is passed on
 * unchanged apart from the cast.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *)dst_alloc(ops);
	return rt;
}
187
188 static void ip6_dst_destroy(struct dst_entry *dst)
189 {
190         struct rt6_info *rt = (struct rt6_info *)dst;
191         struct inet6_dev *idev = rt->rt6i_idev;
192         struct inet_peer *peer = rt->rt6i_peer;
193
194         if (idev != NULL) {
195                 rt->rt6i_idev = NULL;
196                 in6_dev_put(idev);
197         }
198         if (peer) {
199                 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
200                 rt->rt6i_peer = NULL;
201                 inet_putpeer(peer);
202         }
203 }
204
205 void rt6_bind_peer(struct rt6_info *rt, int create)
206 {
207         struct inet_peer *peer;
208
209         if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
210                 return;
211
212         peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
213         if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
214                 inet_putpeer(peer);
215 }
216
217 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
218                            int how)
219 {
220         struct rt6_info *rt = (struct rt6_info *)dst;
221         struct inet6_dev *idev = rt->rt6i_idev;
222         struct net_device *loopback_dev =
223                 dev_net(dev)->loopback_dev;
224
225         if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
226                 struct inet6_dev *loopback_idev =
227                         in6_dev_get(loopback_dev);
228                 if (loopback_idev != NULL) {
229                         rt->rt6i_idev = loopback_idev;
230                         in6_dev_put(idev);
231                 }
232         }
233 }
234
235 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
236 {
237         return (rt->rt6i_flags & RTF_EXPIRES) &&
238                 time_after(jiffies, rt->rt6i_expires);
239 }
240
241 static inline int rt6_need_strict(struct in6_addr *daddr)
242 {
243         return ipv6_addr_type(daddr) &
244                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
245 }
246
247 /*
248  *      Route lookup. Any table->tb6_lock is implied.
249  */
250
251 static inline struct rt6_info *rt6_device_match(struct net *net,
252                                                     struct rt6_info *rt,
253                                                     struct in6_addr *saddr,
254                                                     int oif,
255                                                     int flags)
256 {
257         struct rt6_info *local = NULL;
258         struct rt6_info *sprt;
259
260         if (!oif && ipv6_addr_any(saddr))
261                 goto out;
262
263         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
264                 struct net_device *dev = sprt->rt6i_dev;
265
266                 if (oif) {
267                         if (dev->ifindex == oif)
268                                 return sprt;
269                         if (dev->flags & IFF_LOOPBACK) {
270                                 if (sprt->rt6i_idev == NULL ||
271                                     sprt->rt6i_idev->dev->ifindex != oif) {
272                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
273                                                 continue;
274                                         if (local && (!oif ||
275                                                       local->rt6i_idev->dev->ifindex == oif))
276                                                 continue;
277                                 }
278                                 local = sprt;
279                         }
280                 } else {
281                         if (ipv6_chk_addr(net, saddr, dev,
282                                           flags & RT6_LOOKUP_F_IFACE))
283                                 return sprt;
284                 }
285         }
286
287         if (oif) {
288                 if (local)
289                         return local;
290
291                 if (flags & RT6_LOOKUP_F_IFACE)
292                         return net->ipv6.ip6_null_entry;
293         }
294 out:
295         return rt;
296 }
297
298 #ifdef CONFIG_IPV6_ROUTER_PREF
299 static void rt6_probe(struct rt6_info *rt)
300 {
301         struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
302         /*
303          * Okay, this does not seem to be appropriate
304          * for now, however, we need to check if it
305          * is really so; aka Router Reachability Probing.
306          *
307          * Router Reachability Probe MUST be rate-limited
308          * to no more than one per minute.
309          */
310         if (!neigh || (neigh->nud_state & NUD_VALID))
311                 return;
312         read_lock_bh(&neigh->lock);
313         if (!(neigh->nud_state & NUD_VALID) &&
314             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
315                 struct in6_addr mcaddr;
316                 struct in6_addr *target;
317
318                 neigh->updated = jiffies;
319                 read_unlock_bh(&neigh->lock);
320
321                 target = (struct in6_addr *)&neigh->primary_key;
322                 addrconf_addr_solict_mult(target, &mcaddr);
323                 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
324         } else
325                 read_unlock_bh(&neigh->lock);
326 }
327 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
331 #endif
332
333 /*
334  * Default Router Selection (RFC 2461 6.3.6)
335  */
336 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
337 {
338         struct net_device *dev = rt->rt6i_dev;
339         if (!oif || dev->ifindex == oif)
340                 return 2;
341         if ((dev->flags & IFF_LOOPBACK) &&
342             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
343                 return 1;
344         return 0;
345 }
346
347 static inline int rt6_check_neigh(struct rt6_info *rt)
348 {
349         struct neighbour *neigh = rt->rt6i_nexthop;
350         int m;
351         if (rt->rt6i_flags & RTF_NONEXTHOP ||
352             !(rt->rt6i_flags & RTF_GATEWAY))
353                 m = 1;
354         else if (neigh) {
355                 read_lock_bh(&neigh->lock);
356                 if (neigh->nud_state & NUD_VALID)
357                         m = 2;
358 #ifdef CONFIG_IPV6_ROUTER_PREF
359                 else if (neigh->nud_state & NUD_FAILED)
360                         m = 0;
361 #endif
362                 else
363                         m = 1;
364                 read_unlock_bh(&neigh->lock);
365         } else
366                 m = 0;
367         return m;
368 }
369
370 static int rt6_score_route(struct rt6_info *rt, int oif,
371                            int strict)
372 {
373         int m, n;
374
375         m = rt6_check_dev(rt, oif);
376         if (!m && (strict & RT6_LOOKUP_F_IFACE))
377                 return -1;
378 #ifdef CONFIG_IPV6_ROUTER_PREF
379         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
380 #endif
381         n = rt6_check_neigh(rt);
382         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
383                 return -1;
384         return m;
385 }
386
387 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
388                                    int *mpri, struct rt6_info *match)
389 {
390         int m;
391
392         if (rt6_check_expired(rt))
393                 goto out;
394
395         m = rt6_score_route(rt, oif, strict);
396         if (m < 0)
397                 goto out;
398
399         if (m > *mpri) {
400                 if (strict & RT6_LOOKUP_F_REACHABLE)
401                         rt6_probe(match);
402                 *mpri = m;
403                 match = rt;
404         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
405                 rt6_probe(rt);
406         }
407
408 out:
409         return match;
410 }
411
412 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
413                                      struct rt6_info *rr_head,
414                                      u32 metric, int oif, int strict)
415 {
416         struct rt6_info *rt, *match;
417         int mpri = -1;
418
419         match = NULL;
420         for (rt = rr_head; rt && rt->rt6i_metric == metric;
421              rt = rt->dst.rt6_next)
422                 match = find_match(rt, oif, strict, &mpri, match);
423         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
424              rt = rt->dst.rt6_next)
425                 match = find_match(rt, oif, strict, &mpri, match);
426
427         return match;
428 }
429
430 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
431 {
432         struct rt6_info *match, *rt0;
433         struct net *net;
434
435         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
436                   __func__, fn->leaf, oif);
437
438         rt0 = fn->rr_ptr;
439         if (!rt0)
440                 fn->rr_ptr = rt0 = fn->leaf;
441
442         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
443
444         if (!match &&
445             (strict & RT6_LOOKUP_F_REACHABLE)) {
446                 struct rt6_info *next = rt0->dst.rt6_next;
447
448                 /* no entries matched; do round-robin */
449                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
450                         next = fn->leaf;
451
452                 if (next != rt0)
453                         fn->rr_ptr = next;
454         }
455
456         RT6_TRACE("%s() => %p\n",
457                   __func__, match);
458
459         net = dev_net(rt0->rt6i_dev);
460         return match ? match : net->ipv6.ip6_null_entry;
461 }
462
463 #ifdef CONFIG_IPV6_ROUTE_INFO
464 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
465                   struct in6_addr *gwaddr)
466 {
467         struct net *net = dev_net(dev);
468         struct route_info *rinfo = (struct route_info *) opt;
469         struct in6_addr prefix_buf, *prefix;
470         unsigned int pref;
471         unsigned long lifetime;
472         struct rt6_info *rt;
473
474         if (len < sizeof(struct route_info)) {
475                 return -EINVAL;
476         }
477
478         /* Sanity check for prefix_len and length */
479         if (rinfo->length > 3) {
480                 return -EINVAL;
481         } else if (rinfo->prefix_len > 128) {
482                 return -EINVAL;
483         } else if (rinfo->prefix_len > 64) {
484                 if (rinfo->length < 2) {
485                         return -EINVAL;
486                 }
487         } else if (rinfo->prefix_len > 0) {
488                 if (rinfo->length < 1) {
489                         return -EINVAL;
490                 }
491         }
492
493         pref = rinfo->route_pref;
494         if (pref == ICMPV6_ROUTER_PREF_INVALID)
495                 return -EINVAL;
496
497         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
498
499         if (rinfo->length == 3)
500                 prefix = (struct in6_addr *)rinfo->prefix;
501         else {
502                 /* this function is safe */
503                 ipv6_addr_prefix(&prefix_buf,
504                                  (struct in6_addr *)rinfo->prefix,
505                                  rinfo->prefix_len);
506                 prefix = &prefix_buf;
507         }
508
509         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
510                                 dev->ifindex);
511
512         if (rt && !lifetime) {
513                 ip6_del_rt(rt);
514                 rt = NULL;
515         }
516
517         if (!rt && lifetime)
518                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
519                                         pref);
520         else if (rt)
521                 rt->rt6i_flags = RTF_ROUTEINFO |
522                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
523
524         if (rt) {
525                 if (!addrconf_finite_timeout(lifetime)) {
526                         rt->rt6i_flags &= ~RTF_EXPIRES;
527                 } else {
528                         rt->rt6i_expires = jiffies + HZ * lifetime;
529                         rt->rt6i_flags |= RTF_EXPIRES;
530                 }
531                 dst_release(&rt->dst);
532         }
533         return 0;
534 }
535 #endif
536
/*
 * BACKTRACK(net, saddr) - retry a failed lookup higher up the FIB tree.
 *
 * This macro is not self-contained: it reads the local variable `rt`,
 * reads and updates the local variable `fn`, and jumps to the labels
 * `out` and `restart`, all of which the enclosing function must provide.
 *
 * It acts only when `rt` is the namespace's null entry.  It then walks
 * towards the root: at each step it moves to the parent node, or, when
 * the parent has a subtree that is not the node just left, to the
 * result of looking up `saddr` in that subtree.  It jumps to `restart`
 * as soon as the new node carries RTN_RTINFO, and to `out` (leaving
 * `rt` as the null entry) when the current node is flagged RTN_TL_ROOT.
 */
537 #define BACKTRACK(__net, saddr)                 \
538 do { \
539         if (rt == __net->ipv6.ip6_null_entry) { \
540                 struct fib6_node *pn; \
541                 while (1) { \
542                         if (fn->fn_flags & RTN_TL_ROOT) \
543                                 goto out; \
544                         pn = fn->parent; \
545                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
546                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
547                         else \
548                                 fn = pn; \
549                         if (fn->fn_flags & RTN_RTINFO) \
550                                 goto restart; \
551                 } \
552         } \
553 } while(0)
554
555 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
556                                              struct fib6_table *table,
557                                              struct flowi *fl, int flags)
558 {
559         struct fib6_node *fn;
560         struct rt6_info *rt;
561
562         read_lock_bh(&table->tb6_lock);
563         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
564 restart:
565         rt = fn->leaf;
566         rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
567         BACKTRACK(net, &fl->fl6_src);
568 out:
569         dst_use(&rt->dst, jiffies);
570         read_unlock_bh(&table->tb6_lock);
571         return rt;
572
573 }
574
575 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
576                             const struct in6_addr *saddr, int oif, int strict)
577 {
578         struct flowi fl = {
579                 .oif = oif,
580                 .fl6_dst = *daddr,
581         };
582         struct dst_entry *dst;
583         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
584
585         if (saddr) {
586                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
587                 flags |= RT6_LOOKUP_F_HAS_SADDR;
588         }
589
590         dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
591         if (dst->error == 0)
592                 return (struct rt6_info *) dst;
593
594         dst_release(dst);
595
596         return NULL;
597 }
598
599 EXPORT_SYMBOL(rt6_lookup);
600
601 /* ip6_ins_rt is called with table->tb6_lock NOT held.
602    It takes a new route entry; if the addition fails for any reason,
603    the route is freed. In any case, if the caller does not hold a
604    reference to the route, it may be destroyed.
605  */
606
607 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
608 {
609         int err;
610         struct fib6_table *table;
611
612         table = rt->rt6i_table;
613         write_lock_bh(&table->tb6_lock);
614         err = fib6_add(&table->tb6_root, rt, info);
615         write_unlock_bh(&table->tb6_lock);
616
617         return err;
618 }
619
620 int ip6_ins_rt(struct rt6_info *rt)
621 {
622         struct nl_info info = {
623                 .nl_net = dev_net(rt->rt6i_dev),
624         };
625         return __ip6_ins_rt(rt, &info);
626 }
627
628 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
629                                       struct in6_addr *saddr)
630 {
631         struct rt6_info *rt;
632
633         /*
634          *      Clone the route.
635          */
636
637         rt = ip6_rt_copy(ort);
638
639         if (rt) {
640                 struct neighbour *neigh;
641                 int attempts = !in_softirq();
642
643                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
644                         if (rt->rt6i_dst.plen != 128 &&
645                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
646                                 rt->rt6i_flags |= RTF_ANYCAST;
647                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
648                 }
649
650                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
651                 rt->rt6i_dst.plen = 128;
652                 rt->rt6i_flags |= RTF_CACHE;
653                 rt->dst.flags |= DST_HOST;
654
655 #ifdef CONFIG_IPV6_SUBTREES
656                 if (rt->rt6i_src.plen && saddr) {
657                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
658                         rt->rt6i_src.plen = 128;
659                 }
660 #endif
661
662         retry:
663                 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
664                 if (IS_ERR(neigh)) {
665                         struct net *net = dev_net(rt->rt6i_dev);
666                         int saved_rt_min_interval =
667                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
668                         int saved_rt_elasticity =
669                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
670
671                         if (attempts-- > 0) {
672                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
673                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
674
675                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
676
677                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
678                                         saved_rt_elasticity;
679                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
680                                         saved_rt_min_interval;
681                                 goto retry;
682                         }
683
684                         if (net_ratelimit())
685                                 printk(KERN_WARNING
686                                        "ipv6: Neighbour table overflow.\n");
687                         dst_free(&rt->dst);
688                         return NULL;
689                 }
690                 rt->rt6i_nexthop = neigh;
691
692         }
693
694         return rt;
695 }
696
697 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
698 {
699         struct rt6_info *rt = ip6_rt_copy(ort);
700         if (rt) {
701                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
702                 rt->rt6i_dst.plen = 128;
703                 rt->rt6i_flags |= RTF_CACHE;
704                 rt->dst.flags |= DST_HOST;
705                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
706         }
707         return rt;
708 }
709
/*
 * Core policy-routing lookup shared by the input and output paths.
 *
 * @net:   network namespace.
 * @table: FIB table to search.
 * @oif:   interface index to match (the callers in this file pass
 *         fl->iif for input and fl->oif for output).
 * @fl:    flow; fl6_dst and fl6_src are the lookup keys.
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is taken from it.
 *
 * The route is chosen with rt6_select() under the table's read lock.
 * Unless forwarding is enabled in devconf_all, the first pass also
 * demands RT6_LOOKUP_F_REACHABLE; if that pass ends at the null entry or
 * at a cached route, the requirement is dropped and the lookup redone.
 *
 * A result that is neither the null entry nor a cached route, and that
 * has no nexthop while not being RTF_NONEXTHOP, is copied with
 * rt6_alloc_cow() and the copy inserted into the table.  If the copy
 * could not be made or inserted, the whole lookup is repeated, up to
 * three attempts in total.
 *
 * Every exit path returns a route on which dst_hold() has been called,
 * with lastuse and __use updated.
 */
710 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
711                                       struct flowi *fl, int flags)
712 {
713         struct fib6_node *fn;
714         struct rt6_info *rt, *nrt;
715         int strict = 0;
716         int attempts = 3;
717         int err;
718         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
719
720         strict |= flags & RT6_LOOKUP_F_IFACE;
721
722 relookup:
723         read_lock_bh(&table->tb6_lock);
724
725 restart_2:
726         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
727
728 restart:
729         rt = rt6_select(fn, oif, strict | reachable);
730
	/* May jump to restart (fn moved up the tree) or to out (root reached). */
731         BACKTRACK(net, &fl->fl6_src);
732         if (rt == net->ipv6.ip6_null_entry ||
733             rt->rt6i_flags & RTF_CACHE)
734                 goto out;
735
	/* Pin the route before dropping the lock; the copy is made unlocked. */
736         dst_hold(&rt->dst);
737         read_unlock_bh(&table->tb6_lock);
738
739         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
740                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
741         else {
742 #if CLONE_OFFLINK_ROUTE
743                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
744 #else
745                 goto out2;
746 #endif
747         }
748
	/* From here on rt is the copy, or the null entry if copying failed. */
749         dst_release(&rt->dst);
750         rt = nrt ? : net->ipv6.ip6_null_entry;
751
752         dst_hold(&rt->dst);
753         if (nrt) {
754                 err = ip6_ins_rt(nrt);
755                 if (!err)
756                         goto out2;
757         }
758
759         if (--attempts <= 0)
760                 goto out2;
761
762         /*
763          * Race condition! In the gap, when table->tb6_lock was
764          * released someone could insert this route.  Relookup.
765          */
766         dst_release(&rt->dst);
767         goto relookup;
768
769 out:
	/* Reached with the read lock still held. */
770         if (reachable) {
771                 reachable = 0;
772                 goto restart_2;
773         }
774         dst_hold(&rt->dst);
775         read_unlock_bh(&table->tb6_lock);
776 out2:
777         rt->dst.lastuse = jiffies;
778         rt->dst.__use++;
779
780         return rt;
781 }
782
783 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
784                                             struct flowi *fl, int flags)
785 {
786         return ip6_pol_route(net, table, fl->iif, fl, flags);
787 }
788
789 void ip6_route_input(struct sk_buff *skb)
790 {
791         struct ipv6hdr *iph = ipv6_hdr(skb);
792         struct net *net = dev_net(skb->dev);
793         int flags = RT6_LOOKUP_F_HAS_SADDR;
794         struct flowi fl = {
795                 .iif = skb->dev->ifindex,
796                 .fl6_dst = iph->daddr,
797                 .fl6_src = iph->saddr,
798                 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
799                 .mark = skb->mark,
800                 .proto = iph->nexthdr,
801         };
802
803         if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
804                 flags |= RT6_LOOKUP_F_IFACE;
805
806         skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
807 }
808
809 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
810                                              struct flowi *fl, int flags)
811 {
812         return ip6_pol_route(net, table, fl->oif, fl, flags);
813 }
814
815 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
816                                     struct flowi *fl)
817 {
818         int flags = 0;
819
820         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
821                 flags |= RT6_LOOKUP_F_IFACE;
822
823         if (!ipv6_addr_any(&fl->fl6_src))
824                 flags |= RT6_LOOKUP_F_HAS_SADDR;
825         else if (sk)
826                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
827
828         return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
829 }
830
831 EXPORT_SYMBOL(ip6_route_output);
832
833 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
834 {
835         struct rt6_info *ort = (struct rt6_info *) *dstp;
836         struct rt6_info *rt = (struct rt6_info *)
837                 dst_alloc(&ip6_dst_blackhole_ops);
838         struct dst_entry *new = NULL;
839
840         if (rt) {
841                 new = &rt->dst;
842
843                 atomic_set(&new->__refcnt, 1);
844                 new->__use = 1;
845                 new->input = dst_discard;
846                 new->output = dst_discard;
847
848                 dst_copy_metrics(new, &ort->dst);
849                 new->dev = ort->dst.dev;
850                 if (new->dev)
851                         dev_hold(new->dev);
852                 rt->rt6i_idev = ort->rt6i_idev;
853                 if (rt->rt6i_idev)
854                         in6_dev_hold(rt->rt6i_idev);
855                 rt->rt6i_expires = 0;
856
857                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
858                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
859                 rt->rt6i_metric = 0;
860
861                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
862 #ifdef CONFIG_IPV6_SUBTREES
863                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
864 #endif
865
866                 dst_free(new);
867         }
868
869         dst_release(*dstp);
870         *dstp = new;
871         return new ? 0 : -ENOMEM;
872 }
873 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
874
875 /*
876  *      Destination cache support functions
877  */
878
879 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
880 {
881         struct rt6_info *rt;
882
883         rt = (struct rt6_info *) dst;
884
885         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
886                 return dst;
887
888         return NULL;
889 }
890
891 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
892 {
893         struct rt6_info *rt = (struct rt6_info *) dst;
894
895         if (rt) {
896                 if (rt->rt6i_flags & RTF_CACHE) {
897                         if (rt6_check_expired(rt)) {
898                                 ip6_del_rt(rt);
899                                 dst = NULL;
900                         }
901                 } else {
902                         dst_release(dst);
903                         dst = NULL;
904                 }
905         }
906         return dst;
907 }
908
909 static void ip6_link_failure(struct sk_buff *skb)
910 {
911         struct rt6_info *rt;
912
913         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
914
915         rt = (struct rt6_info *) skb_dst(skb);
916         if (rt) {
917                 if (rt->rt6i_flags&RTF_CACHE) {
918                         dst_set_expires(&rt->dst, 0);
919                         rt->rt6i_flags |= RTF_EXPIRES;
920                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
921                         rt->rt6i_node->fn_sernum = -1;
922         }
923 }
924
925 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
926 {
927         struct rt6_info *rt6 = (struct rt6_info*)dst;
928
929         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
930                 rt6->rt6i_flags |= RTF_MODIFIED;
931                 if (mtu < IPV6_MIN_MTU) {
932                         u32 features = dst_metric(dst, RTAX_FEATURES);
933                         mtu = IPV6_MIN_MTU;
934                         features |= RTAX_FEATURE_ALLFRAG;
935                         dst_metric_set(dst, RTAX_FEATURES, features);
936                 }
937                 dst_metric_set(dst, RTAX_MTU, mtu);
938                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
939         }
940 }
941
942 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
943 {
944         struct net_device *dev = dst->dev;
945         unsigned int mtu = dst_mtu(dst);
946         struct net *net = dev_net(dev);
947
948         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
949
950         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
951                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
952
953         /*
954          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
955          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
956          * IPV6_MAXPLEN is also valid and means: "any MSS,
957          * rely only on pmtu discovery"
958          */
959         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
960                 mtu = IPV6_MAXPLEN;
961         return mtu;
962 }
963
964 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
965 {
966         unsigned int mtu = IPV6_MIN_MTU;
967         struct inet6_dev *idev;
968
969         rcu_read_lock();
970         idev = __in6_dev_get(dst->dev);
971         if (idev)
972                 mtu = idev->cnf.mtu6;
973         rcu_read_unlock();
974
975         return mtu;
976 }
977
/*
 * Head of the singly linked list (chained through dst_entry.next) of dst
 * entries created by icmp6_dst_alloc().  The list is walked and pruned by
 * icmp6_dst_gc() and icmp6_clean_all(); every access in this file is made
 * with icmp6_dst_lock held.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
980
981 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
982                                   struct neighbour *neigh,
983                                   const struct in6_addr *addr)
984 {
985         struct rt6_info *rt;
986         struct inet6_dev *idev = in6_dev_get(dev);
987         struct net *net = dev_net(dev);
988
989         if (unlikely(idev == NULL))
990                 return NULL;
991
992         rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
993         if (unlikely(rt == NULL)) {
994                 in6_dev_put(idev);
995                 goto out;
996         }
997
998         dev_hold(dev);
999         if (neigh)
1000                 neigh_hold(neigh);
1001         else {
1002                 neigh = ndisc_get_neigh(dev, addr);
1003                 if (IS_ERR(neigh))
1004                         neigh = NULL;
1005         }
1006
1007         rt->rt6i_dev      = dev;
1008         rt->rt6i_idev     = idev;
1009         rt->rt6i_nexthop  = neigh;
1010         atomic_set(&rt->dst.__refcnt, 1);
1011         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1012         rt->dst.output  = ip6_output;
1013
1014 #if 0   /* there's no chance to use these for ndisc */
1015         rt->dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1016                                 ? DST_HOST
1017                                 : 0;
1018         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1019         rt->rt6i_dst.plen = 128;
1020 #endif
1021
1022         spin_lock_bh(&icmp6_dst_lock);
1023         rt->dst.next = icmp6_dst_gc_list;
1024         icmp6_dst_gc_list = &rt->dst;
1025         spin_unlock_bh(&icmp6_dst_lock);
1026
1027         fib6_force_start_gc(net);
1028
1029 out:
1030         return &rt->dst;
1031 }
1032
1033 int icmp6_dst_gc(void)
1034 {
1035         struct dst_entry *dst, *next, **pprev;
1036         int more = 0;
1037
1038         next = NULL;
1039
1040         spin_lock_bh(&icmp6_dst_lock);
1041         pprev = &icmp6_dst_gc_list;
1042
1043         while ((dst = *pprev) != NULL) {
1044                 if (!atomic_read(&dst->__refcnt)) {
1045                         *pprev = dst->next;
1046                         dst_free(dst);
1047                 } else {
1048                         pprev = &dst->next;
1049                         ++more;
1050                 }
1051         }
1052
1053         spin_unlock_bh(&icmp6_dst_lock);
1054
1055         return more;
1056 }
1057
1058 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1059                             void *arg)
1060 {
1061         struct dst_entry *dst, **pprev;
1062
1063         spin_lock_bh(&icmp6_dst_lock);
1064         pprev = &icmp6_dst_gc_list;
1065         while ((dst = *pprev) != NULL) {
1066                 struct rt6_info *rt = (struct rt6_info *) dst;
1067                 if (func(rt, arg)) {
1068                         *pprev = dst->next;
1069                         dst_free(dst);
1070                 } else {
1071                         pprev = &dst->next;
1072                 }
1073         }
1074         spin_unlock_bh(&icmp6_dst_lock);
1075 }
1076
1077 static int ip6_dst_gc(struct dst_ops *ops)
1078 {
1079         unsigned long now = jiffies;
1080         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1081         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1082         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1083         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1084         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1085         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1086         int entries;
1087
1088         entries = dst_entries_get_fast(ops);
1089         if (time_after(rt_last_gc + rt_min_interval, now) &&
1090             entries <= rt_max_size)
1091                 goto out;
1092
1093         net->ipv6.ip6_rt_gc_expire++;
1094         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1095         net->ipv6.ip6_rt_last_gc = now;
1096         entries = dst_entries_get_slow(ops);
1097         if (entries < ops->gc_thresh)
1098                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1099 out:
1100         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1101         return entries > rt_max_size;
1102 }
1103
1104 /* Clean host part of a prefix. Not necessary in radix tree,
1105    but results in cleaner routing tables.
1106
1107    Remove it only when all the things will work!
1108  */
1109
1110 int ip6_dst_hoplimit(struct dst_entry *dst)
1111 {
1112         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1113         if (hoplimit == 0) {
1114                 struct net_device *dev = dst->dev;
1115                 struct inet6_dev *idev;
1116
1117                 rcu_read_lock();
1118                 idev = __in6_dev_get(dev);
1119                 if (idev)
1120                         hoplimit = idev->cnf.hop_limit;
1121                 else
1122                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1123                 rcu_read_unlock();
1124         }
1125         return hoplimit;
1126 }
1127 EXPORT_SYMBOL(ip6_dst_hoplimit);
1128
1129 /*
1130  *
1131  */
1132
1133 int ip6_route_add(struct fib6_config *cfg)
1134 {
1135         int err;
1136         struct net *net = cfg->fc_nlinfo.nl_net;
1137         struct rt6_info *rt = NULL;
1138         struct net_device *dev = NULL;
1139         struct inet6_dev *idev = NULL;
1140         struct fib6_table *table;
1141         int addr_type;
1142
1143         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1144                 return -EINVAL;
1145 #ifndef CONFIG_IPV6_SUBTREES
1146         if (cfg->fc_src_len)
1147                 return -EINVAL;
1148 #endif
1149         if (cfg->fc_ifindex) {
1150                 err = -ENODEV;
1151                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1152                 if (!dev)
1153                         goto out;
1154                 idev = in6_dev_get(dev);
1155                 if (!idev)
1156                         goto out;
1157         }
1158
1159         if (cfg->fc_metric == 0)
1160                 cfg->fc_metric = IP6_RT_PRIO_USER;
1161
1162         table = fib6_new_table(net, cfg->fc_table);
1163         if (table == NULL) {
1164                 err = -ENOBUFS;
1165                 goto out;
1166         }
1167
1168         rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1169
1170         if (rt == NULL) {
1171                 err = -ENOMEM;
1172                 goto out;
1173         }
1174
1175         rt->dst.obsolete = -1;
1176         rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1177                                 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1178                                 0;
1179
1180         if (cfg->fc_protocol == RTPROT_UNSPEC)
1181                 cfg->fc_protocol = RTPROT_BOOT;
1182         rt->rt6i_protocol = cfg->fc_protocol;
1183
1184         addr_type = ipv6_addr_type(&cfg->fc_dst);
1185
1186         if (addr_type & IPV6_ADDR_MULTICAST)
1187                 rt->dst.input = ip6_mc_input;
1188         else if (cfg->fc_flags & RTF_LOCAL)
1189                 rt->dst.input = ip6_input;
1190         else
1191                 rt->dst.input = ip6_forward;
1192
1193         rt->dst.output = ip6_output;
1194
1195         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1196         rt->rt6i_dst.plen = cfg->fc_dst_len;
1197         if (rt->rt6i_dst.plen == 128)
1198                rt->dst.flags = DST_HOST;
1199
1200 #ifdef CONFIG_IPV6_SUBTREES
1201         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1202         rt->rt6i_src.plen = cfg->fc_src_len;
1203 #endif
1204
1205         rt->rt6i_metric = cfg->fc_metric;
1206
1207         /* We cannot add true routes via loopback here,
1208            they would result in kernel looping; promote them to reject routes
1209          */
1210         if ((cfg->fc_flags & RTF_REJECT) ||
1211             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1212                                               && !(cfg->fc_flags&RTF_LOCAL))) {
1213                 /* hold loopback dev/idev if we haven't done so. */
1214                 if (dev != net->loopback_dev) {
1215                         if (dev) {
1216                                 dev_put(dev);
1217                                 in6_dev_put(idev);
1218                         }
1219                         dev = net->loopback_dev;
1220                         dev_hold(dev);
1221                         idev = in6_dev_get(dev);
1222                         if (!idev) {
1223                                 err = -ENODEV;
1224                                 goto out;
1225                         }
1226                 }
1227                 rt->dst.output = ip6_pkt_discard_out;
1228                 rt->dst.input = ip6_pkt_discard;
1229                 rt->dst.error = -ENETUNREACH;
1230                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1231                 goto install_route;
1232         }
1233
1234         if (cfg->fc_flags & RTF_GATEWAY) {
1235                 struct in6_addr *gw_addr;
1236                 int gwa_type;
1237
1238                 gw_addr = &cfg->fc_gateway;
1239                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1240                 gwa_type = ipv6_addr_type(gw_addr);
1241
1242                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1243                         struct rt6_info *grt;
1244
1245                         /* IPv6 strictly inhibits using not link-local
1246                            addresses as nexthop address.
1247                            Otherwise, router will not able to send redirects.
1248                            It is very good, but in some (rare!) circumstances
1249                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1250                            some exceptions. --ANK
1251                          */
1252                         err = -EINVAL;
1253                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1254                                 goto out;
1255
1256                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1257
1258                         err = -EHOSTUNREACH;
1259                         if (grt == NULL)
1260                                 goto out;
1261                         if (dev) {
1262                                 if (dev != grt->rt6i_dev) {
1263                                         dst_release(&grt->dst);
1264                                         goto out;
1265                                 }
1266                         } else {
1267                                 dev = grt->rt6i_dev;
1268                                 idev = grt->rt6i_idev;
1269                                 dev_hold(dev);
1270                                 in6_dev_hold(grt->rt6i_idev);
1271                         }
1272                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1273                                 err = 0;
1274                         dst_release(&grt->dst);
1275
1276                         if (err)
1277                                 goto out;
1278                 }
1279                 err = -EINVAL;
1280                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1281                         goto out;
1282         }
1283
1284         err = -ENODEV;
1285         if (dev == NULL)
1286                 goto out;
1287
1288         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1289                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1290                 if (IS_ERR(rt->rt6i_nexthop)) {
1291                         err = PTR_ERR(rt->rt6i_nexthop);
1292                         rt->rt6i_nexthop = NULL;
1293                         goto out;
1294                 }
1295         }
1296
1297         rt->rt6i_flags = cfg->fc_flags;
1298
1299 install_route:
1300         if (cfg->fc_mx) {
1301                 struct nlattr *nla;
1302                 int remaining;
1303
1304                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1305                         int type = nla_type(nla);
1306
1307                         if (type) {
1308                                 if (type > RTAX_MAX) {
1309                                         err = -EINVAL;
1310                                         goto out;
1311                                 }
1312
1313                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1314                         }
1315                 }
1316         }
1317
1318         rt->dst.dev = dev;
1319         rt->rt6i_idev = idev;
1320         rt->rt6i_table = table;
1321
1322         cfg->fc_nlinfo.nl_net = dev_net(dev);
1323
1324         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1325
1326 out:
1327         if (dev)
1328                 dev_put(dev);
1329         if (idev)
1330                 in6_dev_put(idev);
1331         if (rt)
1332                 dst_free(&rt->dst);
1333         return err;
1334 }
1335
1336 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1337 {
1338         int err;
1339         struct fib6_table *table;
1340         struct net *net = dev_net(rt->rt6i_dev);
1341
1342         if (rt == net->ipv6.ip6_null_entry)
1343                 return -ENOENT;
1344
1345         table = rt->rt6i_table;
1346         write_lock_bh(&table->tb6_lock);
1347
1348         err = fib6_del(rt, info);
1349         dst_release(&rt->dst);
1350
1351         write_unlock_bh(&table->tb6_lock);
1352
1353         return err;
1354 }
1355
1356 int ip6_del_rt(struct rt6_info *rt)
1357 {
1358         struct nl_info info = {
1359                 .nl_net = dev_net(rt->rt6i_dev),
1360         };
1361         return __ip6_del_rt(rt, &info);
1362 }
1363
1364 static int ip6_route_del(struct fib6_config *cfg)
1365 {
1366         struct fib6_table *table;
1367         struct fib6_node *fn;
1368         struct rt6_info *rt;
1369         int err = -ESRCH;
1370
1371         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1372         if (table == NULL)
1373                 return err;
1374
1375         read_lock_bh(&table->tb6_lock);
1376
1377         fn = fib6_locate(&table->tb6_root,
1378                          &cfg->fc_dst, cfg->fc_dst_len,
1379                          &cfg->fc_src, cfg->fc_src_len);
1380
1381         if (fn) {
1382                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1383                         if (cfg->fc_ifindex &&
1384                             (rt->rt6i_dev == NULL ||
1385                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1386                                 continue;
1387                         if (cfg->fc_flags & RTF_GATEWAY &&
1388                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1389                                 continue;
1390                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1391                                 continue;
1392                         dst_hold(&rt->dst);
1393                         read_unlock_bh(&table->tb6_lock);
1394
1395                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1396                 }
1397         }
1398         read_unlock_bh(&table->tb6_lock);
1399
1400         return err;
1401 }
1402
1403 /*
1404  *      Handle redirects
1405  */
/*
 * Flow key for redirect lookups: a plain struct flowi followed by the
 * address of the gateway that sent the redirect.
 *
 * The redirect lookup code in this file casts pointers between
 * struct ip6rd_flowi * and struct flowi *, so 'fl' must remain the first
 * member.
 */
struct ip6rd_flowi {
	struct flowi fl;		/* ordinary flow key; must stay first */
	struct in6_addr gateway;	/* router the redirect came from */
};
1410
/*
 * Per-table lookup callback for redirect validation; ip6_route_redirect()
 * passes it to fib6_rule_lookup().
 *
 * @fl really points at a struct ip6rd_flowi, which is how the address of
 * the redirecting gateway is obtained.
 *
 * Searches, under the table read lock, for a route that is not expired,
 * has RTF_GATEWAY set, goes out through interface fl->oif and uses the
 * redirecting router as its gateway.  When the scanned list has no such
 * route, rt is set to the namespace's ip6_null_entry before BACKTRACK()
 * runs.  The route finally returned has had a reference taken on it.
 *
 * NOTE(review): no goto to the 'restart' and 'out' labels is visible in
 * this function; presumably the BACKTRACK() macro jumps to them and also
 * refers to the locals 'rt' and 'fn' by name -- confirm against the macro
 * definition before renaming or reordering anything here.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		/* Only gatewayed routes can be the target of a redirect. */
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		/* The route must use the interface the redirect arrived on. */
		if (fl->oif != rt->rt6i_dev->ifindex)
			continue;
		/* The redirect must come from this route's own gateway. */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	/* Nothing suitable in this list: fall back to the null entry. */
	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	BACKTRACK(net, &fl->fl6_src);
out:
	/* The caller receives a referenced route in every case. */
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1464
1465 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1466                                            struct in6_addr *src,
1467                                            struct in6_addr *gateway,
1468                                            struct net_device *dev)
1469 {
1470         int flags = RT6_LOOKUP_F_HAS_SADDR;
1471         struct net *net = dev_net(dev);
1472         struct ip6rd_flowi rdfl = {
1473                 .fl = {
1474                         .oif = dev->ifindex,
1475                         .fl6_dst = *dest,
1476                         .fl6_src = *src,
1477                 },
1478         };
1479
1480         ipv6_addr_copy(&rdfl.gateway, gateway);
1481
1482         if (rt6_need_strict(dest))
1483                 flags |= RT6_LOOKUP_F_IFACE;
1484
1485         return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1486                                                    flags, __ip6_route_redirect);
1487 }
1488
1489 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1490                   struct in6_addr *saddr,
1491                   struct neighbour *neigh, u8 *lladdr, int on_link)
1492 {
1493         struct rt6_info *rt, *nrt = NULL;
1494         struct netevent_redirect netevent;
1495         struct net *net = dev_net(neigh->dev);
1496
1497         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1498
1499         if (rt == net->ipv6.ip6_null_entry) {
1500                 if (net_ratelimit())
1501                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1502                                "for redirect target\n");
1503                 goto out;
1504         }
1505
1506         /*
1507          *      We have finally decided to accept it.
1508          */
1509
1510         neigh_update(neigh, lladdr, NUD_STALE,
1511                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1512                      NEIGH_UPDATE_F_OVERRIDE|
1513                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1514                                      NEIGH_UPDATE_F_ISROUTER))
1515                      );
1516
1517         /*
1518          * Redirect received -> path was valid.
1519          * Look, redirects are sent only in response to data packets,
1520          * so that this nexthop apparently is reachable. --ANK
1521          */
1522         dst_confirm(&rt->dst);
1523
1524         /* Duplicate redirect: silently ignore. */
1525         if (neigh == rt->dst.neighbour)
1526                 goto out;
1527
1528         nrt = ip6_rt_copy(rt);
1529         if (nrt == NULL)
1530                 goto out;
1531
1532         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1533         if (on_link)
1534                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1535
1536         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1537         nrt->rt6i_dst.plen = 128;
1538         nrt->dst.flags |= DST_HOST;
1539
1540         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1541         nrt->rt6i_nexthop = neigh_clone(neigh);
1542
1543         if (ip6_ins_rt(nrt))
1544                 goto out;
1545
1546         netevent.old = &rt->dst;
1547         netevent.new = &nrt->dst;
1548         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1549
1550         if (rt->rt6i_flags&RTF_CACHE) {
1551                 ip6_del_rt(rt);
1552                 return;
1553         }
1554
1555 out:
1556         dst_release(&rt->dst);
1557 }
1558
1559 /*
1560  *      Handle ICMP "packet too big" messages
1561  *      i.e. Path MTU discovery
1562  */
1563
1564 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1565                              struct net *net, u32 pmtu, int ifindex)
1566 {
1567         struct rt6_info *rt, *nrt;
1568         int allfrag = 0;
1569 again:
1570         rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1571         if (rt == NULL)
1572                 return;
1573
1574         if (rt6_check_expired(rt)) {
1575                 ip6_del_rt(rt);
1576                 goto again;
1577         }
1578
1579         if (pmtu >= dst_mtu(&rt->dst))
1580                 goto out;
1581
1582         if (pmtu < IPV6_MIN_MTU) {
1583                 /*
1584                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1585                  * MTU (1280) and a fragment header should always be included
1586                  * after a node receiving Too Big message reporting PMTU is
1587                  * less than the IPv6 Minimum Link MTU.
1588                  */
1589                 pmtu = IPV6_MIN_MTU;
1590                 allfrag = 1;
1591         }
1592
1593         /* New mtu received -> path was valid.
1594            They are sent only in response to data packets,
1595            so that this nexthop apparently is reachable. --ANK
1596          */
1597         dst_confirm(&rt->dst);
1598
1599         /* Host route. If it is static, it would be better
1600            not to override it, but add new one, so that
1601            when cache entry will expire old pmtu
1602            would return automatically.
1603          */
1604         if (rt->rt6i_flags & RTF_CACHE) {
1605                 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1606                 if (allfrag) {
1607                         u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1608                         features |= RTAX_FEATURE_ALLFRAG;
1609                         dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1610                 }
1611                 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1612                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1613                 goto out;
1614         }
1615
1616         /* Network route.
1617            Two cases are possible:
1618            1. It is connected route. Action: COW
1619            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1620          */
1621         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1622                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1623         else
1624                 nrt = rt6_alloc_clone(rt, daddr);
1625
1626         if (nrt) {
1627                 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1628                 if (allfrag) {
1629                         u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1630                         features |= RTAX_FEATURE_ALLFRAG;
1631                         dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1632                 }
1633
1634                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1635                  * happened within 5 mins, the recommended timer is 10 mins.
1636                  * Here this route expiration time is set to ip6_rt_mtu_expires
1637                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1638                  * and detecting PMTU increase will be automatically happened.
1639                  */
1640                 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1641                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1642
1643                 ip6_ins_rt(nrt);
1644         }
1645 out:
1646         dst_release(&rt->dst);
1647 }
1648
1649 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1650                         struct net_device *dev, u32 pmtu)
1651 {
1652         struct net *net = dev_net(dev);
1653
1654         /*
1655          * RFC 1981 states that a node "MUST reduce the size of the packets it
1656          * is sending along the path" that caused the Packet Too Big message.
1657          * Since it's not possible in the general case to determine which
1658          * interface was used to send the original packet, we update the MTU
1659          * on the interface that will be used to send future packets. We also
1660          * update the MTU on the interface that received the Packet Too Big in
1661          * case the original packet was forced out that interface with
1662          * SO_BINDTODEVICE or similar. This is the next best thing to the
1663          * correct behaviour, which would be to update the MTU on all
1664          * interfaces.
1665          */
1666         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1667         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1668 }
1669
1670 /*
1671  *      Misc support functions
1672  */
1673
1674 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1675 {
1676         struct net *net = dev_net(ort->rt6i_dev);
1677         struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1678
1679         if (rt) {
1680                 rt->dst.input = ort->dst.input;
1681                 rt->dst.output = ort->dst.output;
1682
1683                 dst_copy_metrics(&rt->dst, &ort->dst);
1684                 rt->dst.error = ort->dst.error;
1685                 rt->dst.dev = ort->dst.dev;
1686                 if (rt->dst.dev)
1687                         dev_hold(rt->dst.dev);
1688                 rt->rt6i_idev = ort->rt6i_idev;
1689                 if (rt->rt6i_idev)
1690                         in6_dev_hold(rt->rt6i_idev);
1691                 rt->dst.lastuse = jiffies;
1692                 rt->rt6i_expires = 0;
1693
1694                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1695                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1696                 rt->rt6i_metric = 0;
1697
1698                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1699 #ifdef CONFIG_IPV6_SUBTREES
1700                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1701 #endif
1702                 rt->rt6i_table = ort->rt6i_table;
1703         }
1704         return rt;
1705 }
1706
1707 #ifdef CONFIG_IPV6_ROUTE_INFO
1708 static struct rt6_info *rt6_get_route_info(struct net *net,
1709                                            struct in6_addr *prefix, int prefixlen,
1710                                            struct in6_addr *gwaddr, int ifindex)
1711 {
1712         struct fib6_node *fn;
1713         struct rt6_info *rt = NULL;
1714         struct fib6_table *table;
1715
1716         table = fib6_get_table(net, RT6_TABLE_INFO);
1717         if (table == NULL)
1718                 return NULL;
1719
1720         write_lock_bh(&table->tb6_lock);
1721         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1722         if (!fn)
1723                 goto out;
1724
1725         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1726                 if (rt->rt6i_dev->ifindex != ifindex)
1727                         continue;
1728                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1729                         continue;
1730                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1731                         continue;
1732                 dst_hold(&rt->dst);
1733                 break;
1734         }
1735 out:
1736         write_unlock_bh(&table->tb6_lock);
1737         return rt;
1738 }
1739
1740 static struct rt6_info *rt6_add_route_info(struct net *net,
1741                                            struct in6_addr *prefix, int prefixlen,
1742                                            struct in6_addr *gwaddr, int ifindex,
1743                                            unsigned pref)
1744 {
1745         struct fib6_config cfg = {
1746                 .fc_table       = RT6_TABLE_INFO,
1747                 .fc_metric      = IP6_RT_PRIO_USER,
1748                 .fc_ifindex     = ifindex,
1749                 .fc_dst_len     = prefixlen,
1750                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1751                                   RTF_UP | RTF_PREF(pref),
1752                 .fc_nlinfo.pid = 0,
1753                 .fc_nlinfo.nlh = NULL,
1754                 .fc_nlinfo.nl_net = net,
1755         };
1756
1757         ipv6_addr_copy(&cfg.fc_dst, prefix);
1758         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1759
1760         /* We should treat it as a default route if prefix length is 0. */
1761         if (!prefixlen)
1762                 cfg.fc_flags |= RTF_DEFAULT;
1763
1764         ip6_route_add(&cfg);
1765
1766         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1767 }
1768 #endif
1769
1770 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1771 {
1772         struct rt6_info *rt;
1773         struct fib6_table *table;
1774
1775         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1776         if (table == NULL)
1777                 return NULL;
1778
1779         write_lock_bh(&table->tb6_lock);
1780         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1781                 if (dev == rt->rt6i_dev &&
1782                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1783                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1784                         break;
1785         }
1786         if (rt)
1787                 dst_hold(&rt->dst);
1788         write_unlock_bh(&table->tb6_lock);
1789         return rt;
1790 }
1791
1792 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1793                                      struct net_device *dev,
1794                                      unsigned int pref)
1795 {
1796         struct fib6_config cfg = {
1797                 .fc_table       = RT6_TABLE_DFLT,
1798                 .fc_metric      = IP6_RT_PRIO_USER,
1799                 .fc_ifindex     = dev->ifindex,
1800                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1801                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1802                 .fc_nlinfo.pid = 0,
1803                 .fc_nlinfo.nlh = NULL,
1804                 .fc_nlinfo.nl_net = dev_net(dev),
1805         };
1806
1807         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1808
1809         ip6_route_add(&cfg);
1810
1811         return rt6_get_dflt_router(gwaddr, dev);
1812 }
1813
1814 void rt6_purge_dflt_routers(struct net *net)
1815 {
1816         struct rt6_info *rt;
1817         struct fib6_table *table;
1818
1819         /* NOTE: Keep consistent with rt6_get_dflt_router */
1820         table = fib6_get_table(net, RT6_TABLE_DFLT);
1821         if (table == NULL)
1822                 return;
1823
1824 restart:
1825         read_lock_bh(&table->tb6_lock);
1826         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1827                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1828                         dst_hold(&rt->dst);
1829                         read_unlock_bh(&table->tb6_lock);
1830                         ip6_del_rt(rt);
1831                         goto restart;
1832                 }
1833         }
1834         read_unlock_bh(&table->tb6_lock);
1835 }
1836
1837 static void rtmsg_to_fib6_config(struct net *net,
1838                                  struct in6_rtmsg *rtmsg,
1839                                  struct fib6_config *cfg)
1840 {
1841         memset(cfg, 0, sizeof(*cfg));
1842
1843         cfg->fc_table = RT6_TABLE_MAIN;
1844         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1845         cfg->fc_metric = rtmsg->rtmsg_metric;
1846         cfg->fc_expires = rtmsg->rtmsg_info;
1847         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1848         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1849         cfg->fc_flags = rtmsg->rtmsg_flags;
1850
1851         cfg->fc_nlinfo.nl_net = net;
1852
1853         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1854         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1855         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1856 }
1857
1858 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1859 {
1860         struct fib6_config cfg;
1861         struct in6_rtmsg rtmsg;
1862         int err;
1863
1864         switch(cmd) {
1865         case SIOCADDRT:         /* Add a route */
1866         case SIOCDELRT:         /* Delete a route */
1867                 if (!capable(CAP_NET_ADMIN))
1868                         return -EPERM;
1869                 err = copy_from_user(&rtmsg, arg,
1870                                      sizeof(struct in6_rtmsg));
1871                 if (err)
1872                         return -EFAULT;
1873
1874                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1875
1876                 rtnl_lock();
1877                 switch (cmd) {
1878                 case SIOCADDRT:
1879                         err = ip6_route_add(&cfg);
1880                         break;
1881                 case SIOCDELRT:
1882                         err = ip6_route_del(&cfg);
1883                         break;
1884                 default:
1885                         err = -EINVAL;
1886                 }
1887                 rtnl_unlock();
1888
1889                 return err;
1890         }
1891
1892         return -EINVAL;
1893 }
1894
1895 /*
1896  *      Drop the packet on the floor
1897  */
1898
1899 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1900 {
1901         int type;
1902         struct dst_entry *dst = skb_dst(skb);
1903         switch (ipstats_mib_noroutes) {
1904         case IPSTATS_MIB_INNOROUTES:
1905                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1906                 if (type == IPV6_ADDR_ANY) {
1907                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1908                                       IPSTATS_MIB_INADDRERRORS);
1909                         break;
1910                 }
1911                 /* FALLTHROUGH */
1912         case IPSTATS_MIB_OUTNOROUTES:
1913                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1914                               ipstats_mib_noroutes);
1915                 break;
1916         }
1917         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1918         kfree_skb(skb);
1919         return 0;
1920 }
1921
1922 static int ip6_pkt_discard(struct sk_buff *skb)
1923 {
1924         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1925 }
1926
1927 static int ip6_pkt_discard_out(struct sk_buff *skb)
1928 {
1929         skb->dev = skb_dst(skb)->dev;
1930         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1931 }
1932
1933 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1934
1935 static int ip6_pkt_prohibit(struct sk_buff *skb)
1936 {
1937         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1938 }
1939
1940 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1941 {
1942         skb->dev = skb_dst(skb)->dev;
1943         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1944 }
1945
1946 #endif
1947
1948 /*
1949  *      Allocate a dst for local (unicast / anycast) address.
1950  */
1951
1952 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1953                                     const struct in6_addr *addr,
1954                                     int anycast)
1955 {
1956         struct net *net = dev_net(idev->dev);
1957         struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1958         struct neighbour *neigh;
1959
1960         if (rt == NULL) {
1961                 if (net_ratelimit())
1962                         pr_warning("IPv6:  Maximum number of routes reached,"
1963                                    " consider increasing route/max_size.\n");
1964                 return ERR_PTR(-ENOMEM);
1965         }
1966
1967         dev_hold(net->loopback_dev);
1968         in6_dev_hold(idev);
1969
1970         rt->dst.flags = DST_HOST;
1971         rt->dst.input = ip6_input;
1972         rt->dst.output = ip6_output;
1973         rt->rt6i_dev = net->loopback_dev;
1974         rt->rt6i_idev = idev;
1975         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
1976         rt->dst.obsolete = -1;
1977
1978         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1979         if (anycast)
1980                 rt->rt6i_flags |= RTF_ANYCAST;
1981         else
1982                 rt->rt6i_flags |= RTF_LOCAL;
1983         neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1984         if (IS_ERR(neigh)) {
1985                 dst_free(&rt->dst);
1986
1987                 /* We are casting this because that is the return
1988                  * value type.  But an errno encoded pointer is the
1989                  * same regardless of the underlying pointer type,
1990                  * and that's what we are returning.  So this is OK.
1991                  */
1992                 return (struct rt6_info *) neigh;
1993         }
1994         rt->rt6i_nexthop = neigh;
1995
1996         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1997         rt->rt6i_dst.plen = 128;
1998         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1999
2000         atomic_set(&rt->dst.__refcnt, 1);
2001
2002         return rt;
2003 }
2004
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry must be kept */
};
2009
2010 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2011 {
2012         const struct arg_dev_net *adn = arg;
2013         const struct net_device *dev = adn->dev;
2014
2015         if ((rt->rt6i_dev == dev || dev == NULL) &&
2016             rt != adn->net->ipv6.ip6_null_entry) {
2017                 RT6_TRACE("deleted by ifdown %p\n", rt);
2018                 return -1;
2019         }
2020         return 0;
2021 }
2022
2023 void rt6_ifdown(struct net *net, struct net_device *dev)
2024 {
2025         struct arg_dev_net adn = {
2026                 .dev = dev,
2027                 .net = net,
2028         };
2029
2030         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2031         icmp6_clean_all(fib6_ifdown, &adn);
2032 }
2033
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2039
2040 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2041 {
2042         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2043         struct inet6_dev *idev;
2044
2045         /* In IPv6 pmtu discovery is not optional,
2046            so that RTAX_MTU lock cannot disable it.
2047            We still use this lock to block changes
2048            caused by addrconf/ndisc.
2049         */
2050
2051         idev = __in6_dev_get(arg->dev);
2052         if (idev == NULL)
2053                 return 0;
2054
2055         /* For administrative MTU increase, there is no way to discover
2056            IPv6 PMTU increase, so PMTU increase should be updated here.
2057            Since RFC 1981 doesn't include administrative MTU increase
2058            update PMTU increase is a MUST. (i.e. jumbo frame)
2059          */
2060         /*
2061            If new MTU is less than route PMTU, this new MTU will be the
2062            lowest MTU in the path, update the route PMTU to reflect PMTU
2063            decreases; if new MTU is greater than route PMTU, and the
2064            old MTU is the lowest MTU in the path, update the route PMTU
2065            to reflect the increase. In this case if the other nodes' MTU
2066            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2067            PMTU discouvery.
2068          */
2069         if (rt->rt6i_dev == arg->dev &&
2070             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2071             (dst_mtu(&rt->dst) >= arg->mtu ||
2072              (dst_mtu(&rt->dst) < arg->mtu &&
2073               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2074                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2075         }
2076         return 0;
2077 }
2078
2079 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2080 {
2081         struct rt6_mtu_change_arg arg = {
2082                 .dev = dev,
2083                 .mtu = mtu,
2084         };
2085
2086         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2087 }
2088
2089 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2090         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2091         [RTA_OIF]               = { .type = NLA_U32 },
2092         [RTA_IIF]               = { .type = NLA_U32 },
2093         [RTA_PRIORITY]          = { .type = NLA_U32 },
2094         [RTA_METRICS]           = { .type = NLA_NESTED },
2095 };
2096
2097 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2098                               struct fib6_config *cfg)
2099 {
2100         struct rtmsg *rtm;
2101         struct nlattr *tb[RTA_MAX+1];
2102         int err;
2103
2104         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2105         if (err < 0)
2106                 goto errout;
2107
2108         err = -EINVAL;
2109         rtm = nlmsg_data(nlh);
2110         memset(cfg, 0, sizeof(*cfg));
2111
2112         cfg->fc_table = rtm->rtm_table;
2113         cfg->fc_dst_len = rtm->rtm_dst_len;
2114         cfg->fc_src_len = rtm->rtm_src_len;
2115         cfg->fc_flags = RTF_UP;
2116         cfg->fc_protocol = rtm->rtm_protocol;
2117
2118         if (rtm->rtm_type == RTN_UNREACHABLE)
2119                 cfg->fc_flags |= RTF_REJECT;
2120
2121         if (rtm->rtm_type == RTN_LOCAL)
2122                 cfg->fc_flags |= RTF_LOCAL;
2123
2124         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2125         cfg->fc_nlinfo.nlh = nlh;
2126         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2127
2128         if (tb[RTA_GATEWAY]) {
2129                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2130                 cfg->fc_flags |= RTF_GATEWAY;
2131         }
2132
2133         if (tb[RTA_DST]) {
2134                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2135
2136                 if (nla_len(tb[RTA_DST]) < plen)
2137                         goto errout;
2138
2139                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2140         }
2141
2142         if (tb[RTA_SRC]) {
2143                 int plen = (rtm->rtm_src_len + 7) >> 3;
2144
2145                 if (nla_len(tb[RTA_SRC]) < plen)
2146                         goto errout;
2147
2148                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2149         }
2150
2151         if (tb[RTA_OIF])
2152                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2153
2154         if (tb[RTA_PRIORITY])
2155                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2156
2157         if (tb[RTA_METRICS]) {
2158                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2159                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2160         }
2161
2162         if (tb[RTA_TABLE])
2163                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2164
2165         err = 0;
2166 errout:
2167         return err;
2168 }
2169
2170 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2171 {
2172         struct fib6_config cfg;
2173         int err;
2174
2175         err = rtm_to_fib6_config(skb, nlh, &cfg);
2176         if (err < 0)
2177                 return err;
2178
2179         return ip6_route_del(&cfg);
2180 }
2181
2182 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2183 {
2184         struct fib6_config cfg;
2185         int err;
2186
2187         err = rtm_to_fib6_config(skb, nlh, &cfg);
2188         if (err < 0)
2189                 return err;
2190
2191         return ip6_route_add(&cfg);
2192 }
2193
2194 static inline size_t rt6_nlmsg_size(void)
2195 {
2196         return NLMSG_ALIGN(sizeof(struct rtmsg))
2197                + nla_total_size(16) /* RTA_SRC */
2198                + nla_total_size(16) /* RTA_DST */
2199                + nla_total_size(16) /* RTA_GATEWAY */
2200                + nla_total_size(16) /* RTA_PREFSRC */
2201                + nla_total_size(4) /* RTA_TABLE */
2202                + nla_total_size(4) /* RTA_IIF */
2203                + nla_total_size(4) /* RTA_OIF */
2204                + nla_total_size(4) /* RTA_PRIORITY */
2205                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2206                + nla_total_size(sizeof(struct rta_cacheinfo));
2207 }
2208
2209 static int rt6_fill_node(struct net *net,
2210                          struct sk_buff *skb, struct rt6_info *rt,
2211                          struct in6_addr *dst, struct in6_addr *src,
2212                          int iif, int type, u32 pid, u32 seq,
2213                          int prefix, int nowait, unsigned int flags)
2214 {
2215         struct rtmsg *rtm;
2216         struct nlmsghdr *nlh;
2217         long expires;
2218         u32 table;
2219
2220         if (prefix) {   /* user wants prefix routes only */
2221                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2222                         /* success since this is not a prefix route */
2223                         return 1;
2224                 }
2225         }
2226
2227         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2228         if (nlh == NULL)
2229                 return -EMSGSIZE;
2230
2231         rtm = nlmsg_data(nlh);
2232         rtm->rtm_family = AF_INET6;
2233         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2234         rtm->rtm_src_len = rt->rt6i_src.plen;
2235         rtm->rtm_tos = 0;
2236         if (rt->rt6i_table)
2237                 table = rt->rt6i_table->tb6_id;
2238         else
2239                 table = RT6_TABLE_UNSPEC;
2240         rtm->rtm_table = table;
2241         NLA_PUT_U32(skb, RTA_TABLE, table);
2242         if (rt->rt6i_flags&RTF_REJECT)
2243                 rtm->rtm_type = RTN_UNREACHABLE;
2244         else if (rt->rt6i_flags&RTF_LOCAL)
2245                 rtm->rtm_type = RTN_LOCAL;
2246         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2247                 rtm->rtm_type = RTN_LOCAL;
2248         else
2249                 rtm->rtm_type = RTN_UNICAST;
2250         rtm->rtm_flags = 0;
2251         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2252         rtm->rtm_protocol = rt->rt6i_protocol;
2253         if (rt->rt6i_flags&RTF_DYNAMIC)
2254                 rtm->rtm_protocol = RTPROT_REDIRECT;
2255         else if (rt->rt6i_flags & RTF_ADDRCONF)
2256                 rtm->rtm_protocol = RTPROT_KERNEL;
2257         else if (rt->rt6i_flags&RTF_DEFAULT)
2258                 rtm->rtm_protocol = RTPROT_RA;
2259
2260         if (rt->rt6i_flags&RTF_CACHE)
2261                 rtm->rtm_flags |= RTM_F_CLONED;
2262
2263         if (dst) {
2264                 NLA_PUT(skb, RTA_DST, 16, dst);
2265                 rtm->rtm_dst_len = 128;
2266         } else if (rtm->rtm_dst_len)
2267                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2268 #ifdef CONFIG_IPV6_SUBTREES
2269         if (src) {
2270                 NLA_PUT(skb, RTA_SRC, 16, src);
2271                 rtm->rtm_src_len = 128;
2272         } else if (rtm->rtm_src_len)
2273                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2274 #endif
2275         if (iif) {
2276 #ifdef CONFIG_IPV6_MROUTE
2277                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2278                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2279                         if (err <= 0) {
2280                                 if (!nowait) {
2281                                         if (err == 0)
2282                                                 return 0;
2283                                         goto nla_put_failure;
2284                                 } else {
2285                                         if (err == -EMSGSIZE)
2286                                                 goto nla_put_failure;
2287                                 }
2288                         }
2289                 } else
2290 #endif
2291                         NLA_PUT_U32(skb, RTA_IIF, iif);
2292         } else if (dst) {
2293                 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2294                 struct in6_addr saddr_buf;
2295                 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2296                                        dst, 0, &saddr_buf) == 0)
2297                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2298         }
2299
2300         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2301                 goto nla_put_failure;
2302
2303         if (rt->dst.neighbour)
2304                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2305
2306         if (rt->dst.dev)
2307                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2308
2309         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2310
2311         if (!(rt->rt6i_flags & RTF_EXPIRES))
2312                 expires = 0;
2313         else if (rt->rt6i_expires - jiffies < INT_MAX)
2314                 expires = rt->rt6i_expires - jiffies;
2315         else
2316                 expires = INT_MAX;
2317
2318         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2319                                expires, rt->dst.error) < 0)
2320                 goto nla_put_failure;
2321
2322         return nlmsg_end(skb, nlh);
2323
2324 nla_put_failure:
2325         nlmsg_cancel(skb, nlh);
2326         return -EMSGSIZE;
2327 }
2328
2329 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2330 {
2331         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2332         int prefix;
2333
2334         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2335                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2336                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2337         } else
2338                 prefix = 0;
2339
2340         return rt6_fill_node(arg->net,
2341                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2342                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2343                      prefix, 0, NLM_F_MULTI);
2344 }
2345
2346 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2347 {
2348         struct net *net = sock_net(in_skb->sk);
2349         struct nlattr *tb[RTA_MAX+1];
2350         struct rt6_info *rt;
2351         struct sk_buff *skb;
2352         struct rtmsg *rtm;
2353         struct flowi fl;
2354         int err, iif = 0;
2355
2356         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2357         if (err < 0)
2358                 goto errout;
2359
2360         err = -EINVAL;
2361         memset(&fl, 0, sizeof(fl));
2362
2363         if (tb[RTA_SRC]) {
2364                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2365                         goto errout;
2366
2367                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2368         }
2369
2370         if (tb[RTA_DST]) {
2371                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2372                         goto errout;
2373
2374                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2375         }
2376
2377         if (tb[RTA_IIF])
2378                 iif = nla_get_u32(tb[RTA_IIF]);
2379
2380         if (tb[RTA_OIF])
2381                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2382
2383         if (iif) {
2384                 struct net_device *dev;
2385                 dev = __dev_get_by_index(net, iif);
2386                 if (!dev) {
2387                         err = -ENODEV;
2388                         goto errout;
2389                 }
2390         }
2391
2392         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2393         if (skb == NULL) {
2394                 err = -ENOBUFS;
2395                 goto errout;
2396         }
2397
2398         /* Reserve room for dummy headers, this skb can pass
2399            through good chunk of routing engine.
2400          */
2401         skb_reset_mac_header(skb);
2402         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2403
2404         rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2405         skb_dst_set(skb, &rt->dst);
2406
2407         err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2408                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2409                             nlh->nlmsg_seq, 0, 0, 0);
2410         if (err < 0) {
2411                 kfree_skb(skb);
2412                 goto errout;
2413         }
2414
2415         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2416 errout:
2417         return err;
2418 }
2419
2420 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2421 {
2422         struct sk_buff *skb;
2423         struct net *net = info->nl_net;
2424         u32 seq;
2425         int err;
2426
2427         err = -ENOBUFS;
2428         seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2429
2430         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2431         if (skb == NULL)
2432                 goto errout;
2433
2434         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2435                                 event, info->pid, seq, 0, 0, 0);
2436         if (err < 0) {
2437                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2438                 WARN_ON(err == -EMSGSIZE);
2439                 kfree_skb(skb);
2440                 goto errout;
2441         }
2442         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2443                     info->nlh, gfp_any());
2444         return;
2445 errout:
2446         if (err < 0)
2447                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2448 }
2449
2450 static int ip6_route_dev_notify(struct notifier_block *this,
2451                                 unsigned long event, void *data)
2452 {
2453         struct net_device *dev = (struct net_device *)data;
2454         struct net *net = dev_net(dev);
2455
2456         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2457                 net->ipv6.ip6_null_entry->dst.dev = dev;
2458                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2459 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2460                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2461                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2462                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2463                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2464 #endif
2465         }
2466
2467         return NOTIFY_OK;
2468 }
2469
2470 /*
2471  *      /proc
2472  */
2473
2474 #ifdef CONFIG_PROC_FS
2475
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure (the /proc code below uses seq_file) - confirm whether it is
 * still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2484
2485 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2486 {
2487         struct seq_file *m = p_arg;
2488
2489         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2490
2491 #ifdef CONFIG_IPV6_SUBTREES
2492         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2493 #else
2494         seq_puts(m, "00000000000000000000000000000000 00 ");
2495 #endif
2496
2497         if (rt->rt6i_nexthop) {
2498                 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2499         } else {
2500                 seq_puts(m, "00000000000000000000000000000000");
2501         }
2502         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2503                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2504                    rt->dst.__use, rt->rt6i_flags,
2505                    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2506         return 0;
2507 }
2508
2509 static int ipv6_route_show(struct seq_file *m, void *v)
2510 {
2511         struct net *net = (struct net *)m->private;
2512         fib6_clean_all(net, rt6_info_route, 0, m);
2513         return 0;
2514 }
2515
2516 static int ipv6_route_open(struct inode *inode, struct file *file)
2517 {
2518         return single_open_net(inode, file, ipv6_route_show);
2519 }
2520
2521 static const struct file_operations ipv6_route_proc_fops = {
2522         .owner          = THIS_MODULE,
2523         .open           = ipv6_route_open,
2524         .read           = seq_read,
2525         .llseek         = seq_lseek,
2526         .release        = single_release_net,
2527 };
2528
2529 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2530 {
2531         struct net *net = (struct net *)seq->private;
2532         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2533                    net->ipv6.rt6_stats->fib_nodes,
2534                    net->ipv6.rt6_stats->fib_route_nodes,
2535                    net->ipv6.rt6_stats->fib_rt_alloc,
2536                    net->ipv6.rt6_stats->fib_rt_entries,
2537                    net->ipv6.rt6_stats->fib_rt_cache,
2538                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2539                    net->ipv6.rt6_stats->fib_discarded_routes);
2540
2541         return 0;
2542 }
2543
2544 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2545 {
2546         return single_open_net(inode, file, rt6_stats_seq_show);
2547 }
2548
2549 static const struct file_operations rt6_stats_seq_fops = {
2550         .owner   = THIS_MODULE,
2551         .open    = rt6_stats_seq_open,
2552         .read    = seq_read,
2553         .llseek  = seq_lseek,
2554         .release = single_release_net,
2555 };
2556 #endif  /* CONFIG_PROC_FS */
2557
2558 #ifdef CONFIG_SYSCTL
2559
2560 static
2561 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2562                               void __user *buffer, size_t *lenp, loff_t *ppos)
2563 {
2564         struct net *net = current->nsproxy->net_ns;
2565         int delay = net->ipv6.sysctl.flush_delay;
2566         if (write) {
2567                 proc_dointvec(ctl, write, buffer, lenp, ppos);
2568                 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2569                 return 0;
2570         } else
2571                 return -EINVAL;
2572 }
2573
2574 ctl_table ipv6_route_table_template[] = {
2575         {
2576                 .procname       =       "flush",
2577                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2578                 .maxlen         =       sizeof(int),
2579                 .mode           =       0200,
2580                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2581         },
2582         {
2583                 .procname       =       "gc_thresh",
2584                 .data           =       &ip6_dst_ops_template.gc_thresh,
2585                 .maxlen         =       sizeof(int),
2586                 .mode           =       0644,
2587                 .proc_handler   =       proc_dointvec,
2588         },
2589         {
2590                 .procname       =       "max_size",
2591                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2592                 .maxlen         =       sizeof(int),
2593                 .mode           =       0644,
2594                 .proc_handler   =       proc_dointvec,
2595         },
2596         {
2597                 .procname       =       "gc_min_interval",
2598                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2599                 .maxlen         =       sizeof(int),
2600                 .mode           =       0644,
2601                 .proc_handler   =       proc_dointvec_jiffies,
2602         },
2603         {
2604                 .procname       =       "gc_timeout",
2605                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2606                 .maxlen         =       sizeof(int),
2607                 .mode           =       0644,
2608                 .proc_handler   =       proc_dointvec_jiffies,
2609         },
2610         {
2611                 .procname       =       "gc_interval",
2612                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2613                 .maxlen         =       sizeof(int),
2614                 .mode           =       0644,
2615                 .proc_handler   =       proc_dointvec_jiffies,
2616         },
2617         {
2618                 .procname       =       "gc_elasticity",
2619                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2620                 .maxlen         =       sizeof(int),
2621                 .mode           =       0644,
2622                 .proc_handler   =       proc_dointvec,
2623         },
2624         {
2625                 .procname       =       "mtu_expires",
2626                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2627                 .maxlen         =       sizeof(int),
2628                 .mode           =       0644,
2629                 .proc_handler   =       proc_dointvec_jiffies,
2630         },
2631         {
2632                 .procname       =       "min_adv_mss",
2633                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2634                 .maxlen         =       sizeof(int),
2635                 .mode           =       0644,
2636                 .proc_handler   =       proc_dointvec,
2637         },
2638         {
2639                 .procname       =       "gc_min_interval_ms",
2640                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2641                 .maxlen         =       sizeof(int),
2642                 .mode           =       0644,
2643                 .proc_handler   =       proc_dointvec_ms_jiffies,
2644         },
2645         { }
2646 };
2647
2648 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2649 {
2650         struct ctl_table *table;
2651
2652         table = kmemdup(ipv6_route_table_template,
2653                         sizeof(ipv6_route_table_template),
2654                         GFP_KERNEL);
2655
2656         if (table) {
2657                 table[0].data = &net->ipv6.sysctl.flush_delay;
2658                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2659                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2660                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2661                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2662                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2663                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2664                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2665                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2666                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2667         }
2668
2669         return table;
2670 }
2671 #endif
2672
2673 static int __net_init ip6_route_net_init(struct net *net)
2674 {
2675         int ret = -ENOMEM;
2676
2677         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2678                sizeof(net->ipv6.ip6_dst_ops));
2679
2680         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2681                 goto out_ip6_dst_ops;
2682
2683         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2684                                            sizeof(*net->ipv6.ip6_null_entry),
2685                                            GFP_KERNEL);
2686         if (!net->ipv6.ip6_null_entry)
2687                 goto out_ip6_dst_entries;
2688         net->ipv6.ip6_null_entry->dst.path =
2689                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2690         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2691         dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
2692
2693 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2694         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2695                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2696                                                GFP_KERNEL);
2697         if (!net->ipv6.ip6_prohibit_entry)
2698                 goto out_ip6_null_entry;
2699         net->ipv6.ip6_prohibit_entry->dst.path =
2700                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2701         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2702         dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
2703
2704         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2705                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2706                                                GFP_KERNEL);
2707         if (!net->ipv6.ip6_blk_hole_entry)
2708                 goto out_ip6_prohibit_entry;
2709         net->ipv6.ip6_blk_hole_entry->dst.path =
2710                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2711         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2712         dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
2713 #endif
2714
2715         net->ipv6.sysctl.flush_delay = 0;
2716         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2717         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2718         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2719         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2720         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2721         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2722         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2723
2724 #ifdef CONFIG_PROC_FS
2725         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2726         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2727 #endif
2728         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2729
2730         ret = 0;
2731 out:
2732         return ret;
2733
2734 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2735 out_ip6_prohibit_entry:
2736         kfree(net->ipv6.ip6_prohibit_entry);
2737 out_ip6_null_entry:
2738         kfree(net->ipv6.ip6_null_entry);
2739 #endif
2740 out_ip6_dst_entries:
2741         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2742 out_ip6_dst_ops:
2743         goto out;
2744 }
2745
2746 static void __net_exit ip6_route_net_exit(struct net *net)
2747 {
2748 #ifdef CONFIG_PROC_FS
2749         proc_net_remove(net, "ipv6_route");
2750         proc_net_remove(net, "rt6_stats");
2751 #endif
2752         kfree(net->ipv6.ip6_null_entry);
2753 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2754         kfree(net->ipv6.ip6_prohibit_entry);
2755         kfree(net->ipv6.ip6_blk_hole_entry);
2756 #endif
2757         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2758 }
2759
2760 static struct pernet_operations ip6_route_net_ops = {
2761         .init = ip6_route_net_init,
2762         .exit = ip6_route_net_exit,
2763 };
2764
2765 static struct notifier_block ip6_route_dev_notifier = {
2766         .notifier_call = ip6_route_dev_notify,
2767         .priority = 0,
2768 };
2769
2770 int __init ip6_route_init(void)
2771 {
2772         int ret;
2773
2774         ret = -ENOMEM;
2775         ip6_dst_ops_template.kmem_cachep =
2776                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2777                                   SLAB_HWCACHE_ALIGN, NULL);
2778         if (!ip6_dst_ops_template.kmem_cachep)
2779                 goto out;
2780
2781         ret = dst_entries_init(&ip6_dst_blackhole_ops);
2782         if (ret)
2783                 goto out_kmem_cache;
2784
2785         ret = register_pernet_subsys(&ip6_route_net_ops);
2786         if (ret)
2787                 goto out_dst_entries;
2788
2789         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2790
2791         /* Registering of the loopback is done before this portion of code,
2792          * the loopback reference in rt6_info will not be taken, do it
2793          * manually for init_net */
2794         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2795         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2796   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2797         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2798         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2799         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2800         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2801   #endif
2802         ret = fib6_init();
2803         if (ret)
2804                 goto out_register_subsys;
2805
2806         ret = xfrm6_init();
2807         if (ret)
2808                 goto out_fib6_init;
2809
2810         ret = fib6_rules_init();
2811         if (ret)
2812                 goto xfrm6_init;
2813
2814         ret = -ENOBUFS;
2815         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2816             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2817             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2818                 goto fib6_rules_init;
2819
2820         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2821         if (ret)
2822                 goto fib6_rules_init;
2823
2824 out:
2825         return ret;
2826
2827 fib6_rules_init:
2828         fib6_rules_cleanup();
2829 xfrm6_init:
2830         xfrm6_fini();
2831 out_fib6_init:
2832         fib6_gc_cleanup();
2833 out_register_subsys:
2834         unregister_pernet_subsys(&ip6_route_net_ops);
2835 out_dst_entries:
2836         dst_entries_destroy(&ip6_dst_blackhole_ops);
2837 out_kmem_cache:
2838         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2839         goto out;
2840 }
2841
2842 void ip6_route_cleanup(void)
2843 {
2844         unregister_netdevice_notifier(&ip6_route_dev_notifier);
2845         fib6_rules_cleanup();
2846         xfrm6_fini();
2847         fib6_gc_cleanup();
2848         unregister_pernet_subsys(&ip6_route_net_ops);
2849         dst_entries_destroy(&ip6_dst_blackhole_ops);
2850         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2851 }