1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <asm/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
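/* Neighbour (NUD) based scores returned by rt6_check_neigh() and
 * rt6_score_route(): negative values are failures, and
 * RT6_NUD_FAIL_DO_RR asks the caller to round-robin to another router.
 */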
73 enum rt6_nud_state {
74         RT6_NUD_FAIL_HARD = -3,
75         RT6_NUD_FAIL_PROBE = -2,
76         RT6_NUD_FAIL_DO_RR = -1,
77         RT6_NUD_SUCCEED = 1
78 };
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int      ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void             ip6_dst_destroy(struct dst_entry *);
86 static void             ip6_dst_ifdown(struct dst_entry *,
87                                        struct net_device *dev, int how);
88 static int               ip6_dst_gc(struct dst_ops *ops);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int              ip6_pkt_prohibit(struct sk_buff *skb);
93 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96                                            struct sk_buff *skb, u32 mtu);
97 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98                                         struct sk_buff *skb);
99 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104                                            const struct in6_addr *prefix, int prefixlen,
105                                            const struct in6_addr *gwaddr, int ifindex,
106                                            unsigned int pref);
107 static struct rt6_info *rt6_get_route_info(struct net *net,
108                                            const struct in6_addr *prefix, int prefixlen,
109                                            const struct in6_addr *gwaddr, int ifindex);
110 #endif
111
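/* Per-cpu list of uncached (DST_NOCACHE) rt6_info entries.  Entries are
 * added in rt6_uncached_list_add() and re-pointed at the loopback device
 * in rt6_uncached_list_flush_dev() when their device goes away.
 */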
112 struct uncached_list {
113         spinlock_t              lock;
114         struct list_head        head;
115 };
116
117 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118
119 static void rt6_uncached_list_add(struct rt6_info *rt)
120 {
121         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
122
123         rt->dst.flags |= DST_NOCACHE;
124         rt->rt6i_uncached_list = ul;
125
126         spin_lock_bh(&ul->lock);
127         list_add_tail(&rt->rt6i_uncached, &ul->head);
128         spin_unlock_bh(&ul->lock);
129 }
130
131 static void rt6_uncached_list_del(struct rt6_info *rt)
132 {
133         if (!list_empty(&rt->rt6i_uncached)) {
134                 struct uncached_list *ul = rt->rt6i_uncached_list;
135
136                 spin_lock_bh(&ul->lock);
137                 list_del(&rt->rt6i_uncached);
138                 spin_unlock_bh(&ul->lock);
139         }
140 }
141
142 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143 {
144         struct net_device *loopback_dev = net->loopback_dev;
145         int cpu;
146
147         if (dev == loopback_dev)
148                 return;
149
150         for_each_possible_cpu(cpu) {
151                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152                 struct rt6_info *rt;
153
154                 spin_lock_bh(&ul->lock);
155                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156                         struct inet6_dev *rt_idev = rt->rt6i_idev;
157                         struct net_device *rt_dev = rt->dst.dev;
158
159                         if (rt_idev->dev == dev) {
160                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
161                                 in6_dev_put(rt_idev);
162                         }
163
164                         if (rt_dev == dev) {
165                                 rt->dst.dev = loopback_dev;
166                                 dev_hold(rt->dst.dev);
167                                 dev_put(rt_dev);
168                         }
169                 }
170                 spin_unlock_bh(&ul->lock);
171         }
172 }
173
174 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175 {
176         return dst_metrics_write_ptr(rt->dst.from);
177 }
178
179 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180 {
181         struct rt6_info *rt = (struct rt6_info *)dst;
182
183         if (rt->rt6i_flags & RTF_PCPU)
184                 return rt6_pcpu_cow_metrics(rt);
185         else if (rt->rt6i_flags & RTF_CACHE)
186                 return NULL;
187         else
188                 return dst_cow_metrics_generic(dst, old);
189 }
190
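/* Pick the address used for the neighbour lookup: the route's gateway
 * when one is set, otherwise the destination from the skb, or the
 * caller-supplied daddr.
 */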
191 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192                                              struct sk_buff *skb,
193                                              const void *daddr)
194 {
195         struct in6_addr *p = &rt->rt6i_gateway;
196
197         if (!ipv6_addr_any(p))
198                 return (const void *) p;
199         else if (skb)
200                 return &ipv6_hdr(skb)->daddr;
201         return daddr;
202 }
203
204 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205                                           struct sk_buff *skb,
206                                           const void *daddr)
207 {
208         struct rt6_info *rt = (struct rt6_info *) dst;
209         struct neighbour *n;
210
211         daddr = choose_neigh_daddr(rt, skb, daddr);
212         n = __ipv6_neigh_lookup(dst->dev, daddr);
213         if (n)
214                 return n;
215         return neigh_create(&nd_tbl, daddr, dst->dev);
216 }
217
218 static struct dst_ops ip6_dst_ops_template = {
219         .family                 =       AF_INET6,
220         .gc                     =       ip6_dst_gc,
221         .gc_thresh              =       1024,
222         .check                  =       ip6_dst_check,
223         .default_advmss         =       ip6_default_advmss,
224         .mtu                    =       ip6_mtu,
225         .cow_metrics            =       ipv6_cow_metrics,
226         .destroy                =       ip6_dst_destroy,
227         .ifdown                 =       ip6_dst_ifdown,
228         .negative_advice        =       ip6_negative_advice,
229         .link_failure           =       ip6_link_failure,
230         .update_pmtu            =       ip6_rt_update_pmtu,
231         .redirect               =       rt6_do_redirect,
232         .local_out              =       __ip6_local_out,
233         .neigh_lookup           =       ip6_neigh_lookup,
234 };
235
236 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
237 {
238         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239
240         return mtu ? : dst->dev->mtu;
241 }
242
243 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
244                                          struct sk_buff *skb, u32 mtu)
245 {
246 }
247
248 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
249                                       struct sk_buff *skb)
250 {
251 }
252
253 static struct dst_ops ip6_dst_blackhole_ops = {
254         .family                 =       AF_INET6,
255         .destroy                =       ip6_dst_destroy,
256         .check                  =       ip6_dst_check,
257         .mtu                    =       ip6_blackhole_mtu,
258         .default_advmss         =       ip6_default_advmss,
259         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
260         .redirect               =       ip6_rt_blackhole_redirect,
261         .cow_metrics            =       dst_cow_metrics_generic,
262         .neigh_lookup           =       ip6_neigh_lookup,
263 };
264
265 static const u32 ip6_template_metrics[RTAX_MAX] = {
266         [RTAX_HOPLIMIT - 1] = 0,
267 };
268
269 static const struct rt6_info ip6_null_entry_template = {
270         .dst = {
271                 .__refcnt       = ATOMIC_INIT(1),
272                 .__use          = 1,
273                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
274                 .error          = -ENETUNREACH,
275                 .input          = ip6_pkt_discard,
276                 .output         = ip6_pkt_discard_out,
277         },
278         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
279         .rt6i_protocol  = RTPROT_KERNEL,
280         .rt6i_metric    = ~(u32) 0,
281         .rt6i_ref       = ATOMIC_INIT(1),
282 };
283
284 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
285
286 static const struct rt6_info ip6_prohibit_entry_template = {
287         .dst = {
288                 .__refcnt       = ATOMIC_INIT(1),
289                 .__use          = 1,
290                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
291                 .error          = -EACCES,
292                 .input          = ip6_pkt_prohibit,
293                 .output         = ip6_pkt_prohibit_out,
294         },
295         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
296         .rt6i_protocol  = RTPROT_KERNEL,
297         .rt6i_metric    = ~(u32) 0,
298         .rt6i_ref       = ATOMIC_INIT(1),
299 };
300
301 static const struct rt6_info ip6_blk_hole_entry_template = {
302         .dst = {
303                 .__refcnt       = ATOMIC_INIT(1),
304                 .__use          = 1,
305                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
306                 .error          = -EINVAL,
307                 .input          = dst_discard,
308                 .output         = dst_discard_out,
309         },
310         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
311         .rt6i_protocol  = RTPROT_KERNEL,
312         .rt6i_metric    = ~(u32) 0,
313         .rt6i_ref       = ATOMIC_INIT(1),
314 };
315
316 #endif
317
318 static void rt6_info_init(struct rt6_info *rt)
319 {
320         struct dst_entry *dst = &rt->dst;
321
322         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
323         INIT_LIST_HEAD(&rt->rt6i_siblings);
324         INIT_LIST_HEAD(&rt->rt6i_uncached);
325 }
326
327 /* allocate dst with ip6_dst_ops */
328 static struct rt6_info *__ip6_dst_alloc(struct net *net,
329                                         struct net_device *dev,
330                                         int flags)
331 {
332         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
333                                         0, DST_OBSOLETE_FORCE_CHK, flags);
334
335         if (rt)
336                 rt6_info_init(rt);
337
338         return rt;
339 }
340
341 static struct rt6_info *ip6_dst_alloc(struct net *net,
342                                       struct net_device *dev,
343                                       int flags)
344 {
345         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
346
347         if (rt) {
348                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
349                 if (rt->rt6i_pcpu) {
350                         int cpu;
351
352                         for_each_possible_cpu(cpu) {
353                                 struct rt6_info **p;
354
355                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
356                                 /* no one shares rt */
357                                 *p = NULL;
358                         }
359                 } else {
360                         dst_destroy((struct dst_entry *)rt);
361                         return NULL;
362                 }
363         }
364
365         return rt;
366 }
367
368 static void ip6_dst_destroy(struct dst_entry *dst)
369 {
370         struct rt6_info *rt = (struct rt6_info *)dst;
371         struct dst_entry *from = dst->from;
372         struct inet6_dev *idev;
373
374         dst_destroy_metrics_generic(dst);
375         free_percpu(rt->rt6i_pcpu);
376         rt6_uncached_list_del(rt);
377
378         idev = rt->rt6i_idev;
379         if (idev) {
380                 rt->rt6i_idev = NULL;
381                 in6_dev_put(idev);
382         }
383
384         dst->from = NULL;
385         dst_release(from);
386 }
387
388 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
389                            int how)
390 {
391         struct rt6_info *rt = (struct rt6_info *)dst;
392         struct inet6_dev *idev = rt->rt6i_idev;
393         struct net_device *loopback_dev =
394                 dev_net(dev)->loopback_dev;
395
396         if (dev != loopback_dev) {
397                 if (idev && idev->dev == dev) {
398                         struct inet6_dev *loopback_idev =
399                                 in6_dev_get(loopback_dev);
400                         if (loopback_idev) {
401                                 rt->rt6i_idev = loopback_idev;
402                                 in6_dev_put(idev);
403                         }
404                 }
405         }
406 }
407
408 static bool __rt6_check_expired(const struct rt6_info *rt)
409 {
410         if (rt->rt6i_flags & RTF_EXPIRES)
411                 return time_after(jiffies, rt->dst.expires);
412         else
413                 return false;
414 }
415
416 static bool rt6_check_expired(const struct rt6_info *rt)
417 {
418         if (rt->rt6i_flags & RTF_EXPIRES) {
419                 if (time_after(jiffies, rt->dst.expires))
420                         return true;
421         } else if (rt->dst.from) {
422                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
423         }
424         return false;
425 }
426
427 /* Multipath route selection:
428  *   Hash-based selection using the packet header and flow label.
429  * Adapted from fib_info_hashfn()
430  */
431 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
432                                const struct flowi6 *fl6)
433 {
434         return get_hash_from_flowi6(fl6) % candidate_count;
435 }
436
437 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
438                                              struct flowi6 *fl6, int oif,
439                                              int strict)
440 {
441         struct rt6_info *sibling, *next_sibling;
442         int route_chosen;
443
444         route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
445         /* Don't change the route if route_chosen == 0
446          * (the sibling list does not include ourselves)
447          */
448         if (route_chosen)
449                 list_for_each_entry_safe(sibling, next_sibling,
450                                 &match->rt6i_siblings, rt6i_siblings) {
451                         route_chosen--;
452                         if (route_chosen == 0) {
453                                 if (rt6_score_route(sibling, oif, strict) < 0)
454                                         break;
455                                 match = sibling;
456                                 break;
457                         }
458                 }
459         return match;
460 }
461
462 /*
463  *      Route lookup. Any table->tb6_lock is implied.
464  */
465
466 static inline struct rt6_info *rt6_device_match(struct net *net,
467                                                     struct rt6_info *rt,
468                                                     const struct in6_addr *saddr,
469                                                     int oif,
470                                                     int flags)
471 {
472         struct rt6_info *local = NULL;
473         struct rt6_info *sprt;
474
475         if (!oif && ipv6_addr_any(saddr))
476                 goto out;
477
478         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
479                 struct net_device *dev = sprt->dst.dev;
480
481                 if (oif) {
482                         if (dev->ifindex == oif)
483                                 return sprt;
484                         if (dev->flags & IFF_LOOPBACK) {
485                                 if (!sprt->rt6i_idev ||
486                                     sprt->rt6i_idev->dev->ifindex != oif) {
487                                         if (flags & RT6_LOOKUP_F_IFACE)
488                                                 continue;
489                                         if (local &&
490                                             local->rt6i_idev->dev->ifindex == oif)
491                                                 continue;
492                                 }
493                                 local = sprt;
494                         }
495                 } else {
496                         if (ipv6_chk_addr(net, saddr, dev,
497                                           flags & RT6_LOOKUP_F_IFACE))
498                                 return sprt;
499                 }
500         }
501
502         if (oif) {
503                 if (local)
504                         return local;
505
506                 if (flags & RT6_LOOKUP_F_IFACE)
507                         return net->ipv6.ip6_null_entry;
508         }
509 out:
510         return rt;
511 }
512
513 #ifdef CONFIG_IPV6_ROUTER_PREF
514 struct __rt6_probe_work {
515         struct work_struct work;
516         struct in6_addr target;
517         struct net_device *dev;
518 };
519
520 static void rt6_probe_deferred(struct work_struct *w)
521 {
522         struct in6_addr mcaddr;
523         struct __rt6_probe_work *work =
524                 container_of(w, struct __rt6_probe_work, work);
525
526         addrconf_addr_solict_mult(&work->target, &mcaddr);
527         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
528         dev_put(work->dev);
529         kfree(work);
530 }
531
532 static void rt6_probe(struct rt6_info *rt)
533 {
534         struct __rt6_probe_work *work;
535         struct neighbour *neigh;
536         /*
537          * Router Reachability Probing: if this route goes through a
538          * gateway, verify that the router is still reachable before
539          * relying on it.
540          *
541          * A Router Reachability Probe MUST be rate-limited
542          * to no more than one per minute.
543          */
544         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
545                 return;
546         rcu_read_lock_bh();
547         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
548         if (neigh) {
549                 if (neigh->nud_state & NUD_VALID)
550                         goto out;
551
552                 work = NULL;
553                 write_lock(&neigh->lock);
554                 if (!(neigh->nud_state & NUD_VALID) &&
555                     time_after(jiffies,
556                                neigh->updated +
557                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
558                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
559                         if (work)
560                                 __neigh_set_probe_once(neigh);
561                 }
562                 write_unlock(&neigh->lock);
563         } else {
564                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
565         }
566
567         if (work) {
568                 INIT_WORK(&work->work, rt6_probe_deferred);
569                 work->target = rt->rt6i_gateway;
570                 dev_hold(rt->dst.dev);
571                 work->dev = rt->dst.dev;
572                 schedule_work(&work->work);
573         }
574
575 out:
576         rcu_read_unlock_bh();
577 }
578 #else
579 static inline void rt6_probe(struct rt6_info *rt)
580 {
581 }
582 #endif
583
584 /*
585  * Default Router Selection (RFC 2461 6.3.6)
586  */
587 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
588 {
589         struct net_device *dev = rt->dst.dev;
590         if (!oif || dev->ifindex == oif)
591                 return 2;
592         if ((dev->flags & IFF_LOOPBACK) &&
593             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
594                 return 1;
595         return 0;
596 }
597
598 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
599 {
600         struct neighbour *neigh;
601         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
602
603         if (rt->rt6i_flags & RTF_NONEXTHOP ||
604             !(rt->rt6i_flags & RTF_GATEWAY))
605                 return RT6_NUD_SUCCEED;
606
607         rcu_read_lock_bh();
608         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
609         if (neigh) {
610                 read_lock(&neigh->lock);
611                 if (neigh->nud_state & NUD_VALID)
612                         ret = RT6_NUD_SUCCEED;
613 #ifdef CONFIG_IPV6_ROUTER_PREF
614                 else if (!(neigh->nud_state & NUD_FAILED))
615                         ret = RT6_NUD_SUCCEED;
616                 else
617                         ret = RT6_NUD_FAIL_PROBE;
618 #endif
619                 read_unlock(&neigh->lock);
620         } else {
621                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
622                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
623         }
624         rcu_read_unlock_bh();
625
626         return ret;
627 }
628
629 static int rt6_score_route(struct rt6_info *rt, int oif,
630                            int strict)
631 {
632         int m;
633
634         m = rt6_check_dev(rt, oif);
635         if (!m && (strict & RT6_LOOKUP_F_IFACE))
636                 return RT6_NUD_FAIL_HARD;
637 #ifdef CONFIG_IPV6_ROUTER_PREF
638         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
639 #endif
640         if (strict & RT6_LOOKUP_F_REACHABLE) {
641                 int n = rt6_check_neigh(rt);
642                 if (n < 0)
643                         return n;
644         }
645         return m;
646 }
647
648 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
649                                    int *mpri, struct rt6_info *match,
650                                    bool *do_rr)
651 {
652         int m;
653         bool match_do_rr = false;
654         struct inet6_dev *idev = rt->rt6i_idev;
655         struct net_device *dev = rt->dst.dev;
656
657         if (dev && !netif_carrier_ok(dev) &&
658             idev->cnf.ignore_routes_with_linkdown)
659                 goto out;
660
661         if (rt6_check_expired(rt))
662                 goto out;
663
664         m = rt6_score_route(rt, oif, strict);
665         if (m == RT6_NUD_FAIL_DO_RR) {
666                 match_do_rr = true;
667                 m = 0; /* lowest valid score */
668         } else if (m == RT6_NUD_FAIL_HARD) {
669                 goto out;
670         }
671
672         if (strict & RT6_LOOKUP_F_REACHABLE)
673                 rt6_probe(rt);
674
675         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
676         if (m > *mpri) {
677                 *do_rr = match_do_rr;
678                 *mpri = m;
679                 match = rt;
680         }
681 out:
682         return match;
683 }
684
685 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
686                                      struct rt6_info *rr_head,
687                                      u32 metric, int oif, int strict,
688                                      bool *do_rr)
689 {
690         struct rt6_info *rt, *match, *cont;
691         int mpri = -1;
692
693         match = NULL;
694         cont = NULL;
695         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
696                 if (rt->rt6i_metric != metric) {
697                         cont = rt;
698                         break;
699                 }
700
701                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
702         }
703
704         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
705                 if (rt->rt6i_metric != metric) {
706                         cont = rt;
707                         break;
708                 }
709
710                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
711         }
712
713         if (match || !cont)
714                 return match;
715
716         for (rt = cont; rt; rt = rt->dst.rt6_next)
717                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
718
719         return match;
720 }
721
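/* Round-robin default router selection: score the leaf routes of equal
 * metric starting at fn->rr_ptr, and advance rr_ptr when the best match
 * asked for round-robin (do_rr), so the next lookup starts at the
 * following sibling.
 */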
722 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
723 {
724         struct rt6_info *match, *rt0;
725         struct net *net;
726         bool do_rr = false;
727
728         rt0 = fn->rr_ptr;
729         if (!rt0)
730                 fn->rr_ptr = rt0 = fn->leaf;
731
732         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
733                              &do_rr);
734
735         if (do_rr) {
736                 struct rt6_info *next = rt0->dst.rt6_next;
737
738                 /* no entries matched; do round-robin */
739                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
740                         next = fn->leaf;
741
742                 if (next != rt0)
743                         fn->rr_ptr = next;
744         }
745
746         net = dev_net(rt0->dst.dev);
747         return match ? match : net->ipv6.ip6_null_entry;
748 }
749
750 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
751 {
752         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
753 }
754
755 #ifdef CONFIG_IPV6_ROUTE_INFO
756 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
757                   const struct in6_addr *gwaddr)
758 {
759         struct net *net = dev_net(dev);
760         struct route_info *rinfo = (struct route_info *) opt;
761         struct in6_addr prefix_buf, *prefix;
762         unsigned int pref;
763         unsigned long lifetime;
764         struct rt6_info *rt;
765
766         if (len < sizeof(struct route_info)) {
767                 return -EINVAL;
768         }
769
770         /* Sanity check for prefix_len and length */
771         if (rinfo->length > 3) {
772                 return -EINVAL;
773         } else if (rinfo->prefix_len > 128) {
774                 return -EINVAL;
775         } else if (rinfo->prefix_len > 64) {
776                 if (rinfo->length < 2) {
777                         return -EINVAL;
778                 }
779         } else if (rinfo->prefix_len > 0) {
780                 if (rinfo->length < 1) {
781                         return -EINVAL;
782                 }
783         }
784
785         pref = rinfo->route_pref;
786         if (pref == ICMPV6_ROUTER_PREF_INVALID)
787                 return -EINVAL;
788
789         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
790
791         if (rinfo->length == 3)
792                 prefix = (struct in6_addr *)rinfo->prefix;
793         else {
794                 /* this function is safe */
795                 ipv6_addr_prefix(&prefix_buf,
796                                  (struct in6_addr *)rinfo->prefix,
797                                  rinfo->prefix_len);
798                 prefix = &prefix_buf;
799         }
800
801         if (rinfo->prefix_len == 0)
802                 rt = rt6_get_dflt_router(gwaddr, dev);
803         else
804                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
805                                         gwaddr, dev->ifindex);
806
807         if (rt && !lifetime) {
808                 ip6_del_rt(rt);
809                 rt = NULL;
810         }
811
812         if (!rt && lifetime)
813                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
814                                         pref);
815         else if (rt)
816                 rt->rt6i_flags = RTF_ROUTEINFO |
817                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
818
819         if (rt) {
820                 if (!addrconf_finite_timeout(lifetime))
821                         rt6_clean_expires(rt);
822                 else
823                         rt6_set_expires(rt, jiffies + HZ * lifetime);
824
825                 ip6_rt_put(rt);
826         }
827         return 0;
828 }
829 #endif
830
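/* Walk back up the fib6 tree from @fn, re-doing the source-address
 * lookup in any parent subtree on the way, until a node carrying route
 * info (RTN_RTINFO) is found; returns NULL once the tree root is reached.
 */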
831 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
832                                         struct in6_addr *saddr)
833 {
834         struct fib6_node *pn;
835         while (1) {
836                 if (fn->fn_flags & RTN_TL_ROOT)
837                         return NULL;
838                 pn = fn->parent;
839                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
840                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
841                 else
842                         fn = pn;
843                 if (fn->fn_flags & RTN_RTINFO)
844                         return fn;
845         }
846 }
847
848 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
849                                              struct fib6_table *table,
850                                              struct flowi6 *fl6, int flags)
851 {
852         struct fib6_node *fn;
853         struct rt6_info *rt;
854
855         read_lock_bh(&table->tb6_lock);
856         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
857 restart:
858         rt = fn->leaf;
859         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
860         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
861                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
862         if (rt == net->ipv6.ip6_null_entry) {
863                 fn = fib6_backtrack(fn, &fl6->saddr);
864                 if (fn)
865                         goto restart;
866         }
867         dst_use(&rt->dst, jiffies);
868         read_unlock_bh(&table->tb6_lock);
869
870         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
871
872         return rt;
873
874 }
875
876 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
877                                     int flags)
878 {
879         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
880 }
881 EXPORT_SYMBOL_GPL(ip6_route_lookup);
882
883 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
884                             const struct in6_addr *saddr, int oif, int strict)
885 {
886         struct flowi6 fl6 = {
887                 .flowi6_oif = oif,
888                 .daddr = *daddr,
889         };
890         struct dst_entry *dst;
891         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
892
893         if (saddr) {
894                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
895                 flags |= RT6_LOOKUP_F_HAS_SADDR;
896         }
897
898         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
899         if (dst->error == 0)
900                 return (struct rt6_info *) dst;
901
902         dst_release(dst);
903
904         return NULL;
905 }
906 EXPORT_SYMBOL(rt6_lookup);
907
908 /* ip6_ins_rt is called with FREE table->tb6_lock.
909    It takes a new route entry; if the addition fails for any reason
910    the route is freed. In any case, if the caller does not hold a
911    reference, it may be destroyed.
912  */
913
914 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
915                         struct mx6_config *mxc)
916 {
917         int err;
918         struct fib6_table *table;
919
920         table = rt->rt6i_table;
921         write_lock_bh(&table->tb6_lock);
922         err = fib6_add(&table->tb6_root, rt, info, mxc);
923         write_unlock_bh(&table->tb6_lock);
924
925         return err;
926 }
927
928 int ip6_ins_rt(struct rt6_info *rt)
929 {
930         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
931         struct mx6_config mxc = { .mx = NULL, };
932
933         return __ip6_ins_rt(rt, &info, &mxc);
934 }
935
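/* Clone @ort into a host (/128) RTF_CACHE route for @daddr (and, with
 * CONFIG_IPV6_SUBTREES, for @saddr), used to hold per-destination state
 * such as a discovered path MTU.
 */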
936 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
937                                            const struct in6_addr *daddr,
938                                            const struct in6_addr *saddr)
939 {
940         struct rt6_info *rt;
941
942         /*
943          *      Clone the route.
944          */
945
946         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
947                 ort = (struct rt6_info *)ort->dst.from;
948
949         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
950
951         if (!rt)
952                 return NULL;
953
954         ip6_rt_copy_init(rt, ort);
955         rt->rt6i_flags |= RTF_CACHE;
956         rt->rt6i_metric = 0;
957         rt->dst.flags |= DST_HOST;
958         rt->rt6i_dst.addr = *daddr;
959         rt->rt6i_dst.plen = 128;
960
961         if (!rt6_is_gw_or_nonexthop(ort)) {
962                 if (ort->rt6i_dst.plen != 128 &&
963                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
964                         rt->rt6i_flags |= RTF_ANYCAST;
965 #ifdef CONFIG_IPV6_SUBTREES
966                 if (rt->rt6i_src.plen && saddr) {
967                         rt->rt6i_src.addr = *saddr;
968                         rt->rt6i_src.plen = 128;
969                 }
970 #endif
971         }
972
973         return rt;
974 }
975
976 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
977 {
978         struct rt6_info *pcpu_rt;
979
980         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
981                                   rt->dst.dev, rt->dst.flags);
982
983         if (!pcpu_rt)
984                 return NULL;
985         ip6_rt_copy_init(pcpu_rt, rt);
986         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
987         pcpu_rt->rt6i_flags |= RTF_PCPU;
988         return pcpu_rt;
989 }
990
991 /* It should be called with read_lock_bh(&tb6_lock) acquired */
992 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
993 {
994         struct rt6_info *pcpu_rt, **p;
995
996         p = this_cpu_ptr(rt->rt6i_pcpu);
997         pcpu_rt = *p;
998
999         if (pcpu_rt) {
1000                 dst_hold(&pcpu_rt->dst);
1001                 rt6_dst_from_metrics_check(pcpu_rt);
1002         }
1003         return pcpu_rt;
1004 }
1005
1006 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1007 {
1008         struct fib6_table *table = rt->rt6i_table;
1009         struct rt6_info *pcpu_rt, *prev, **p;
1010
1011         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1012         if (!pcpu_rt) {
1013                 struct net *net = dev_net(rt->dst.dev);
1014
1015                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1016                 return net->ipv6.ip6_null_entry;
1017         }
1018
1019         read_lock_bh(&table->tb6_lock);
1020         if (rt->rt6i_pcpu) {
1021                 p = this_cpu_ptr(rt->rt6i_pcpu);
1022                 prev = cmpxchg(p, NULL, pcpu_rt);
1023                 if (prev) {
1024                         /* If someone did it before us, return prev instead */
1025                         dst_destroy(&pcpu_rt->dst);
1026                         pcpu_rt = prev;
1027                 }
1028         } else {
1029                 /* rt has been removed from the fib6 tree
1030                  * before we have a chance to acquire the read_lock.
1031                  * In this case, don't bother to create a pcpu rt
1032                  * since rt is going away anyway.  The next
1033                  * dst_check() will trigger a re-lookup.
1034                  */
1035                 dst_destroy(&pcpu_rt->dst);
1036                 pcpu_rt = rt;
1037         }
1038         dst_hold(&pcpu_rt->dst);
1039         rt6_dst_from_metrics_check(pcpu_rt);
1040         read_unlock_bh(&table->tb6_lock);
1041         return pcpu_rt;
1042 }
1043
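/* Core policy-routing lookup shared by the input and output paths:
 * selects a route (with round-robin and multipath handling), retrying
 * without RT6_LOOKUP_F_REACHABLE if nothing reachable was found, and
 * returns either the fib entry itself, an uncached RTF_CACHE clone, or
 * a per-cpu copy of it.
 */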
1044 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1045                                       struct flowi6 *fl6, int flags)
1046 {
1047         struct fib6_node *fn, *saved_fn;
1048         struct rt6_info *rt;
1049         int strict = 0;
1050
1051         strict |= flags & RT6_LOOKUP_F_IFACE;
1052         if (net->ipv6.devconf_all->forwarding == 0)
1053                 strict |= RT6_LOOKUP_F_REACHABLE;
1054
1055         read_lock_bh(&table->tb6_lock);
1056
1057         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1058         saved_fn = fn;
1059
1060         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1061                 oif = 0;
1062
1063 redo_rt6_select:
1064         rt = rt6_select(fn, oif, strict);
1065         if (rt->rt6i_nsiblings)
1066                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1067         if (rt == net->ipv6.ip6_null_entry) {
1068                 fn = fib6_backtrack(fn, &fl6->saddr);
1069                 if (fn)
1070                         goto redo_rt6_select;
1071                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1072                         /* also consider unreachable route */
1073                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1074                         fn = saved_fn;
1075                         goto redo_rt6_select;
1076                 }
1077         }
1078
1079
1080         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1081                 dst_use(&rt->dst, jiffies);
1082                 read_unlock_bh(&table->tb6_lock);
1083
1084                 rt6_dst_from_metrics_check(rt);
1085
1086                 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1087                 return rt;
1088         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1089                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1090                 /* Create a RTF_CACHE clone which will not be
1091                  * owned by the fib6 tree.  It is for the special case where
1092                  * the daddr in the skb during the neighbor look-up is different
1093                  * from the fl6->daddr used to look up the route here.
1094                  */
1095
1096                 struct rt6_info *uncached_rt;
1097
1098                 dst_use(&rt->dst, jiffies);
1099                 read_unlock_bh(&table->tb6_lock);
1100
1101                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1102                 dst_release(&rt->dst);
1103
1104                 if (uncached_rt)
1105                         rt6_uncached_list_add(uncached_rt);
1106                 else
1107                         uncached_rt = net->ipv6.ip6_null_entry;
1108
1109                 dst_hold(&uncached_rt->dst);
1110
1111                 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1112                 return uncached_rt;
1113
1114         } else {
1115                 /* Get a percpu copy */
1116
1117                 struct rt6_info *pcpu_rt;
1118
1119                 rt->dst.lastuse = jiffies;
1120                 rt->dst.__use++;
1121                 pcpu_rt = rt6_get_pcpu_route(rt);
1122
1123                 if (pcpu_rt) {
1124                         read_unlock_bh(&table->tb6_lock);
1125                 } else {
1126                         /* We have to do the read_unlock first
1127                          * because rt6_make_pcpu_route() may trigger
1128                          * ip6_dst_gc() which will take the write_lock.
1129                          */
1130                         dst_hold(&rt->dst);
1131                         read_unlock_bh(&table->tb6_lock);
1132                         pcpu_rt = rt6_make_pcpu_route(rt);
1133                         dst_release(&rt->dst);
1134                 }
1135
1136                 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1137                 return pcpu_rt;
1138
1139         }
1140 }
1141
1142 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1143                                             struct flowi6 *fl6, int flags)
1144 {
1145         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1146 }
1147
1148 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1149                                                 struct net_device *dev,
1150                                                 struct flowi6 *fl6, int flags)
1151 {
1152         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1153                 flags |= RT6_LOOKUP_F_IFACE;
1154
1155         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1156 }
1157
1158 void ip6_route_input(struct sk_buff *skb)
1159 {
1160         const struct ipv6hdr *iph = ipv6_hdr(skb);
1161         struct net *net = dev_net(skb->dev);
1162         int flags = RT6_LOOKUP_F_HAS_SADDR;
1163         struct ip_tunnel_info *tun_info;
1164         struct flowi6 fl6 = {
1165                 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1166                 .daddr = iph->daddr,
1167                 .saddr = iph->saddr,
1168                 .flowlabel = ip6_flowinfo(iph),
1169                 .flowi6_mark = skb->mark,
1170                 .flowi6_proto = iph->nexthdr,
1171         };
1172
1173         tun_info = skb_tunnel_info(skb);
1174         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1175                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1176         skb_dst_drop(skb);
1177         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1178 }
1179
1180 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1181                                              struct flowi6 *fl6, int flags)
1182 {
1183         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1184 }
1185
1186 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1187                                     struct flowi6 *fl6)
1188 {
1189         struct dst_entry *dst;
1190         int flags = 0;
1191         bool any_src;
1192
1193         dst = l3mdev_rt6_dst_by_oif(net, fl6);
1194         if (dst)
1195                 return dst;
1196
1197         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1198
1199         any_src = ipv6_addr_any(&fl6->saddr);
1200         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1201             (fl6->flowi6_oif && any_src))
1202                 flags |= RT6_LOOKUP_F_IFACE;
1203
1204         if (!any_src)
1205                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1206         else if (sk)
1207                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1208
1209         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1210 }
1211 EXPORT_SYMBOL(ip6_route_output);
1212
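/* Replace @dst_orig with a blackhole copy: the new entry keeps the
 * original's device, metrics and addresses but discards every packet via
 * dst_discard/dst_discard_out.  The reference on @dst_orig is released.
 */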
1213 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1214 {
1215         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1216         struct dst_entry *new = NULL;
1217
1218         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1219         if (rt) {
1220                 rt6_info_init(rt);
1221
1222                 new = &rt->dst;
1223                 new->__use = 1;
1224                 new->input = dst_discard;
1225                 new->output = dst_discard_out;
1226
1227                 dst_copy_metrics(new, &ort->dst);
1228                 rt->rt6i_idev = ort->rt6i_idev;
1229                 if (rt->rt6i_idev)
1230                         in6_dev_hold(rt->rt6i_idev);
1231
1232                 rt->rt6i_gateway = ort->rt6i_gateway;
1233                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1234                 rt->rt6i_metric = 0;
1235
1236                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237 #ifdef CONFIG_IPV6_SUBTREES
1238                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239 #endif
1240
1241                 dst_free(new);
1242         }
1243
1244         dst_release(dst_orig);
1245         return new ? new : ERR_PTR(-ENOMEM);
1246 }
1247
1248 /*
1249  *      Destination cache support functions
1250  */
1251
1252 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253 {
1254         if (rt->dst.from &&
1255             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257 }
1258
1259 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260 {
1261         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262                 return NULL;
1263
1264         if (rt6_check_expired(rt))
1265                 return NULL;
1266
1267         return &rt->dst;
1268 }
1269
1270 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271 {
1272         if (!__rt6_check_expired(rt) &&
1273             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275                 return &rt->dst;
1276         else
1277                 return NULL;
1278 }
1279
1280 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281 {
1282         struct rt6_info *rt;
1283
1284         rt = (struct rt6_info *) dst;
1285
1286         /* All IPV6 dsts are created with ->obsolete set to the value
1287          * DST_OBSOLETE_FORCE_CHK, which always forces validation calls
1288          * down into this function.
1289          */
1290
1291         rt6_dst_from_metrics_check(rt);
1292
1293         if (rt->rt6i_flags & RTF_PCPU ||
1294             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1295                 return rt6_dst_from_check(rt, cookie);
1296         else
1297                 return rt6_check(rt, cookie);
1298 }
1299
1300 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301 {
1302         struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304         if (rt) {
1305                 if (rt->rt6i_flags & RTF_CACHE) {
1306                         if (rt6_check_expired(rt)) {
1307                                 ip6_del_rt(rt);
1308                                 dst = NULL;
1309                         }
1310                 } else {
1311                         dst_release(dst);
1312                         dst = NULL;
1313                 }
1314         }
1315         return dst;
1316 }
1317
1318 static void ip6_link_failure(struct sk_buff *skb)
1319 {
1320         struct rt6_info *rt;
1321
1322         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1323
1324         rt = (struct rt6_info *) skb_dst(skb);
1325         if (rt) {
1326                 if (rt->rt6i_flags & RTF_CACHE) {
1327                         dst_hold(&rt->dst);
1328                         ip6_del_rt(rt);
1329                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1330                         rt->rt6i_node->fn_sernum = -1;
1331                 }
1332         }
1333 }
1334
1335 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336 {
1337         struct net *net = dev_net(rt->dst.dev);
1338
1339         rt->rt6i_flags |= RTF_MODIFIED;
1340         rt->rt6i_pmtu = mtu;
1341         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342 }
1343
1344 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345 {
1346         return !(rt->rt6i_flags & RTF_CACHE) &&
1347                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348 }
1349
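/* Record a lower path MTU for @dst: routes that cannot be cloned are
 * updated in place, otherwise an RTF_CACHE clone is created for this
 * destination/source pair and inserted so later lookups see the new MTU.
 * Local routes and MTU values not below the current one are ignored.
 */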
1350 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351                                  const struct ipv6hdr *iph, u32 mtu)
1352 {
1353         struct rt6_info *rt6 = (struct rt6_info *)dst;
1354
1355         if (rt6->rt6i_flags & RTF_LOCAL)
1356                 return;
1357
1358         dst_confirm(dst);
1359         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360         if (mtu >= dst_mtu(dst))
1361                 return;
1362
1363         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1364                 rt6_do_update_pmtu(rt6, mtu);
1365         } else {
1366                 const struct in6_addr *daddr, *saddr;
1367                 struct rt6_info *nrt6;
1368
1369                 if (iph) {
1370                         daddr = &iph->daddr;
1371                         saddr = &iph->saddr;
1372                 } else if (sk) {
1373                         daddr = &sk->sk_v6_daddr;
1374                         saddr = &inet6_sk(sk)->saddr;
1375                 } else {
1376                         return;
1377                 }
1378                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379                 if (nrt6) {
1380                         rt6_do_update_pmtu(nrt6, mtu);
1381
1382                         /* ip6_ins_rt(nrt6) will bump the
1383                          * rt6->rt6i_node->fn_sernum
1384                          * which will fail the next rt6_check() and
1385                          * invalidate the sk->sk_dst_cache.
1386                          */
1387                         ip6_ins_rt(nrt6);
1388                 }
1389         }
1390 }
1391
1392 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393                                struct sk_buff *skb, u32 mtu)
1394 {
1395         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396 }
1397
1398 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399                      int oif, u32 mark)
1400 {
1401         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402         struct dst_entry *dst;
1403         struct flowi6 fl6;
1404
1405         memset(&fl6, 0, sizeof(fl6));
1406         fl6.flowi6_oif = oif;
1407         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1408         fl6.daddr = iph->daddr;
1409         fl6.saddr = iph->saddr;
1410         fl6.flowlabel = ip6_flowinfo(iph);
1411
1412         dst = ip6_route_output(net, NULL, &fl6);
1413         if (!dst->error)
1414                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1415         dst_release(dst);
1416 }
1417 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420 {
1421         ip6_update_pmtu(skb, sock_net(sk), mtu,
1422                         sk->sk_bound_dev_if, sk->sk_mark);
1423 }
1424 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1425
1426 /* Handle redirects */
1427 struct ip6rd_flowi {
1428         struct flowi6 fl6;
1429         struct in6_addr gateway;
1430 };
1431
1432 static struct rt6_info *__ip6_route_redirect(struct net *net,
1433                                              struct fib6_table *table,
1434                                              struct flowi6 *fl6,
1435                                              int flags)
1436 {
1437         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1438         struct rt6_info *rt;
1439         struct fib6_node *fn;
1440
1441         /* Get the "current" route for this destination and
1442          * check if the redirect has come from the appropriate router.
1443          *
1444          * RFC 4861 specifies that redirects should only be
1445          * accepted if they come from the nexthop to the target.
1446          * Due to the way the routes are chosen, this notion
1447          * is a bit fuzzy and one might need to check all possible
1448          * routes.
1449          */
1450
1451         read_lock_bh(&table->tb6_lock);
1452         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1453 restart:
1454         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1455                 if (rt6_check_expired(rt))
1456                         continue;
1457                 if (rt->dst.error)
1458                         break;
1459                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1460                         continue;
1461                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1462                         continue;
1463                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1464                         continue;
1465                 break;
1466         }
1467
1468         if (!rt)
1469                 rt = net->ipv6.ip6_null_entry;
1470         else if (rt->dst.error) {
1471                 rt = net->ipv6.ip6_null_entry;
1472                 goto out;
1473         }
1474
1475         if (rt == net->ipv6.ip6_null_entry) {
1476                 fn = fib6_backtrack(fn, &fl6->saddr);
1477                 if (fn)
1478                         goto restart;
1479         }
1480
1481 out:
1482         dst_hold(&rt->dst);
1483
1484         read_unlock_bh(&table->tb6_lock);
1485
1486         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1487         return rt;
1488 };
1489
1490 static struct dst_entry *ip6_route_redirect(struct net *net,
1491                                         const struct flowi6 *fl6,
1492                                         const struct in6_addr *gateway)
1493 {
1494         int flags = RT6_LOOKUP_F_HAS_SADDR;
1495         struct ip6rd_flowi rdfl;
1496
1497         rdfl.fl6 = *fl6;
1498         rdfl.gateway = *gateway;
1499
1500         return fib6_rule_lookup(net, &rdfl.fl6,
1501                                 flags, __ip6_route_redirect);
1502 }
1503
1504 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1505 {
1506         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1507         struct dst_entry *dst;
1508         struct flowi6 fl6;
1509
1510         memset(&fl6, 0, sizeof(fl6));
1511         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1512         fl6.flowi6_oif = oif;
1513         fl6.flowi6_mark = mark;
1514         fl6.daddr = iph->daddr;
1515         fl6.saddr = iph->saddr;
1516         fl6.flowlabel = ip6_flowinfo(iph);
1517
1518         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1519         rt6_do_redirect(dst, NULL, skb);
1520         dst_release(dst);
1521 }
1522 EXPORT_SYMBOL_GPL(ip6_redirect);
1523
1524 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1525                             u32 mark)
1526 {
1527         const struct ipv6hdr *iph = ipv6_hdr(skb);
1528         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1529         struct dst_entry *dst;
1530         struct flowi6 fl6;
1531
1532         memset(&fl6, 0, sizeof(fl6));
1533         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1534         fl6.flowi6_oif = oif;
1535         fl6.flowi6_mark = mark;
1536         fl6.daddr = msg->dest;
1537         fl6.saddr = iph->daddr;
1538
1539         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1540         rt6_do_redirect(dst, NULL, skb);
1541         dst_release(dst);
1542 }
1543
1544 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1545 {
1546         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1547 }
1548 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1549
1550 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1551 {
1552         struct net_device *dev = dst->dev;
1553         unsigned int mtu = dst_mtu(dst);
1554         struct net *net = dev_net(dev);
1555
1556         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1557
1558         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1559                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1560
1561         /*
1562          * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN, and the
1563          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1564          * IPV6_MAXPLEN itself is also valid and means "any MSS,
1565          * rely only on PMTU discovery".
1566          */
1567         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1568                 mtu = IPV6_MAXPLEN;
1569         return mtu;
1570 }
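
/* Editor's note: illustrative userland sketch added for exposition; it is
 * not part of the kernel source and the EX_* constants are hypothetical
 * stand-ins for the real sizes. It walks the arithmetic of
 * ip6_default_advmss() with concrete numbers: for an Ethernet MTU of 1500
 * the advertised MSS is 1500 - 40 - 20 = 1440.
 */
#if 0	/* example only, never compiled as part of this file */
#include <stdio.h>

#define EX_IPV6_MAXPLEN	65535	/* IPV6_MAXPLEN */
#define EX_IPV6_HDR_LEN	40	/* sizeof(struct ipv6hdr) */
#define EX_TCP_HDR_LEN	20	/* sizeof(struct tcphdr), no options */

static unsigned int example_default_advmss(unsigned int link_mtu,
					   unsigned int min_advmss)
{
	unsigned int mss = link_mtu - EX_IPV6_HDR_LEN - EX_TCP_HDR_LEN;

	if (mss < min_advmss)
		mss = min_advmss;
	/* Very large links advertise IPV6_MAXPLEN: "any MSS, rely on PMTU". */
	if (mss > EX_IPV6_MAXPLEN - EX_TCP_HDR_LEN)
		mss = EX_IPV6_MAXPLEN;
	return mss;
}

int main(void)
{
	printf("%u\n", example_default_advmss(1500, 1220));	/* prints 1440 */
	return 0;
}
#endif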
1571
1572 static unsigned int ip6_mtu(const struct dst_entry *dst)
1573 {
1574         const struct rt6_info *rt = (const struct rt6_info *)dst;
1575         unsigned int mtu = rt->rt6i_pmtu;
1576         struct inet6_dev *idev;
1577
1578         if (mtu)
1579                 goto out;
1580
1581         mtu = dst_metric_raw(dst, RTAX_MTU);
1582         if (mtu)
1583                 goto out;
1584
1585         mtu = IPV6_MIN_MTU;
1586
1587         rcu_read_lock();
1588         idev = __in6_dev_get(dst->dev);
1589         if (idev)
1590                 mtu = idev->cnf.mtu6;
1591         rcu_read_unlock();
1592
1593 out:
1594         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1595 }
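
/* Editor's note: illustrative sketch added for exposition; it is not part
 * of the kernel source and the function name and parameters are
 * hypothetical. It restates the MTU selection order of ip6_mtu() above:
 * a learned per-route PMTU wins, then an explicit RTAX_MTU metric, then the
 * device's IPv6 MTU, with IPV6_MIN_MTU as the fallback and IP6_MAX_MTU as
 * the upper bound.
 */
#if 0	/* example only, never compiled as part of this file */
static unsigned int example_ip6_mtu(unsigned int route_pmtu,
				    unsigned int metric_mtu,
				    unsigned int device_mtu6,
				    unsigned int min_mtu,	/* IPV6_MIN_MTU */
				    unsigned int max_mtu)	/* IP6_MAX_MTU */
{
	unsigned int mtu;

	if (route_pmtu)			/* learned PMTU wins */
		mtu = route_pmtu;
	else if (metric_mtu)		/* then an explicit RTAX_MTU metric */
		mtu = metric_mtu;
	else if (device_mtu6)		/* then the device's IPv6 MTU */
		mtu = device_mtu6;
	else				/* last resort: the IPv6 minimum */
		mtu = min_mtu;

	return mtu < max_mtu ? mtu : max_mtu;
}
#endif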
1596
1597 static struct dst_entry *icmp6_dst_gc_list;
1598 static DEFINE_SPINLOCK(icmp6_dst_lock);
1599
1600 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1601                                   struct flowi6 *fl6)
1602 {
1603         struct dst_entry *dst;
1604         struct rt6_info *rt;
1605         struct inet6_dev *idev = in6_dev_get(dev);
1606         struct net *net = dev_net(dev);
1607
1608         if (unlikely(!idev))
1609                 return ERR_PTR(-ENODEV);
1610
1611         rt = ip6_dst_alloc(net, dev, 0);
1612         if (unlikely(!rt)) {
1613                 in6_dev_put(idev);
1614                 dst = ERR_PTR(-ENOMEM);
1615                 goto out;
1616         }
1617
1618         rt->dst.flags |= DST_HOST;
1619         rt->dst.output  = ip6_output;
1620         atomic_set(&rt->dst.__refcnt, 1);
1621         rt->rt6i_gateway  = fl6->daddr;
1622         rt->rt6i_dst.addr = fl6->daddr;
1623         rt->rt6i_dst.plen = 128;
1624         rt->rt6i_idev     = idev;
1625         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1626
1627         spin_lock_bh(&icmp6_dst_lock);
1628         rt->dst.next = icmp6_dst_gc_list;
1629         icmp6_dst_gc_list = &rt->dst;
1630         spin_unlock_bh(&icmp6_dst_lock);
1631
1632         fib6_force_start_gc(net);
1633
1634         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1635
1636 out:
1637         return dst;
1638 }
1639
1640 int icmp6_dst_gc(void)
1641 {
1642         struct dst_entry *dst, **pprev;
1643         int more = 0;
1644
1645         spin_lock_bh(&icmp6_dst_lock);
1646         pprev = &icmp6_dst_gc_list;
1647
1648         while ((dst = *pprev) != NULL) {
1649                 if (!atomic_read(&dst->__refcnt)) {
1650                         *pprev = dst->next;
1651                         dst_free(dst);
1652                 } else {
1653                         pprev = &dst->next;
1654                         ++more;
1655                 }
1656         }
1657
1658         spin_unlock_bh(&icmp6_dst_lock);
1659
1660         return more;
1661 }
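
/* Editor's note: illustrative userland sketch added for exposition; it is
 * not part of the kernel source. icmp6_dst_gc() above uses the classic
 * pointer-to-pointer walk so an entry can be unlinked from a singly linked
 * list without tracking a separate "previous" node; the generic form of
 * that idiom looks like this.
 */
#if 0	/* example only, never compiled as part of this file */
#include <stdlib.h>

struct node {
	struct node *next;
	int refcnt;
};

/* Unlink and free every node whose refcount dropped to zero;
 * returns how many nodes remain on the list.
 */
static int gc_list(struct node **head)
{
	struct node **pprev = head;
	struct node *n;
	int remaining = 0;

	while ((n = *pprev) != NULL) {
		if (n->refcnt == 0) {
			*pprev = n->next;	/* unlink; pprev stays put */
			free(n);
		} else {
			pprev = &n->next;	/* step over a live node */
			remaining++;
		}
	}
	return remaining;
}
#endif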
1662
1663 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1664                             void *arg)
1665 {
1666         struct dst_entry *dst, **pprev;
1667
1668         spin_lock_bh(&icmp6_dst_lock);
1669         pprev = &icmp6_dst_gc_list;
1670         while ((dst = *pprev) != NULL) {
1671                 struct rt6_info *rt = (struct rt6_info *) dst;
1672                 if (func(rt, arg)) {
1673                         *pprev = dst->next;
1674                         dst_free(dst);
1675                 } else {
1676                         pprev = &dst->next;
1677                 }
1678         }
1679         spin_unlock_bh(&icmp6_dst_lock);
1680 }
1681
1682 static int ip6_dst_gc(struct dst_ops *ops)
1683 {
1684         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1685         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1686         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1687         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1688         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1689         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1690         int entries;
1691
1692         entries = dst_entries_get_fast(ops);
1693         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1694             entries <= rt_max_size)
1695                 goto out;
1696
1697         net->ipv6.ip6_rt_gc_expire++;
1698         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1699         entries = dst_entries_get_slow(ops);
1700         if (entries < ops->gc_thresh)
1701                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1702 out:
1703         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1704         return entries > rt_max_size;
1705 }
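
/* Editor's note: illustrative sketch added for exposition; it is not part
 * of the kernel source and the names are hypothetical. It condenses the
 * pacing decision at the top of ip6_dst_gc(): garbage collection runs only
 * when the table has grown past rt_max_size or the minimum interval since
 * the last run has elapsed.
 */
#if 0	/* example only, never compiled as part of this file */
#include <stdbool.h>

static bool should_run_gc(unsigned long now, unsigned long last_gc,
			  unsigned long min_interval,
			  int entries, int max_size)
{
	if (entries > max_size)			/* over the limit: always collect */
		return true;
	return now - last_gc >= min_interval;	/* otherwise rate-limit */
}
#endif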
1706
1707 static int ip6_convert_metrics(struct mx6_config *mxc,
1708                                const struct fib6_config *cfg)
1709 {
1710         bool ecn_ca = false;
1711         struct nlattr *nla;
1712         int remaining;
1713         u32 *mp;
1714
1715         if (!cfg->fc_mx)
1716                 return 0;
1717
1718         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1719         if (unlikely(!mp))
1720                 return -ENOMEM;
1721
1722         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1723                 int type = nla_type(nla);
1724                 u32 val;
1725
1726                 if (!type)
1727                         continue;
1728                 if (unlikely(type > RTAX_MAX))
1729                         goto err;
1730
1731                 if (type == RTAX_CC_ALGO) {
1732                         char tmp[TCP_CA_NAME_MAX];
1733
1734                         nla_strlcpy(tmp, nla, sizeof(tmp));
1735                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1736                         if (val == TCP_CA_UNSPEC)
1737                                 goto err;
1738                 } else {
1739                         val = nla_get_u32(nla);
1740                 }
1741                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1742                         goto err;
1743
1744                 mp[type - 1] = val;
1745                 __set_bit(type - 1, mxc->mx_valid);
1746         }
1747
1748         if (ecn_ca) {
1749                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1750                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1751         }
1752
1753         mxc->mx = mp;
1754         return 0;
1755  err:
1756         kfree(mp);
1757         return -EINVAL;
1758 }
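
/* Editor's note: illustrative userland sketch added for exposition; it is
 * not part of the kernel source and EX_RTAX_MAX and the struct are
 * hypothetical stand-ins. It shows the storage scheme ip6_convert_metrics()
 * uses: metric type N goes into slot N - 1 of the array and is flagged in a
 * validity bitmap, so "not set" is distinguishable from a stored value of
 * zero.
 */
#if 0	/* example only, never compiled as part of this file */
#define EX_RTAX_MAX	16	/* stand-in; the real RTAX_MAX may differ */

struct ex_metrics {
	unsigned long valid;		/* bit (type - 1) set => slot valid */
	unsigned int mx[EX_RTAX_MAX];
};

static int ex_set_metric(struct ex_metrics *m, unsigned int type,
			 unsigned int value)
{
	if (type == 0 || type > EX_RTAX_MAX)
		return -1;		/* mirrors the type range check above */
	m->mx[type - 1] = value;
	m->valid |= 1UL << (type - 1);
	return 0;
}
#endif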
1759
1760 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1761 {
1762         struct net *net = cfg->fc_nlinfo.nl_net;
1763         struct rt6_info *rt = NULL;
1764         struct net_device *dev = NULL;
1765         struct inet6_dev *idev = NULL;
1766         struct fib6_table *table;
1767         int addr_type;
1768         int err = -EINVAL;
1769
1770         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1771                 goto out;
1772 #ifndef CONFIG_IPV6_SUBTREES
1773         if (cfg->fc_src_len)
1774                 goto out;
1775 #endif
1776         if (cfg->fc_ifindex) {
1777                 err = -ENODEV;
1778                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1779                 if (!dev)
1780                         goto out;
1781                 idev = in6_dev_get(dev);
1782                 if (!idev)
1783                         goto out;
1784         }
1785
1786         if (cfg->fc_metric == 0)
1787                 cfg->fc_metric = IP6_RT_PRIO_USER;
1788
1789         err = -ENOBUFS;
1790         if (cfg->fc_nlinfo.nlh &&
1791             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1792                 table = fib6_get_table(net, cfg->fc_table);
1793                 if (!table) {
1794                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1795                         table = fib6_new_table(net, cfg->fc_table);
1796                 }
1797         } else {
1798                 table = fib6_new_table(net, cfg->fc_table);
1799         }
1800
1801         if (!table)
1802                 goto out;
1803
1804         rt = ip6_dst_alloc(net, NULL,
1805                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1806
1807         if (!rt) {
1808                 err = -ENOMEM;
1809                 goto out;
1810         }
1811
1812         if (cfg->fc_flags & RTF_EXPIRES)
1813                 rt6_set_expires(rt, jiffies +
1814                                 clock_t_to_jiffies(cfg->fc_expires));
1815         else
1816                 rt6_clean_expires(rt);
1817
1818         if (cfg->fc_protocol == RTPROT_UNSPEC)
1819                 cfg->fc_protocol = RTPROT_BOOT;
1820         rt->rt6i_protocol = cfg->fc_protocol;
1821
1822         addr_type = ipv6_addr_type(&cfg->fc_dst);
1823
1824         if (addr_type & IPV6_ADDR_MULTICAST)
1825                 rt->dst.input = ip6_mc_input;
1826         else if (cfg->fc_flags & RTF_LOCAL)
1827                 rt->dst.input = ip6_input;
1828         else
1829                 rt->dst.input = ip6_forward;
1830
1831         rt->dst.output = ip6_output;
1832
1833         if (cfg->fc_encap) {
1834                 struct lwtunnel_state *lwtstate;
1835
1836                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1837                                            cfg->fc_encap, AF_INET6, cfg,
1838                                            &lwtstate);
1839                 if (err)
1840                         goto out;
1841                 rt->dst.lwtstate = lwtstate_get(lwtstate);
1842                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1843                         rt->dst.lwtstate->orig_output = rt->dst.output;
1844                         rt->dst.output = lwtunnel_output;
1845                 }
1846                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1847                         rt->dst.lwtstate->orig_input = rt->dst.input;
1848                         rt->dst.input = lwtunnel_input;
1849                 }
1850         }
1851
1852         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1853         rt->rt6i_dst.plen = cfg->fc_dst_len;
1854         if (rt->rt6i_dst.plen == 128)
1855                 rt->dst.flags |= DST_HOST;
1856
1857 #ifdef CONFIG_IPV6_SUBTREES
1858         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1859         rt->rt6i_src.plen = cfg->fc_src_len;
1860 #endif
1861
1862         rt->rt6i_metric = cfg->fc_metric;
1863
1864         /* We cannot add true routes via loopback here;
1865            they would result in kernel looping. Promote them to reject routes.
1866          */
1867         if ((cfg->fc_flags & RTF_REJECT) ||
1868             (dev && (dev->flags & IFF_LOOPBACK) &&
1869              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1870              !(cfg->fc_flags & RTF_LOCAL))) {
1871                 /* hold loopback dev/idev if we haven't done so. */
1872                 if (dev != net->loopback_dev) {
1873                         if (dev) {
1874                                 dev_put(dev);
1875                                 in6_dev_put(idev);
1876                         }
1877                         dev = net->loopback_dev;
1878                         dev_hold(dev);
1879                         idev = in6_dev_get(dev);
1880                         if (!idev) {
1881                                 err = -ENODEV;
1882                                 goto out;
1883                         }
1884                 }
1885                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1886                 switch (cfg->fc_type) {
1887                 case RTN_BLACKHOLE:
1888                         rt->dst.error = -EINVAL;
1889                         rt->dst.output = dst_discard_out;
1890                         rt->dst.input = dst_discard;
1891                         break;
1892                 case RTN_PROHIBIT:
1893                         rt->dst.error = -EACCES;
1894                         rt->dst.output = ip6_pkt_prohibit_out;
1895                         rt->dst.input = ip6_pkt_prohibit;
1896                         break;
1897                 case RTN_THROW:
1898                 case RTN_UNREACHABLE:
1899                 default:
1900                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1901                                         : (cfg->fc_type == RTN_UNREACHABLE)
1902                                         ? -EHOSTUNREACH : -ENETUNREACH;
1903                         rt->dst.output = ip6_pkt_discard_out;
1904                         rt->dst.input = ip6_pkt_discard;
1905                         break;
1906                 }
1907                 goto install_route;
1908         }
1909
1910         if (cfg->fc_flags & RTF_GATEWAY) {
1911                 const struct in6_addr *gw_addr;
1912                 int gwa_type;
1913
1914                 gw_addr = &cfg->fc_gateway;
1915                 gwa_type = ipv6_addr_type(gw_addr);
1916
1917                 /* If gw_addr is local, we may fail to detect this while the
1918                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
1919                  * will return the already-added prefix route via the interface
1920                  * the prefix route was assigned to, which might be non-loopback.
1921                  */
1922                 err = -EINVAL;
1923                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1924                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1925                                             dev : NULL, 0, 0))
1926                         goto out;
1927
1928                 rt->rt6i_gateway = *gw_addr;
1929
1930                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1931                         struct rt6_info *grt;
1932
1933                         /* IPv6 strictly prohibits using non-link-local
1934                            addresses as nexthop addresses.
1935                            Otherwise, the router will not be able to send redirects.
1936                            That is generally desirable, but in some (rare!)
1937                            circumstances (SIT, PtP, NBMA NOARP links) it is
1938                            handy to allow some exceptions. --ANK
1939                          */
1940                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1941                                 goto out;
1942
1943                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1944
1945                         err = -EHOSTUNREACH;
1946                         if (!grt)
1947                                 goto out;
1948                         if (dev) {
1949                                 if (dev != grt->dst.dev) {
1950                                         ip6_rt_put(grt);
1951                                         goto out;
1952                                 }
1953                         } else {
1954                                 dev = grt->dst.dev;
1955                                 idev = grt->rt6i_idev;
1956                                 dev_hold(dev);
1957                                 in6_dev_hold(grt->rt6i_idev);
1958                         }
1959                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1960                                 err = 0;
1961                         ip6_rt_put(grt);
1962
1963                         if (err)
1964                                 goto out;
1965                 }
1966                 err = -EINVAL;
1967                 if (!dev || (dev->flags & IFF_LOOPBACK))
1968                         goto out;
1969         }
1970
1971         err = -ENODEV;
1972         if (!dev)
1973                 goto out;
1974
1975         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1976                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1977                         err = -EINVAL;
1978                         goto out;
1979                 }
1980                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1981                 rt->rt6i_prefsrc.plen = 128;
1982         } else
1983                 rt->rt6i_prefsrc.plen = 0;
1984
1985         rt->rt6i_flags = cfg->fc_flags;
1986
1987 install_route:
1988         rt->dst.dev = dev;
1989         rt->rt6i_idev = idev;
1990         rt->rt6i_table = table;
1991
1992         cfg->fc_nlinfo.nl_net = dev_net(dev);
1993
1994         return rt;
1995 out:
1996         if (dev)
1997                 dev_put(dev);
1998         if (idev)
1999                 in6_dev_put(idev);
2000         if (rt)
2001                 dst_free(&rt->dst);
2002
2003         return ERR_PTR(err);
2004 }
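
/* Editor's note: illustrative userland sketch added for exposition; it is
 * not part of the kernel source and uses the standard IN6_IS_ADDR_* macros
 * rather than kernel helpers. It mirrors the gateway check above: a
 * link-local unicast next hop can be used directly, while any other gateway
 * must first be reachable through an existing route (the rt6_lookup() path).
 */
#if 0	/* example only, never compiled as part of this file */
#include <stdbool.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static bool gateway_is_linklocal_unicast(const struct in6_addr *gw)
{
	return IN6_IS_ADDR_LINKLOCAL(gw) &&	/* fe80::/10 */
	       !IN6_IS_ADDR_MULTICAST(gw) &&
	       !IN6_IS_ADDR_UNSPECIFIED(gw);
}

int main(void)
{
	struct in6_addr gw;

	inet_pton(AF_INET6, "fe80::1", &gw);
	return gateway_is_linklocal_unicast(&gw) ? 0 : 1;
}
#endif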
2005
2006 int ip6_route_add(struct fib6_config *cfg)
2007 {
2008         struct mx6_config mxc = { .mx = NULL, };
2009         struct rt6_info *rt;
2010         int err;
2011
2012         rt = ip6_route_info_create(cfg);
2013         if (IS_ERR(rt)) {
2014                 err = PTR_ERR(rt);
2015                 rt = NULL;
2016                 goto out;
2017         }
2018
2019         err = ip6_convert_metrics(&mxc, cfg);
2020         if (err)
2021                 goto out;
2022
2023         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2024
2025         kfree(mxc.mx);
2026
2027         return err;
2028 out:
2029         if (rt)
2030                 dst_free(&rt->dst);
2031
2032         return err;
2033 }
2034
2035 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2036 {
2037         int err;
2038         struct fib6_table *table;
2039         struct net *net = dev_net(rt->dst.dev);
2040
2041         if (rt == net->ipv6.ip6_null_entry ||
2042             rt->dst.flags & DST_NOCACHE) {
2043                 err = -ENOENT;
2044                 goto out;
2045         }
2046
2047         table = rt->rt6i_table;
2048         write_lock_bh(&table->tb6_lock);
2049         err = fib6_del(rt, info);
2050         write_unlock_bh(&table->tb6_lock);
2051
2052 out:
2053         ip6_rt_put(rt);
2054         return err;
2055 }
2056
2057 int ip6_del_rt(struct rt6_info *rt)
2058 {
2059         struct nl_info info = {
2060                 .nl_net = dev_net(rt->dst.dev),
2061         };
2062         return __ip6_del_rt(rt, &info);
2063 }
2064
2065 static int ip6_route_del(struct fib6_config *cfg)
2066 {
2067         struct fib6_table *table;
2068         struct fib6_node *fn;
2069         struct rt6_info *rt;
2070         int err = -ESRCH;
2071
2072         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2073         if (!table)
2074                 return err;
2075
2076         read_lock_bh(&table->tb6_lock);
2077
2078         fn = fib6_locate(&table->tb6_root,
2079                          &cfg->fc_dst, cfg->fc_dst_len,
2080                          &cfg->fc_src, cfg->fc_src_len);
2081
2082         if (fn) {
2083                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2084                         if ((rt->rt6i_flags & RTF_CACHE) &&
2085                             !(cfg->fc_flags & RTF_CACHE))
2086                                 continue;
2087                         if (cfg->fc_ifindex &&
2088                             (!rt->dst.dev ||
2089                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2090                                 continue;
2091                         if (cfg->fc_flags & RTF_GATEWAY &&
2092                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2093                                 continue;
2094                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2095                                 continue;
2096                         dst_hold(&rt->dst);
2097                         read_unlock_bh(&table->tb6_lock);
2098
2099                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2100                 }
2101         }
2102         read_unlock_bh(&table->tb6_lock);
2103
2104         return err;
2105 }
2106
2107 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2108 {
2109         struct netevent_redirect netevent;
2110         struct rt6_info *rt, *nrt = NULL;
2111         struct ndisc_options ndopts;
2112         struct inet6_dev *in6_dev;
2113         struct neighbour *neigh;
2114         struct rd_msg *msg;
2115         int optlen, on_link;
2116         u8 *lladdr;
2117
2118         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2119         optlen -= sizeof(*msg);
2120
2121         if (optlen < 0) {
2122                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2123                 return;
2124         }
2125
2126         msg = (struct rd_msg *)icmp6_hdr(skb);
2127
2128         if (ipv6_addr_is_multicast(&msg->dest)) {
2129                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2130                 return;
2131         }
2132
2133         on_link = 0;
2134         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2135                 on_link = 1;
2136         } else if (ipv6_addr_type(&msg->target) !=
2137                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2138                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2139                 return;
2140         }
2141
2142         in6_dev = __in6_dev_get(skb->dev);
2143         if (!in6_dev)
2144                 return;
2145         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2146                 return;
2147
2148         /* RFC2461 8.1:
2149          *      The IP source address of the Redirect MUST be the same as the current
2150          *      first-hop router for the specified ICMP Destination Address.
2151          */
2152
2153         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2154                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2155                 return;
2156         }
2157
2158         lladdr = NULL;
2159         if (ndopts.nd_opts_tgt_lladdr) {
2160                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2161                                              skb->dev);
2162                 if (!lladdr) {
2163                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2164                         return;
2165                 }
2166         }
2167
2168         rt = (struct rt6_info *) dst;
2169         if (rt->rt6i_flags & RTF_REJECT) {
2170                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2171                 return;
2172         }
2173
2174         /* Redirect received -> the path was valid.
2175          * Redirects are sent only in response to data packets,
2176          * so this nexthop is apparently reachable. --ANK
2177          */
2178         dst_confirm(&rt->dst);
2179
2180         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2181         if (!neigh)
2182                 return;
2183
2184         /*
2185          *      We have finally decided to accept it.
2186          */
2187
2188         neigh_update(neigh, lladdr, NUD_STALE,
2189                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2190                      NEIGH_UPDATE_F_OVERRIDE|
2191                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2192                                      NEIGH_UPDATE_F_ISROUTER))
2193                      );
2194
2195         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2196         if (!nrt)
2197                 goto out;
2198
2199         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2200         if (on_link)
2201                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2202
2203         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2204
2205         if (ip6_ins_rt(nrt))
2206                 goto out;
2207
2208         netevent.old = &rt->dst;
2209         netevent.new = &nrt->dst;
2210         netevent.daddr = &msg->dest;
2211         netevent.neigh = neigh;
2212         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2213
2214         if (rt->rt6i_flags & RTF_CACHE) {
2215                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2216                 ip6_del_rt(rt);
2217         }
2218
2219 out:
2220         neigh_release(neigh);
2221 }
2222
2223 /*
2224  *      Misc support functions
2225  */
2226
2227 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2228 {
2229         BUG_ON(from->dst.from);
2230
2231         rt->rt6i_flags &= ~RTF_EXPIRES;
2232         dst_hold(&from->dst);
2233         rt->dst.from = &from->dst;
2234         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2235 }
2236
2237 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2238 {
2239         rt->dst.input = ort->dst.input;
2240         rt->dst.output = ort->dst.output;
2241         rt->rt6i_dst = ort->rt6i_dst;
2242         rt->dst.error = ort->dst.error;
2243         rt->rt6i_idev = ort->rt6i_idev;
2244         if (rt->rt6i_idev)
2245                 in6_dev_hold(rt->rt6i_idev);
2246         rt->dst.lastuse = jiffies;
2247         rt->rt6i_gateway = ort->rt6i_gateway;
2248         rt->rt6i_flags = ort->rt6i_flags;
2249         rt6_set_from(rt, ort);
2250         rt->rt6i_metric = ort->rt6i_metric;
2251 #ifdef CONFIG_IPV6_SUBTREES
2252         rt->rt6i_src = ort->rt6i_src;
2253 #endif
2254         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2255         rt->rt6i_table = ort->rt6i_table;
2256         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2257 }
2258
2259 #ifdef CONFIG_IPV6_ROUTE_INFO
2260 static struct rt6_info *rt6_get_route_info(struct net *net,
2261                                            const struct in6_addr *prefix, int prefixlen,
2262                                            const struct in6_addr *gwaddr, int ifindex)
2263 {
2264         struct fib6_node *fn;
2265         struct rt6_info *rt = NULL;
2266         struct fib6_table *table;
2267
2268         table = fib6_get_table(net, RT6_TABLE_INFO);
2269         if (!table)
2270                 return NULL;
2271
2272         read_lock_bh(&table->tb6_lock);
2273         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2274         if (!fn)
2275                 goto out;
2276
2277         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2278                 if (rt->dst.dev->ifindex != ifindex)
2279                         continue;
2280                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2281                         continue;
2282                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2283                         continue;
2284                 dst_hold(&rt->dst);
2285                 break;
2286         }
2287 out:
2288         read_unlock_bh(&table->tb6_lock);
2289         return rt;
2290 }
2291
2292 static struct rt6_info *rt6_add_route_info(struct net *net,
2293                                            const struct in6_addr *prefix, int prefixlen,
2294                                            const struct in6_addr *gwaddr, int ifindex,
2295                                            unsigned int pref)
2296 {
2297         struct fib6_config cfg = {
2298                 .fc_metric      = IP6_RT_PRIO_USER,
2299                 .fc_ifindex     = ifindex,
2300                 .fc_dst_len     = prefixlen,
2301                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2302                                   RTF_UP | RTF_PREF(pref),
2303                 .fc_nlinfo.portid = 0,
2304                 .fc_nlinfo.nlh = NULL,
2305                 .fc_nlinfo.nl_net = net,
2306         };
2307
2308         cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2309         cfg.fc_dst = *prefix;
2310         cfg.fc_gateway = *gwaddr;
2311
2312         /* We should treat it as a default route if prefix length is 0. */
2313         if (!prefixlen)
2314                 cfg.fc_flags |= RTF_DEFAULT;
2315
2316         ip6_route_add(&cfg);
2317
2318         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2319 }
2320 #endif
2321
2322 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2323 {
2324         struct rt6_info *rt;
2325         struct fib6_table *table;
2326
2327         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2328         if (!table)
2329                 return NULL;
2330
2331         read_lock_bh(&table->tb6_lock);
2332         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2333                 if (dev == rt->dst.dev &&
2334                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2335                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2336                         break;
2337         }
2338         if (rt)
2339                 dst_hold(&rt->dst);
2340         read_unlock_bh(&table->tb6_lock);
2341         return rt;
2342 }
2343
2344 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2345                                      struct net_device *dev,
2346                                      unsigned int pref)
2347 {
2348         struct fib6_config cfg = {
2349                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2350                 .fc_metric      = IP6_RT_PRIO_USER,
2351                 .fc_ifindex     = dev->ifindex,
2352                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2353                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2354                 .fc_nlinfo.portid = 0,
2355                 .fc_nlinfo.nlh = NULL,
2356                 .fc_nlinfo.nl_net = dev_net(dev),
2357         };
2358
2359         cfg.fc_gateway = *gwaddr;
2360
2361         ip6_route_add(&cfg);
2362
2363         return rt6_get_dflt_router(gwaddr, dev);
2364 }
2365
2366 void rt6_purge_dflt_routers(struct net *net)
2367 {
2368         struct rt6_info *rt;
2369         struct fib6_table *table;
2370
2371         /* NOTE: Keep consistent with rt6_get_dflt_router */
2372         table = fib6_get_table(net, RT6_TABLE_DFLT);
2373         if (!table)
2374                 return;
2375
2376 restart:
2377         read_lock_bh(&table->tb6_lock);
2378         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2379                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2380                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2381                         dst_hold(&rt->dst);
2382                         read_unlock_bh(&table->tb6_lock);
2383                         ip6_del_rt(rt);
2384                         goto restart;
2385                 }
2386         }
2387         read_unlock_bh(&table->tb6_lock);
2388 }
2389
2390 static void rtmsg_to_fib6_config(struct net *net,
2391                                  struct in6_rtmsg *rtmsg,
2392                                  struct fib6_config *cfg)
2393 {
2394         memset(cfg, 0, sizeof(*cfg));
2395
2396         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2397                          : RT6_TABLE_MAIN;
2398         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2399         cfg->fc_metric = rtmsg->rtmsg_metric;
2400         cfg->fc_expires = rtmsg->rtmsg_info;
2401         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2402         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2403         cfg->fc_flags = rtmsg->rtmsg_flags;
2404
2405         cfg->fc_nlinfo.nl_net = net;
2406
2407         cfg->fc_dst = rtmsg->rtmsg_dst;
2408         cfg->fc_src = rtmsg->rtmsg_src;
2409         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2410 }
2411
2412 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2413 {
2414         struct fib6_config cfg;
2415         struct in6_rtmsg rtmsg;
2416         int err;
2417
2418         switch (cmd) {
2419         case SIOCADDRT:         /* Add a route */
2420         case SIOCDELRT:         /* Delete a route */
2421                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2422                         return -EPERM;
2423                 err = copy_from_user(&rtmsg, arg,
2424                                      sizeof(struct in6_rtmsg));
2425                 if (err)
2426                         return -EFAULT;
2427
2428                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2429
2430                 rtnl_lock();
2431                 switch (cmd) {
2432                 case SIOCADDRT:
2433                         err = ip6_route_add(&cfg);
2434                         break;
2435                 case SIOCDELRT:
2436                         err = ip6_route_del(&cfg);
2437                         break;
2438                 default:
2439                         err = -EINVAL;
2440                 }
2441                 rtnl_unlock();
2442
2443                 return err;
2444         }
2445
2446         return -EINVAL;
2447 }
2448
2449 /*
2450  *      Drop the packet on the floor
2451  */
2452
2453 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2454 {
2455         int type;
2456         struct dst_entry *dst = skb_dst(skb);
2457         switch (ipstats_mib_noroutes) {
2458         case IPSTATS_MIB_INNOROUTES:
2459                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2460                 if (type == IPV6_ADDR_ANY) {
2461                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2462                                       IPSTATS_MIB_INADDRERRORS);
2463                         break;
2464                 }
2465                 /* FALLTHROUGH */
2466         case IPSTATS_MIB_OUTNOROUTES:
2467                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2468                               ipstats_mib_noroutes);
2469                 break;
2470         }
2471         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2472         kfree_skb(skb);
2473         return 0;
2474 }
2475
2476 static int ip6_pkt_discard(struct sk_buff *skb)
2477 {
2478         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2479 }
2480
2481 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2482 {
2483         skb->dev = skb_dst(skb)->dev;
2484         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2485 }
2486
2487 static int ip6_pkt_prohibit(struct sk_buff *skb)
2488 {
2489         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2490 }
2491
2492 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2493 {
2494         skb->dev = skb_dst(skb)->dev;
2495         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2496 }
2497
2498 /*
2499  *      Allocate a dst for local (unicast / anycast) address.
2500  */
2501
2502 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2503                                     const struct in6_addr *addr,
2504                                     bool anycast)
2505 {
2506         u32 tb_id;
2507         struct net *net = dev_net(idev->dev);
2508         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2509                                             DST_NOCOUNT);
2510         if (!rt)
2511                 return ERR_PTR(-ENOMEM);
2512
2513         in6_dev_hold(idev);
2514
2515         rt->dst.flags |= DST_HOST;
2516         rt->dst.input = ip6_input;
2517         rt->dst.output = ip6_output;
2518         rt->rt6i_idev = idev;
2519
2520         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2521         if (anycast)
2522                 rt->rt6i_flags |= RTF_ANYCAST;
2523         else
2524                 rt->rt6i_flags |= RTF_LOCAL;
2525
2526         rt->rt6i_gateway  = *addr;
2527         rt->rt6i_dst.addr = *addr;
2528         rt->rt6i_dst.plen = 128;
2529         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2530         rt->rt6i_table = fib6_get_table(net, tb_id);
2531         rt->dst.flags |= DST_NOCACHE;
2532
2533         atomic_set(&rt->dst.__refcnt, 1);
2534
2535         return rt;
2536 }
2537
2538 int ip6_route_get_saddr(struct net *net,
2539                         struct rt6_info *rt,
2540                         const struct in6_addr *daddr,
2541                         unsigned int prefs,
2542                         struct in6_addr *saddr)
2543 {
2544         struct inet6_dev *idev =
2545                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2546         int err = 0;
2547         if (rt && rt->rt6i_prefsrc.plen)
2548                 *saddr = rt->rt6i_prefsrc.addr;
2549         else
2550                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2551                                          daddr, prefs, saddr);
2552         return err;
2553 }
2554
2555 /* Remove a deleted IP address from prefsrc entries */
2556 struct arg_dev_net_ip {
2557         struct net_device *dev;
2558         struct net *net;
2559         struct in6_addr *addr;
2560 };
2561
2562 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2563 {
2564         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2565         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2566         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2567
2568         if (((void *)rt->dst.dev == dev || !dev) &&
2569             rt != net->ipv6.ip6_null_entry &&
2570             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2571                 /* remove prefsrc entry */
2572                 rt->rt6i_prefsrc.plen = 0;
2573         }
2574         return 0;
2575 }
2576
2577 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2578 {
2579         struct net *net = dev_net(ifp->idev->dev);
2580         struct arg_dev_net_ip adni = {
2581                 .dev = ifp->idev->dev,
2582                 .net = net,
2583                 .addr = &ifp->addr,
2584         };
2585         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2586 }
2587
2588 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2589 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2590
2591 /* Remove routers and update dst entries when a gateway turns into a host. */
2592 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2593 {
2594         struct in6_addr *gateway = (struct in6_addr *)arg;
2595
2596         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2597              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2598              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2599                 return -1;
2600         }
2601         return 0;
2602 }
2603
2604 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2605 {
2606         fib6_clean_all(net, fib6_clean_tohost, gateway);
2607 }
2608
2609 struct arg_dev_net {
2610         struct net_device *dev;
2611         struct net *net;
2612 };
2613
2614 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2615 {
2616         const struct arg_dev_net *adn = arg;
2617         const struct net_device *dev = adn->dev;
2618
2619         if ((rt->dst.dev == dev || !dev) &&
2620             rt != adn->net->ipv6.ip6_null_entry)
2621                 return -1;
2622
2623         return 0;
2624 }
2625
2626 void rt6_ifdown(struct net *net, struct net_device *dev)
2627 {
2628         struct arg_dev_net adn = {
2629                 .dev = dev,
2630                 .net = net,
2631         };
2632
2633         fib6_clean_all(net, fib6_ifdown, &adn);
2634         icmp6_clean_all(fib6_ifdown, &adn);
2635         if (dev)
2636                 rt6_uncached_list_flush_dev(net, dev);
2637 }
2638
2639 struct rt6_mtu_change_arg {
2640         struct net_device *dev;
2641         unsigned int mtu;
2642 };
2643
2644 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2645 {
2646         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2647         struct inet6_dev *idev;
2648
2649         /* In IPv6, PMTU discovery is not optional, so a locked
2650            RTAX_MTU metric cannot disable it.
2651            We still use this lock to block changes
2652            caused by addrconf/ndisc.
2653         */
2654
2655         idev = __in6_dev_get(arg->dev);
2656         if (!idev)
2657                 return 0;
2658
2659         /* An administrative MTU increase cannot be discovered by IPv6
2660            PMTU discovery, so the PMTU increase must be applied here.
2661            Since RFC 1981 doesn't cover administrative MTU increases,
2662            updating the PMTU on increase is a MUST. (i.e. jumbo frames)
2663          */
2664         /*
2665            If the new MTU is less than the route PMTU, the new MTU will be
2666            the lowest MTU in the path; update the route PMTU to reflect the
2667            decrease. If the new MTU is greater than the route PMTU, and the
2668            old MTU was the lowest MTU in the path, update the route PMTU
2669            to reflect the increase. In that case, if another node now has
2670            the lowest MTU in the path, a Packet Too Big message will trigger
2671            PMTU discovery again.
2672          */
2673         if (rt->dst.dev == arg->dev &&
2674             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2675                 if (rt->rt6i_flags & RTF_CACHE) {
2676                         /* For RTF_CACHE with rt6i_pmtu == 0
2677                          * (i.e. a redirected route),
2678                          * the metrics of its rt->dst.from has already
2679                          * been updated.
2680                          */
2681                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2682                                 rt->rt6i_pmtu = arg->mtu;
2683                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2684                            (dst_mtu(&rt->dst) < arg->mtu &&
2685                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2686                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2687                 }
2688         }
2689         return 0;
2690 }
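
/* Editor's note: illustrative sketch added for exposition; it is not part
 * of the kernel source and the names are hypothetical. It condenses the
 * non-RTF_CACHE branch above: follow a device MTU decrease unconditionally,
 * and follow an increase only when the route's MTU was tracking the
 * device's previous IPv6 MTU.
 */
#if 0	/* example only, never compiled as part of this file */
#include <stdbool.h>

static bool route_mtu_should_follow(unsigned int route_mtu,
				    unsigned int new_dev_mtu,
				    unsigned int old_dev_mtu6)
{
	if (route_mtu >= new_dev_mtu)		/* MTU decrease (or equal) */
		return true;
	return route_mtu == old_dev_mtu6;	/* increase: was tracking device */
}
#endif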
2691
2692 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2693 {
2694         struct rt6_mtu_change_arg arg = {
2695                 .dev = dev,
2696                 .mtu = mtu,
2697         };
2698
2699         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2700 }
2701
2702 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2703         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2704         [RTA_OIF]               = { .type = NLA_U32 },
2705         [RTA_IIF]               = { .type = NLA_U32 },
2706         [RTA_PRIORITY]          = { .type = NLA_U32 },
2707         [RTA_METRICS]           = { .type = NLA_NESTED },
2708         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2709         [RTA_PREF]              = { .type = NLA_U8 },
2710         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2711         [RTA_ENCAP]             = { .type = NLA_NESTED },
2712         [RTA_EXPIRES]           = { .type = NLA_U32 },
2713 };
2714
2715 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2716                               struct fib6_config *cfg)
2717 {
2718         struct rtmsg *rtm;
2719         struct nlattr *tb[RTA_MAX+1];
2720         unsigned int pref;
2721         int err;
2722
2723         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2724         if (err < 0)
2725                 goto errout;
2726
2727         err = -EINVAL;
2728         rtm = nlmsg_data(nlh);
2729         memset(cfg, 0, sizeof(*cfg));
2730
2731         cfg->fc_table = rtm->rtm_table;
2732         cfg->fc_dst_len = rtm->rtm_dst_len;
2733         cfg->fc_src_len = rtm->rtm_src_len;
2734         cfg->fc_flags = RTF_UP;
2735         cfg->fc_protocol = rtm->rtm_protocol;
2736         cfg->fc_type = rtm->rtm_type;
2737
2738         if (rtm->rtm_type == RTN_UNREACHABLE ||
2739             rtm->rtm_type == RTN_BLACKHOLE ||
2740             rtm->rtm_type == RTN_PROHIBIT ||
2741             rtm->rtm_type == RTN_THROW)
2742                 cfg->fc_flags |= RTF_REJECT;
2743
2744         if (rtm->rtm_type == RTN_LOCAL)
2745                 cfg->fc_flags |= RTF_LOCAL;
2746
2747         if (rtm->rtm_flags & RTM_F_CLONED)
2748                 cfg->fc_flags |= RTF_CACHE;
2749
2750         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2751         cfg->fc_nlinfo.nlh = nlh;
2752         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2753
2754         if (tb[RTA_GATEWAY]) {
2755                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2756                 cfg->fc_flags |= RTF_GATEWAY;
2757         }
2758
2759         if (tb[RTA_DST]) {
2760                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2761
2762                 if (nla_len(tb[RTA_DST]) < plen)
2763                         goto errout;
2764
2765                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2766         }
2767
2768         if (tb[RTA_SRC]) {
2769                 int plen = (rtm->rtm_src_len + 7) >> 3;
2770
2771                 if (nla_len(tb[RTA_SRC]) < plen)
2772                         goto errout;
2773
2774                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2775         }
2776
2777         if (tb[RTA_PREFSRC])
2778                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2779
2780         if (tb[RTA_OIF])
2781                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2782
2783         if (tb[RTA_PRIORITY])
2784                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2785
2786         if (tb[RTA_METRICS]) {
2787                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2788                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2789         }
2790
2791         if (tb[RTA_TABLE])
2792                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2793
2794         if (tb[RTA_MULTIPATH]) {
2795                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2796                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2797         }
2798
2799         if (tb[RTA_PREF]) {
2800                 pref = nla_get_u8(tb[RTA_PREF]);
2801                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2802                     pref != ICMPV6_ROUTER_PREF_HIGH)
2803                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2804                 cfg->fc_flags |= RTF_PREF(pref);
2805         }
2806
2807         if (tb[RTA_ENCAP])
2808                 cfg->fc_encap = tb[RTA_ENCAP];
2809
2810         if (tb[RTA_ENCAP_TYPE])
2811                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2812
2813         if (tb[RTA_EXPIRES]) {
2814                 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2815
2816                 if (addrconf_finite_timeout(timeout)) {
2817                         cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2818                         cfg->fc_flags |= RTF_EXPIRES;
2819                 }
2820         }
2821
2822         err = 0;
2823 errout:
2824         return err;
2825 }
2826
2827 struct rt6_nh {
2828         struct rt6_info *rt6_info;
2829         struct fib6_config r_cfg;
2830         struct mx6_config mxc;
2831         struct list_head next;
2832 };
2833
2834 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2835 {
2836         struct rt6_nh *nh;
2837
2838         list_for_each_entry(nh, rt6_nh_list, next) {
2839                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2840                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2841                         nh->r_cfg.fc_ifindex);
2842         }
2843 }
2844
2845 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2846                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2847 {
2848         struct rt6_nh *nh;
2849         struct rt6_info *rtnh;
2850         int err = -EEXIST;
2851
2852         list_for_each_entry(nh, rt6_nh_list, next) {
2853                 /* check if rt6_info already exists */
2854                 rtnh = nh->rt6_info;
2855
2856                 if (rtnh->dst.dev == rt->dst.dev &&
2857                     rtnh->rt6i_idev == rt->rt6i_idev &&
2858                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2859                                     &rt->rt6i_gateway))
2860                         return err;
2861         }
2862
2863         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2864         if (!nh)
2865                 return -ENOMEM;
2866         nh->rt6_info = rt;
2867         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2868         if (err) {
2869                 kfree(nh);
2870                 return err;
2871         }
2872         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2873         list_add_tail(&nh->next, rt6_nh_list);
2874
2875         return 0;
2876 }
2877
2878 static int ip6_route_multipath_add(struct fib6_config *cfg)
2879 {
2880         struct fib6_config r_cfg;
2881         struct rtnexthop *rtnh;
2882         struct rt6_info *rt;
2883         struct rt6_nh *err_nh;
2884         struct rt6_nh *nh, *nh_safe;
2885         int remaining;
2886         int attrlen;
2887         int err = 1;
2888         int nhn = 0;
2889         int replace = (cfg->fc_nlinfo.nlh &&
2890                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2891         LIST_HEAD(rt6_nh_list);
2892
2893         remaining = cfg->fc_mp_len;
2894         rtnh = (struct rtnexthop *)cfg->fc_mp;
2895
2896         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2897          * rt6_info structs, one per nexthop
2898          */
2899         while (rtnh_ok(rtnh, remaining)) {
2900                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2901                 if (rtnh->rtnh_ifindex)
2902                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2903
2904                 attrlen = rtnh_attrlen(rtnh);
2905                 if (attrlen > 0) {
2906                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2907
2908                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2909                         if (nla) {
2910                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2911                                 r_cfg.fc_flags |= RTF_GATEWAY;
2912                         }
2913                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2914                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2915                         if (nla)
2916                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2917                 }
2918
2919                 rt = ip6_route_info_create(&r_cfg);
2920                 if (IS_ERR(rt)) {
2921                         err = PTR_ERR(rt);
2922                         rt = NULL;
2923                         goto cleanup;
2924                 }
2925
2926                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2927                 if (err) {
2928                         dst_free(&rt->dst);
2929                         goto cleanup;
2930                 }
2931
2932                 rtnh = rtnh_next(rtnh, &remaining);
2933         }
2934
2935         err_nh = NULL;
2936         list_for_each_entry(nh, &rt6_nh_list, next) {
2937                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2938                 /* nh->rt6_info is used or freed at this point, reset to NULL */
2939                 nh->rt6_info = NULL;
2940                 if (err) {
2941                         if (replace && nhn)
2942                                 ip6_print_replace_route_err(&rt6_nh_list);
2943                         err_nh = nh;
2944                         goto add_errout;
2945                 }
2946
2947                 /* Because each route is added like a single route, we remove
2948                  * these flags after the first nexthop: if there is a collision,
2949                  * we have already failed to add the first nexthop
2950                  * (fib6_add_rt2node() has rejected it); when replacing, the old
2951                  * nexthops have been replaced by the first new one, and the
2952                  * remaining nexthops should be added to it.
2953                  */
2954                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2955                                                      NLM_F_REPLACE);
2956                 nhn++;
2957         }
2958
2959         goto cleanup;
2960
2961 add_errout:
2962         /* Delete routes that were already added */
2963         list_for_each_entry(nh, &rt6_nh_list, next) {
2964                 if (err_nh == nh)
2965                         break;
2966                 ip6_route_del(&nh->r_cfg);
2967         }
2968
2969 cleanup:
2970         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2971                 if (nh->rt6_info)
2972                         dst_free(&nh->rt6_info->dst);
2973                 kfree(nh->mxc.mx);
2974                 list_del(&nh->next);
2975                 kfree(nh);
2976         }
2977
2978         return err;
2979 }
2980
2981 static int ip6_route_multipath_del(struct fib6_config *cfg)
2982 {
2983         struct fib6_config r_cfg;
2984         struct rtnexthop *rtnh;
2985         int remaining;
2986         int attrlen;
2987         int err = 1, last_err = 0;
2988
2989         remaining = cfg->fc_mp_len;
2990         rtnh = (struct rtnexthop *)cfg->fc_mp;
2991
2992         /* Parse a Multipath Entry */
2993         while (rtnh_ok(rtnh, remaining)) {
2994                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2995                 if (rtnh->rtnh_ifindex)
2996                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2997
2998                 attrlen = rtnh_attrlen(rtnh);
2999                 if (attrlen > 0) {
3000                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3001
3002                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3003                         if (nla) {
3004                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3005                                 r_cfg.fc_flags |= RTF_GATEWAY;
3006                         }
3007                 }
3008                 err = ip6_route_del(&r_cfg);
3009                 if (err)
3010                         last_err = err;
3011
3012                 rtnh = rtnh_next(rtnh, &remaining);
3013         }
3014
3015         return last_err;
3016 }
3017
3018 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3019 {
3020         struct fib6_config cfg;
3021         int err;
3022
3023         err = rtm_to_fib6_config(skb, nlh, &cfg);
3024         if (err < 0)
3025                 return err;
3026
3027         if (cfg.fc_mp)
3028                 return ip6_route_multipath_del(&cfg);
3029         else
3030                 return ip6_route_del(&cfg);
3031 }
3032
3033 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3034 {
3035         struct fib6_config cfg;
3036         int err;
3037
3038         err = rtm_to_fib6_config(skb, nlh, &cfg);
3039         if (err < 0)
3040                 return err;
3041
3042         if (cfg.fc_mp)
3043                 return ip6_route_multipath_add(&cfg);
3044         else
3045                 return ip6_route_add(&cfg);
3046 }
3047
3048 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3049 {
3050         return NLMSG_ALIGN(sizeof(struct rtmsg))
3051                + nla_total_size(16) /* RTA_SRC */
3052                + nla_total_size(16) /* RTA_DST */
3053                + nla_total_size(16) /* RTA_GATEWAY */
3054                + nla_total_size(16) /* RTA_PREFSRC */
3055                + nla_total_size(4) /* RTA_TABLE */
3056                + nla_total_size(4) /* RTA_IIF */
3057                + nla_total_size(4) /* RTA_OIF */
3058                + nla_total_size(4) /* RTA_PRIORITY */
3059                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3060                + nla_total_size(sizeof(struct rta_cacheinfo))
3061                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3062                + nla_total_size(1) /* RTA_PREF */
3063                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3064 }
3065
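/* Serialize one rt6_info into an RTM_* netlink message.  Returns 0 on
 * success, 1 when the route is skipped (a prefix-only dump hitting a route
 * without RTF_PREFIX_RT), and -EMSGSIZE when the skb runs out of tailroom.
 */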
3066 static int rt6_fill_node(struct net *net,
3067                          struct sk_buff *skb, struct rt6_info *rt,
3068                          struct in6_addr *dst, struct in6_addr *src,
3069                          int iif, int type, u32 portid, u32 seq,
3070                          int prefix, int nowait, unsigned int flags)
3071 {
3072         u32 metrics[RTAX_MAX];
3073         struct rtmsg *rtm;
3074         struct nlmsghdr *nlh;
3075         long expires;
3076         u32 table;
3077
3078         if (prefix) {   /* user wants prefix routes only */
3079                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3080                         /* not an error: skip this non-prefix route */
3081                         return 1;
3082                 }
3083         }
3084
3085         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3086         if (!nlh)
3087                 return -EMSGSIZE;
3088
3089         rtm = nlmsg_data(nlh);
3090         rtm->rtm_family = AF_INET6;
3091         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3092         rtm->rtm_src_len = rt->rt6i_src.plen;
3093         rtm->rtm_tos = 0;
3094         if (rt->rt6i_table)
3095                 table = rt->rt6i_table->tb6_id;
3096         else
3097                 table = RT6_TABLE_UNSPEC;
3098         rtm->rtm_table = table;
3099         if (nla_put_u32(skb, RTA_TABLE, table))
3100                 goto nla_put_failure;
3101         if (rt->rt6i_flags & RTF_REJECT) {
3102                 switch (rt->dst.error) {
3103                 case -EINVAL:
3104                         rtm->rtm_type = RTN_BLACKHOLE;
3105                         break;
3106                 case -EACCES:
3107                         rtm->rtm_type = RTN_PROHIBIT;
3108                         break;
3109                 case -EAGAIN:
3110                         rtm->rtm_type = RTN_THROW;
3111                         break;
3112                 default:
3113                         rtm->rtm_type = RTN_UNREACHABLE;
3114                         break;
3115                 }
3116         } else if (rt->rt6i_flags & RTF_LOCAL)
3118                 rtm->rtm_type = RTN_LOCAL;
3119         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3120                 rtm->rtm_type = RTN_LOCAL;
3121         else
3122                 rtm->rtm_type = RTN_UNICAST;
3123         rtm->rtm_flags = 0;
3124         if (!netif_carrier_ok(rt->dst.dev)) {
3125                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3126                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3127                         rtm->rtm_flags |= RTNH_F_DEAD;
3128         }
3129         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3130         rtm->rtm_protocol = rt->rt6i_protocol;
3131         if (rt->rt6i_flags & RTF_DYNAMIC)
3132                 rtm->rtm_protocol = RTPROT_REDIRECT;
3133         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3134                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3135                         rtm->rtm_protocol = RTPROT_RA;
3136                 else
3137                         rtm->rtm_protocol = RTPROT_KERNEL;
3138         }
3139
3140         if (rt->rt6i_flags & RTF_CACHE)
3141                 rtm->rtm_flags |= RTM_F_CLONED;
3142
3143         if (dst) {
3144                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3145                         goto nla_put_failure;
3146                 rtm->rtm_dst_len = 128;
3147         } else if (rtm->rtm_dst_len)
3148                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3149                         goto nla_put_failure;
3150 #ifdef CONFIG_IPV6_SUBTREES
3151         if (src) {
3152                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3153                         goto nla_put_failure;
3154                 rtm->rtm_src_len = 128;
3155         } else if (rtm->rtm_src_len &&
3156                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3157                 goto nla_put_failure;
3158 #endif
3159         if (iif) {
3160 #ifdef CONFIG_IPV6_MROUTE
3161                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3162                         int err = ip6mr_get_route(net, skb, rtm, nowait);
3163                         if (err <= 0) {
3164                                 if (!nowait) {
3165                                         if (err == 0)
3166                                                 return 0;
3167                                         goto nla_put_failure;
3168                                 } else {
3169                                         if (err == -EMSGSIZE)
3170                                                 goto nla_put_failure;
3171                                 }
3172                         }
3173                 } else
3174 #endif
3175                         if (nla_put_u32(skb, RTA_IIF, iif))
3176                                 goto nla_put_failure;
3177         } else if (dst) {
3178                 struct in6_addr saddr_buf;
3179                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3180                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3181                         goto nla_put_failure;
3182         }
3183
3184         if (rt->rt6i_prefsrc.plen) {
3185                 struct in6_addr saddr_buf;
3186                 saddr_buf = rt->rt6i_prefsrc.addr;
3187                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3188                         goto nla_put_failure;
3189         }
3190
3191         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3192         if (rt->rt6i_pmtu)
3193                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3194         if (rtnetlink_put_metrics(skb, metrics) < 0)
3195                 goto nla_put_failure;
3196
3197         if (rt->rt6i_flags & RTF_GATEWAY) {
3198                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3199                         goto nla_put_failure;
3200         }
3201
3202         if (rt->dst.dev &&
3203             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3204                 goto nla_put_failure;
3205         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3206                 goto nla_put_failure;
3207
3208         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3209
3210         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3211                 goto nla_put_failure;
3212
3213         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3214                 goto nla_put_failure;
3215
3216         if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
                     goto nla_put_failure;
3217
3218         nlmsg_end(skb, nlh);
3219         return 0;
3220
3221 nla_put_failure:
3222         nlmsg_cancel(skb, nlh);
3223         return -EMSGSIZE;
3224 }
3225
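/* Per-route callback used by the fib6 tree walker when dumping the table
 * (RTM_GETROUTE with NLM_F_DUMP).  Honours the RTM_F_PREFIX filter from
 * the request header.
 */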
3226 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3227 {
3228         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3229         int prefix;
3230
3231         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3232                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3233                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3234         } else
3235                 prefix = 0;
3236
3237         return rt6_fill_node(arg->net,
3238                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3239                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3240                      prefix, 0, NLM_F_MULTI);
3241 }
3242
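/* RTM_GETROUTE (single lookup): parse RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF/
 * RTA_MARK, resolve the route via ip6_route_input_lookup() when an input
 * interface is given or ip6_route_output() otherwise, and unicast the
 * result back to the requester as an RTM_NEWROUTE message.
 */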
3243 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3244 {
3245         struct net *net = sock_net(in_skb->sk);
3246         struct nlattr *tb[RTA_MAX+1];
3247         struct rt6_info *rt;
3248         struct sk_buff *skb;
3249         struct rtmsg *rtm;
3250         struct flowi6 fl6;
3251         int err, iif = 0, oif = 0;
3252
3253         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3254         if (err < 0)
3255                 goto errout;
3256
3257         err = -EINVAL;
3258         memset(&fl6, 0, sizeof(fl6));
3259
3260         if (tb[RTA_SRC]) {
3261                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3262                         goto errout;
3263
3264                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3265         }
3266
3267         if (tb[RTA_DST]) {
3268                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3269                         goto errout;
3270
3271                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3272         }
3273
3274         if (tb[RTA_IIF])
3275                 iif = nla_get_u32(tb[RTA_IIF]);
3276
3277         if (tb[RTA_OIF])
3278                 oif = nla_get_u32(tb[RTA_OIF]);
3279
3280         if (tb[RTA_MARK])
3281                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3282
3283         if (iif) {
3284                 struct net_device *dev;
3285                 int flags = 0;
3286
3287                 dev = __dev_get_by_index(net, iif);
3288                 if (!dev) {
3289                         err = -ENODEV;
3290                         goto errout;
3291                 }
3292
3293                 fl6.flowi6_iif = iif;
3294
3295                 if (!ipv6_addr_any(&fl6.saddr))
3296                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3297
3298                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3299                                                                flags);
3300         } else {
3301                 fl6.flowi6_oif = oif;
3302
3303                 if (netif_index_is_l3_master(net, oif)) {
3304                         fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3305                                            FLOWI_FLAG_SKIP_NH_OIF;
3306                 }
3307
3308                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3309         }
3310
3311         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3312         if (!skb) {
3313                 ip6_rt_put(rt);
3314                 err = -ENOBUFS;
3315                 goto errout;
3316         }
3317
3318         /* Reserve room for dummy headers; this skb can pass
3319          * through a good chunk of the routing engine.
3320          */
3321         skb_reset_mac_header(skb);
3322         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3323
3324         skb_dst_set(skb, &rt->dst);
3325
3326         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3327                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3328                             nlh->nlmsg_seq, 0, 0, 0);
3329         if (err < 0) {
3330                 kfree_skb(skb);
3331                 goto errout;
3332         }
3333
3334         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3335 errout:
3336         return err;
3337 }
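
/*
 * Illustrative sketch, not part of this file: a minimal userspace client for
 * the RTM_GETROUTE handler above, built only from the raw rtnetlink
 * structures in <linux/rtnetlink.h>.  The destination address and the lack
 * of error handling are assumptions made for brevity; the block is guarded
 * by #if 0 so it never enters the kernel build.
 */
#if 0   /* userspace example only */
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
        struct {
                struct nlmsghdr nlh;
                struct rtmsg rtm;
                char buf[64];
        } req;
        struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
        struct rtattr *rta;
        struct in6_addr dst;
        char reply[4096];
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        memset(&req, 0, sizeof(req));
        inet_pton(AF_INET6, "2001:db8::1", &dst);

        req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
        req.nlh.nlmsg_type = RTM_GETROUTE;
        req.nlh.nlmsg_flags = NLM_F_REQUEST;
        req.rtm.rtm_family = AF_INET6;

        /* RTA_DST: becomes fl6.daddr in inet6_rtm_getroute() */
        rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
        rta->rta_type = RTA_DST;
        rta->rta_len = RTA_LENGTH(sizeof(dst));
        memcpy(RTA_DATA(rta), &dst, sizeof(dst));
        req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + rta->rta_len;

        sendto(fd, &req, req.nlh.nlmsg_len, 0,
               (struct sockaddr *)&sa, sizeof(sa));
        /* The reply is one RTM_NEWROUTE message built by rt6_fill_node() */
        recv(fd, reply, sizeof(reply), 0);
        close(fd);
        return 0;
}
#endif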
3338
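/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification to RTNLGRP_IPV6_ROUTE
 * listeners whenever a route is added or removed.  The skb is sized with
 * rt6_nlmsg_size(), so -EMSGSIZE from rt6_fill_node() indicates a bug there.
 */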
3339 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3340                      unsigned int nlm_flags)
3341 {
3342         struct sk_buff *skb;
3343         struct net *net = info->nl_net;
3344         u32 seq;
3345         int err;
3346
3347         err = -ENOBUFS;
3348         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3349
3350         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3351         if (!skb)
3352                 goto errout;
3353
3354         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3355                                 event, info->portid, seq, 0, 0, nlm_flags);
3356         if (err < 0) {
3357                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3358                 WARN_ON(err == -EMSGSIZE);
3359                 kfree_skb(skb);
3360                 goto errout;
3361         }
3362         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3363                     info->nlh, gfp_any());
3364         return;
3365 errout:
3366         if (err < 0)
3367                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3368 }
3369
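/* When the per-namespace loopback device registers, point the template
 * null (and, with multiple tables, prohibit/blackhole) routes at it and
 * take the inet6_dev reference they need.
 */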
3370 static int ip6_route_dev_notify(struct notifier_block *this,
3371                                 unsigned long event, void *ptr)
3372 {
3373         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3374         struct net *net = dev_net(dev);
3375
3376         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3377                 net->ipv6.ip6_null_entry->dst.dev = dev;
3378                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3379 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3380                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3381                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3382                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3383                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3384 #endif
3385         }
3386
3387         return NOTIFY_OK;
3388 }
3389
3390 /*
3391  *      /proc
3392  */
3393
3394 #ifdef CONFIG_PROC_FS
3395
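/* /proc/net/ipv6_route dumps the FIB in the legacy text format;
 * /proc/net/rt6_stats (below) prints seven hex fields: fib nodes, route
 * nodes, route allocations, route entries, cached routes, dst entries in
 * use and discarded routes.
 */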
3396 static const struct file_operations ipv6_route_proc_fops = {
3397         .owner          = THIS_MODULE,
3398         .open           = ipv6_route_open,
3399         .read           = seq_read,
3400         .llseek         = seq_lseek,
3401         .release        = seq_release_net,
3402 };
3403
3404 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3405 {
3406         struct net *net = (struct net *)seq->private;
3407         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3408                    net->ipv6.rt6_stats->fib_nodes,
3409                    net->ipv6.rt6_stats->fib_route_nodes,
3410                    net->ipv6.rt6_stats->fib_rt_alloc,
3411                    net->ipv6.rt6_stats->fib_rt_entries,
3412                    net->ipv6.rt6_stats->fib_rt_cache,
3413                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3414                    net->ipv6.rt6_stats->fib_discarded_routes);
3415
3416         return 0;
3417 }
3418
3419 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3420 {
3421         return single_open_net(inode, file, rt6_stats_seq_show);
3422 }
3423
3424 static const struct file_operations rt6_stats_seq_fops = {
3425         .owner   = THIS_MODULE,
3426         .open    = rt6_stats_seq_open,
3427         .read    = seq_read,
3428         .llseek  = seq_lseek,
3429         .release = single_release_net,
3430 };
3431 #endif  /* CONFIG_PROC_FS */
3432
3433 #ifdef CONFIG_SYSCTL
3434
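/* These knobs appear under /proc/sys/net/ipv6/route/ once the table below
 * is registered elsewhere by the IPv6 sysctl setup code.  Writing to
 * "flush" triggers an immediate run of the fib6 garbage collector rather
 * than storing a tunable.
 */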
3435 static
3436 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3437                               void __user *buffer, size_t *lenp, loff_t *ppos)
3438 {
3439         struct net *net;
3440         int delay;

3441         if (!write)
3442                 return -EINVAL;
3443
3444         net = (struct net *)ctl->extra1;
3445         delay = net->ipv6.sysctl.flush_delay;
3446         proc_dointvec(ctl, write, buffer, lenp, ppos);
3447         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3448         return 0;
3449 }
3450
3451 struct ctl_table ipv6_route_table_template[] = {
3452         {
3453                 .procname       =       "flush",
3454                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3455                 .maxlen         =       sizeof(int),
3456                 .mode           =       0200,
3457                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3458         },
3459         {
3460                 .procname       =       "gc_thresh",
3461                 .data           =       &ip6_dst_ops_template.gc_thresh,
3462                 .maxlen         =       sizeof(int),
3463                 .mode           =       0644,
3464                 .proc_handler   =       proc_dointvec,
3465         },
3466         {
3467                 .procname       =       "max_size",
3468                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3469                 .maxlen         =       sizeof(int),
3470                 .mode           =       0644,
3471                 .proc_handler   =       proc_dointvec,
3472         },
3473         {
3474                 .procname       =       "gc_min_interval",
3475                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3476                 .maxlen         =       sizeof(int),
3477                 .mode           =       0644,
3478                 .proc_handler   =       proc_dointvec_jiffies,
3479         },
3480         {
3481                 .procname       =       "gc_timeout",
3482                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3483                 .maxlen         =       sizeof(int),
3484                 .mode           =       0644,
3485                 .proc_handler   =       proc_dointvec_jiffies,
3486         },
3487         {
3488                 .procname       =       "gc_interval",
3489                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3490                 .maxlen         =       sizeof(int),
3491                 .mode           =       0644,
3492                 .proc_handler   =       proc_dointvec_jiffies,
3493         },
3494         {
3495                 .procname       =       "gc_elasticity",
3496                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3497                 .maxlen         =       sizeof(int),
3498                 .mode           =       0644,
3499                 .proc_handler   =       proc_dointvec,
3500         },
3501         {
3502                 .procname       =       "mtu_expires",
3503                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3504                 .maxlen         =       sizeof(int),
3505                 .mode           =       0644,
3506                 .proc_handler   =       proc_dointvec_jiffies,
3507         },
3508         {
3509                 .procname       =       "min_adv_mss",
3510                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3511                 .maxlen         =       sizeof(int),
3512                 .mode           =       0644,
3513                 .proc_handler   =       proc_dointvec,
3514         },
3515         {
3516                 .procname       =       "gc_min_interval_ms",
3517                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3518                 .maxlen         =       sizeof(int),
3519                 .mode           =       0644,
3520                 .proc_handler   =       proc_dointvec_ms_jiffies,
3521         },
3522         { }
3523 };
3524
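/* Clone the template table for a namespace and repoint each .data field at
 * that namespace's copy of the value; the indices below must stay in step
 * with the template above.  For namespaces other than init_user_ns the
 * first entry's procname is cleared so it is not exported to unprivileged
 * users.
 */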
3525 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3526 {
3527         struct ctl_table *table;
3528
3529         table = kmemdup(ipv6_route_table_template,
3530                         sizeof(ipv6_route_table_template),
3531                         GFP_KERNEL);
3532
3533         if (table) {
3534                 table[0].data = &net->ipv6.sysctl.flush_delay;
3535                 table[0].extra1 = net;
3536                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3537                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3538                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3539                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3540                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3541                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3542                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3543                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3544                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3545
3546                 /* Don't export sysctls to unprivileged users */
3547                 if (net->user_ns != &init_user_ns)
3548                         table[0].procname = NULL;
3549         }
3550
3551         return table;
3552 }
3553 #endif
3554
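/* Per-namespace setup: copy the dst_ops template, allocate this namespace's
 * copies of the null/prohibit/blackhole template routes and seed the route
 * GC and PMTU sysctl defaults.
 */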
3555 static int __net_init ip6_route_net_init(struct net *net)
3556 {
3557         int ret = -ENOMEM;
3558
3559         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3560                sizeof(net->ipv6.ip6_dst_ops));
3561
3562         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3563                 goto out_ip6_dst_ops;
3564
3565         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3566                                            sizeof(*net->ipv6.ip6_null_entry),
3567                                            GFP_KERNEL);
3568         if (!net->ipv6.ip6_null_entry)
3569                 goto out_ip6_dst_entries;
3570         net->ipv6.ip6_null_entry->dst.path =
3571                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3572         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3573         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3574                          ip6_template_metrics, true);
3575
3576 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3577         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3578                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3579                                                GFP_KERNEL);
3580         if (!net->ipv6.ip6_prohibit_entry)
3581                 goto out_ip6_null_entry;
3582         net->ipv6.ip6_prohibit_entry->dst.path =
3583                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3584         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3585         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3586                          ip6_template_metrics, true);
3587
3588         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3589                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3590                                                GFP_KERNEL);
3591         if (!net->ipv6.ip6_blk_hole_entry)
3592                 goto out_ip6_prohibit_entry;
3593         net->ipv6.ip6_blk_hole_entry->dst.path =
3594                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3595         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3596         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3597                          ip6_template_metrics, true);
3598 #endif
3599
3600         net->ipv6.sysctl.flush_delay = 0;
3601         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3602         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3603         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3604         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3605         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3606         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3607         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3608
3609         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3610
3611         ret = 0;
3612 out:
3613         return ret;
3614
3615 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3616 out_ip6_prohibit_entry:
3617         kfree(net->ipv6.ip6_prohibit_entry);
3618 out_ip6_null_entry:
3619         kfree(net->ipv6.ip6_null_entry);
3620 #endif
3621 out_ip6_dst_entries:
3622         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3623 out_ip6_dst_ops:
3624         goto out;
3625 }
3626
3627 static void __net_exit ip6_route_net_exit(struct net *net)
3628 {
3629         kfree(net->ipv6.ip6_null_entry);
3630 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3631         kfree(net->ipv6.ip6_prohibit_entry);
3632         kfree(net->ipv6.ip6_blk_hole_entry);
3633 #endif
3634         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3635 }
3636
3637 static int __net_init ip6_route_net_init_late(struct net *net)
3638 {
3639 #ifdef CONFIG_PROC_FS
3640         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3641         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3642 #endif
3643         return 0;
3644 }
3645
3646 static void __net_exit ip6_route_net_exit_late(struct net *net)
3647 {
3648 #ifdef CONFIG_PROC_FS
3649         remove_proc_entry("ipv6_route", net->proc_net);
3650         remove_proc_entry("rt6_stats", net->proc_net);
3651 #endif
3652 }
3653
3654 static struct pernet_operations ip6_route_net_ops = {
3655         .init = ip6_route_net_init,
3656         .exit = ip6_route_net_exit,
3657 };
3658
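/* Per-namespace inetpeer base; the IPv6 code uses it for long-lived
 * per-peer state such as ICMPv6 rate limiting.
 */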
3659 static int __net_init ipv6_inetpeer_init(struct net *net)
3660 {
3661         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3662
3663         if (!bp)
3664                 return -ENOMEM;
3665         inet_peer_base_init(bp);
3666         net->ipv6.peers = bp;
3667         return 0;
3668 }
3669
3670 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3671 {
3672         struct inet_peer_base *bp = net->ipv6.peers;
3673
3674         net->ipv6.peers = NULL;
3675         inetpeer_invalidate_tree(bp);
3676         kfree(bp);
3677 }
3678
3679 static struct pernet_operations ipv6_inetpeer_ops = {
3680         .init   =       ipv6_inetpeer_init,
3681         .exit   =       ipv6_inetpeer_exit,
3682 };
3683
3684 static struct pernet_operations ip6_route_net_late_ops = {
3685         .init = ip6_route_net_init_late,
3686         .exit = ip6_route_net_exit_late,
3687 };
3688
3689 static struct notifier_block ip6_route_dev_notifier = {
3690         .notifier_call = ip6_route_dev_notify,
3691         .priority = 0,
3692 };
3693
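/* Module init: create the dst kmem cache, register the pernet subsystems,
 * bring up fib6, xfrm6 and policy rules, hook up the RTM_NEWROUTE/DELROUTE/
 * GETROUTE handlers and the netdevice notifier, and initialise the per-cpu
 * uncached-route lists.  The error labels below unwind in reverse order.
 */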
3694 int __init ip6_route_init(void)
3695 {
3696         int ret;
3697         int cpu;
3698
3699         ret = -ENOMEM;
3700         ip6_dst_ops_template.kmem_cachep =
3701                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3702                                   SLAB_HWCACHE_ALIGN, NULL);
3703         if (!ip6_dst_ops_template.kmem_cachep)
3704                 goto out;
3705
3706         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3707         if (ret)
3708                 goto out_kmem_cache;
3709
3710         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3711         if (ret)
3712                 goto out_dst_entries;
3713
3714         ret = register_pernet_subsys(&ip6_route_net_ops);
3715         if (ret)
3716                 goto out_register_inetpeer;
3717
3718         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3719
3720         /* The loopback device is registered before this code runs, so the
3721          * loopback reference in the template rt6_info entries is not taken
3722          * automatically; take it manually for init_net. */
3723         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3724         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3725 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3726         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3727         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3728         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3729         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3730 #endif
3731         ret = fib6_init();
3732         if (ret)
3733                 goto out_register_subsys;
3734
3735         ret = xfrm6_init();
3736         if (ret)
3737                 goto out_fib6_init;
3738
3739         ret = fib6_rules_init();
3740         if (ret)
3741                 goto xfrm6_init;
3742
3743         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3744         if (ret)
3745                 goto fib6_rules_init;
3746
3747         ret = -ENOBUFS;
3748         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3749             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3750             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3751                 goto out_register_late_subsys;
3752
3753         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3754         if (ret)
3755                 goto out_register_late_subsys;
3756
3757         for_each_possible_cpu(cpu) {
3758                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3759
3760                 INIT_LIST_HEAD(&ul->head);
3761                 spin_lock_init(&ul->lock);
3762         }
3763
3764 out:
3765         return ret;
3766
3767 out_register_late_subsys:
3768         unregister_pernet_subsys(&ip6_route_net_late_ops);
3769 fib6_rules_init:
3770         fib6_rules_cleanup();
3771 xfrm6_init:
3772         xfrm6_fini();
3773 out_fib6_init:
3774         fib6_gc_cleanup();
3775 out_register_subsys:
3776         unregister_pernet_subsys(&ip6_route_net_ops);
3777 out_register_inetpeer:
3778         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3779 out_dst_entries:
3780         dst_entries_destroy(&ip6_dst_blackhole_ops);
3781 out_kmem_cache:
3782         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3783         goto out;
3784 }
3785
3786 void ip6_route_cleanup(void)
3787 {
3788         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3789         unregister_pernet_subsys(&ip6_route_net_late_ops);
3790         fib6_rules_cleanup();
3791         xfrm6_fini();
3792         fib6_gc_cleanup();
3793         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3794         unregister_pernet_subsys(&ip6_route_net_ops);
3795         dst_entries_destroy(&ip6_dst_blackhole_ops);
3796         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3797 }