]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv6/route.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[karo-tx-linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Outcome of the next-hop reachability check used while scoring routes.
 * All failure codes are negative so callers can test "< 0".
 */
enum rt6_nud_state {
        RT6_NUD_FAIL_HARD       = -3,   /* find_match() skips the route */
        RT6_NUD_FAIL_PROBE      = -2,   /* neighbour entry is NUD_FAILED */
        RT6_NUD_FAIL_DO_RR      = -1,   /* find_match() requests round-robin */
        RT6_NUD_SUCCEED         = 1
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76                                     const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int      ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int              ip6_pkt_prohibit(struct sk_buff *skb);
89 static int              ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92                                            struct sk_buff *skb, u32 mtu);
93 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94                                         struct sk_buff *skb);
95 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
97
98 #ifdef CONFIG_IPV6_ROUTE_INFO
99 static struct rt6_info *rt6_add_route_info(struct net *net,
100                                            const struct in6_addr *prefix, int prefixlen,
101                                            const struct in6_addr *gwaddr, int ifindex,
102                                            unsigned int pref);
103 static struct rt6_info *rt6_get_route_info(struct net *net,
104                                            const struct in6_addr *prefix, int prefixlen,
105                                            const struct in6_addr *gwaddr, int ifindex);
106 #endif
107
108 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
109 {
110         struct rt6_info *rt = (struct rt6_info *)dst;
111
112         if (rt->rt6i_flags & RTF_CACHE)
113                 return NULL;
114         else
115                 return dst_cow_metrics_generic(dst, old);
116 }
117
118 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
119                                              struct sk_buff *skb,
120                                              const void *daddr)
121 {
122         struct in6_addr *p = &rt->rt6i_gateway;
123
124         if (!ipv6_addr_any(p))
125                 return (const void *) p;
126         else if (skb)
127                 return &ipv6_hdr(skb)->daddr;
128         return daddr;
129 }
130
131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
132                                           struct sk_buff *skb,
133                                           const void *daddr)
134 {
135         struct rt6_info *rt = (struct rt6_info *) dst;
136         struct neighbour *n;
137
138         daddr = choose_neigh_daddr(rt, skb, daddr);
139         n = __ipv6_neigh_lookup(dst->dev, daddr);
140         if (n)
141                 return n;
142         return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144
145 static struct dst_ops ip6_dst_ops_template = {
146         .family                 =       AF_INET6,
147         .gc                     =       ip6_dst_gc,
148         .gc_thresh              =       1024,
149         .check                  =       ip6_dst_check,
150         .default_advmss         =       ip6_default_advmss,
151         .mtu                    =       ip6_mtu,
152         .cow_metrics            =       ipv6_cow_metrics,
153         .destroy                =       ip6_dst_destroy,
154         .ifdown                 =       ip6_dst_ifdown,
155         .negative_advice        =       ip6_negative_advice,
156         .link_failure           =       ip6_link_failure,
157         .update_pmtu            =       ip6_rt_update_pmtu,
158         .redirect               =       rt6_do_redirect,
159         .local_out              =       __ip6_local_out,
160         .neigh_lookup           =       ip6_neigh_lookup,
161 };
162
163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
164 {
165         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167         return mtu ? : dst->dev->mtu;
168 }
169
170 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
171                                          struct sk_buff *skb, u32 mtu)
172 {
173 }
174
/* .redirect hook of ip6_dst_blackhole_ops: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
                                      struct sock *sk,
                                      struct sk_buff *skb)
{
        /* no-op */
}
179
180 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
181                                          unsigned long old)
182 {
183         return NULL;
184 }
185
186 static struct dst_ops ip6_dst_blackhole_ops = {
187         .family                 =       AF_INET6,
188         .destroy                =       ip6_dst_destroy,
189         .check                  =       ip6_dst_check,
190         .mtu                    =       ip6_blackhole_mtu,
191         .default_advmss         =       ip6_default_advmss,
192         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
193         .redirect               =       ip6_rt_blackhole_redirect,
194         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
195         .neigh_lookup           =       ip6_neigh_lookup,
196 };
197
198 static const u32 ip6_template_metrics[RTAX_MAX] = {
199         [RTAX_HOPLIMIT - 1] = 0,
200 };
201
202 static const struct rt6_info ip6_null_entry_template = {
203         .dst = {
204                 .__refcnt       = ATOMIC_INIT(1),
205                 .__use          = 1,
206                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
207                 .error          = -ENETUNREACH,
208                 .input          = ip6_pkt_discard,
209                 .output         = ip6_pkt_discard_out,
210         },
211         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
212         .rt6i_protocol  = RTPROT_KERNEL,
213         .rt6i_metric    = ~(u32) 0,
214         .rt6i_ref       = ATOMIC_INIT(1),
215 };
216
217 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
218
219 static const struct rt6_info ip6_prohibit_entry_template = {
220         .dst = {
221                 .__refcnt       = ATOMIC_INIT(1),
222                 .__use          = 1,
223                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
224                 .error          = -EACCES,
225                 .input          = ip6_pkt_prohibit,
226                 .output         = ip6_pkt_prohibit_out,
227         },
228         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
229         .rt6i_protocol  = RTPROT_KERNEL,
230         .rt6i_metric    = ~(u32) 0,
231         .rt6i_ref       = ATOMIC_INIT(1),
232 };
233
234 static const struct rt6_info ip6_blk_hole_entry_template = {
235         .dst = {
236                 .__refcnt       = ATOMIC_INIT(1),
237                 .__use          = 1,
238                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
239                 .error          = -EINVAL,
240                 .input          = dst_discard,
241                 .output         = dst_discard_sk,
242         },
243         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
244         .rt6i_protocol  = RTPROT_KERNEL,
245         .rt6i_metric    = ~(u32) 0,
246         .rt6i_ref       = ATOMIC_INIT(1),
247 };
248
249 #endif
250
251 /* allocate dst with ip6_dst_ops */
252 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
253                                              struct net_device *dev,
254                                              int flags,
255                                              struct fib6_table *table)
256 {
257         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
258                                         0, DST_OBSOLETE_FORCE_CHK, flags);
259
260         if (rt) {
261                 struct dst_entry *dst = &rt->dst;
262
263                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
264                 INIT_LIST_HEAD(&rt->rt6i_siblings);
265         }
266         return rt;
267 }
268
269 static void ip6_dst_destroy(struct dst_entry *dst)
270 {
271         struct rt6_info *rt = (struct rt6_info *)dst;
272         struct inet6_dev *idev = rt->rt6i_idev;
273         struct dst_entry *from = dst->from;
274
275         dst_destroy_metrics_generic(dst);
276
277         if (idev) {
278                 rt->rt6i_idev = NULL;
279                 in6_dev_put(idev);
280         }
281
282         dst->from = NULL;
283         dst_release(from);
284 }
285
286 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
287                            int how)
288 {
289         struct rt6_info *rt = (struct rt6_info *)dst;
290         struct inet6_dev *idev = rt->rt6i_idev;
291         struct net_device *loopback_dev =
292                 dev_net(dev)->loopback_dev;
293
294         if (dev != loopback_dev) {
295                 if (idev && idev->dev == dev) {
296                         struct inet6_dev *loopback_idev =
297                                 in6_dev_get(loopback_dev);
298                         if (loopback_idev) {
299                                 rt->rt6i_idev = loopback_idev;
300                                 in6_dev_put(idev);
301                         }
302                 }
303         }
304 }
305
306 static bool rt6_check_expired(const struct rt6_info *rt)
307 {
308         if (rt->rt6i_flags & RTF_EXPIRES) {
309                 if (time_after(jiffies, rt->dst.expires))
310                         return true;
311         } else if (rt->dst.from) {
312                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
313         }
314         return false;
315 }
316
317 /* Multipath route selection:
318  *   Hash based function using packet header and flowlabel.
319  * Adapted from fib_info_hashfn()
320  */
321 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
322                                const struct flowi6 *fl6)
323 {
324         unsigned int val = fl6->flowi6_proto;
325
326         val ^= ipv6_addr_hash(&fl6->daddr);
327         val ^= ipv6_addr_hash(&fl6->saddr);
328
329         /* Work only if this not encapsulated */
330         switch (fl6->flowi6_proto) {
331         case IPPROTO_UDP:
332         case IPPROTO_TCP:
333         case IPPROTO_SCTP:
334                 val ^= (__force u16)fl6->fl6_sport;
335                 val ^= (__force u16)fl6->fl6_dport;
336                 break;
337
338         case IPPROTO_ICMPV6:
339                 val ^= (__force u16)fl6->fl6_icmp_type;
340                 val ^= (__force u16)fl6->fl6_icmp_code;
341                 break;
342         }
343         /* RFC6438 recommands to use flowlabel */
344         val ^= (__force u32)fl6->flowlabel;
345
346         /* Perhaps, we need to tune, this function? */
347         val = val ^ (val >> 7) ^ (val >> 12);
348         return val % candidate_count;
349 }
350
351 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
352                                              struct flowi6 *fl6, int oif,
353                                              int strict)
354 {
355         struct rt6_info *sibling, *next_sibling;
356         int route_choosen;
357
358         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
359         /* Don't change the route, if route_choosen == 0
360          * (siblings does not include ourself)
361          */
362         if (route_choosen)
363                 list_for_each_entry_safe(sibling, next_sibling,
364                                 &match->rt6i_siblings, rt6i_siblings) {
365                         route_choosen--;
366                         if (route_choosen == 0) {
367                                 if (rt6_score_route(sibling, oif, strict) < 0)
368                                         break;
369                                 match = sibling;
370                                 break;
371                         }
372                 }
373         return match;
374 }
375
376 /*
377  *      Route lookup. Any table->tb6_lock is implied.
378  */
379
380 static inline struct rt6_info *rt6_device_match(struct net *net,
381                                                     struct rt6_info *rt,
382                                                     const struct in6_addr *saddr,
383                                                     int oif,
384                                                     int flags)
385 {
386         struct rt6_info *local = NULL;
387         struct rt6_info *sprt;
388
389         if (!oif && ipv6_addr_any(saddr))
390                 goto out;
391
392         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
393                 struct net_device *dev = sprt->dst.dev;
394
395                 if (oif) {
396                         if (dev->ifindex == oif)
397                                 return sprt;
398                         if (dev->flags & IFF_LOOPBACK) {
399                                 if (!sprt->rt6i_idev ||
400                                     sprt->rt6i_idev->dev->ifindex != oif) {
401                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
402                                                 continue;
403                                         if (local && (!oif ||
404                                                       local->rt6i_idev->dev->ifindex == oif))
405                                                 continue;
406                                 }
407                                 local = sprt;
408                         }
409                 } else {
410                         if (ipv6_chk_addr(net, saddr, dev,
411                                           flags & RT6_LOOKUP_F_IFACE))
412                                 return sprt;
413                 }
414         }
415
416         if (oif) {
417                 if (local)
418                         return local;
419
420                 if (flags & RT6_LOOKUP_F_IFACE)
421                         return net->ipv6.ip6_null_entry;
422         }
423 out:
424         return rt;
425 }
426
427 #ifdef CONFIG_IPV6_ROUTER_PREF
428 struct __rt6_probe_work {
429         struct work_struct work;
430         struct in6_addr target;
431         struct net_device *dev;
432 };
433
434 static void rt6_probe_deferred(struct work_struct *w)
435 {
436         struct in6_addr mcaddr;
437         struct __rt6_probe_work *work =
438                 container_of(w, struct __rt6_probe_work, work);
439
440         addrconf_addr_solict_mult(&work->target, &mcaddr);
441         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
442         dev_put(work->dev);
443         kfree(work);
444 }
445
446 static void rt6_probe(struct rt6_info *rt)
447 {
448         struct neighbour *neigh;
449         /*
450          * Okay, this does not seem to be appropriate
451          * for now, however, we need to check if it
452          * is really so; aka Router Reachability Probing.
453          *
454          * Router Reachability Probe MUST be rate-limited
455          * to no more than one per minute.
456          */
457         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
458                 return;
459         rcu_read_lock_bh();
460         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
461         if (neigh) {
462                 write_lock(&neigh->lock);
463                 if (neigh->nud_state & NUD_VALID)
464                         goto out;
465         }
466
467         if (!neigh ||
468             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
469                 struct __rt6_probe_work *work;
470
471                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
472
473                 if (neigh && work)
474                         __neigh_set_probe_once(neigh);
475
476                 if (neigh)
477                         write_unlock(&neigh->lock);
478
479                 if (work) {
480                         INIT_WORK(&work->work, rt6_probe_deferred);
481                         work->target = rt->rt6i_gateway;
482                         dev_hold(rt->dst.dev);
483                         work->dev = rt->dst.dev;
484                         schedule_work(&work->work);
485                 }
486         } else {
487 out:
488                 write_unlock(&neigh->lock);
489         }
490         rcu_read_unlock_bh();
491 }
492 #else
/* Without CONFIG_IPV6_ROUTER_PREF, router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
        /* no-op */
}
496 #endif
497
498 /*
499  * Default Router Selection (RFC 2461 6.3.6)
500  */
501 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
502 {
503         struct net_device *dev = rt->dst.dev;
504         if (!oif || dev->ifindex == oif)
505                 return 2;
506         if ((dev->flags & IFF_LOOPBACK) &&
507             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
508                 return 1;
509         return 0;
510 }
511
512 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
513 {
514         struct neighbour *neigh;
515         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
516
517         if (rt->rt6i_flags & RTF_NONEXTHOP ||
518             !(rt->rt6i_flags & RTF_GATEWAY))
519                 return RT6_NUD_SUCCEED;
520
521         rcu_read_lock_bh();
522         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
523         if (neigh) {
524                 read_lock(&neigh->lock);
525                 if (neigh->nud_state & NUD_VALID)
526                         ret = RT6_NUD_SUCCEED;
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528                 else if (!(neigh->nud_state & NUD_FAILED))
529                         ret = RT6_NUD_SUCCEED;
530                 else
531                         ret = RT6_NUD_FAIL_PROBE;
532 #endif
533                 read_unlock(&neigh->lock);
534         } else {
535                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
536                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
537         }
538         rcu_read_unlock_bh();
539
540         return ret;
541 }
542
543 static int rt6_score_route(struct rt6_info *rt, int oif,
544                            int strict)
545 {
546         int m;
547
548         m = rt6_check_dev(rt, oif);
549         if (!m && (strict & RT6_LOOKUP_F_IFACE))
550                 return RT6_NUD_FAIL_HARD;
551 #ifdef CONFIG_IPV6_ROUTER_PREF
552         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
553 #endif
554         if (strict & RT6_LOOKUP_F_REACHABLE) {
555                 int n = rt6_check_neigh(rt);
556                 if (n < 0)
557                         return n;
558         }
559         return m;
560 }
561
562 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
563                                    int *mpri, struct rt6_info *match,
564                                    bool *do_rr)
565 {
566         int m;
567         bool match_do_rr = false;
568
569         if (rt6_check_expired(rt))
570                 goto out;
571
572         m = rt6_score_route(rt, oif, strict);
573         if (m == RT6_NUD_FAIL_DO_RR) {
574                 match_do_rr = true;
575                 m = 0; /* lowest valid score */
576         } else if (m == RT6_NUD_FAIL_HARD) {
577                 goto out;
578         }
579
580         if (strict & RT6_LOOKUP_F_REACHABLE)
581                 rt6_probe(rt);
582
583         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
584         if (m > *mpri) {
585                 *do_rr = match_do_rr;
586                 *mpri = m;
587                 match = rt;
588         }
589 out:
590         return match;
591 }
592
593 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
594                                      struct rt6_info *rr_head,
595                                      u32 metric, int oif, int strict,
596                                      bool *do_rr)
597 {
598         struct rt6_info *rt, *match, *cont;
599         int mpri = -1;
600
601         match = NULL;
602         cont = NULL;
603         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
604                 if (rt->rt6i_metric != metric) {
605                         cont = rt;
606                         break;
607                 }
608
609                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
610         }
611
612         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
613                 if (rt->rt6i_metric != metric) {
614                         cont = rt;
615                         break;
616                 }
617
618                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
619         }
620
621         if (match || !cont)
622                 return match;
623
624         for (rt = cont; rt; rt = rt->dst.rt6_next)
625                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
626
627         return match;
628 }
629
630 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
631 {
632         struct rt6_info *match, *rt0;
633         struct net *net;
634         bool do_rr = false;
635
636         rt0 = fn->rr_ptr;
637         if (!rt0)
638                 fn->rr_ptr = rt0 = fn->leaf;
639
640         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
641                              &do_rr);
642
643         if (do_rr) {
644                 struct rt6_info *next = rt0->dst.rt6_next;
645
646                 /* no entries matched; do round-robin */
647                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
648                         next = fn->leaf;
649
650                 if (next != rt0)
651                         fn->rr_ptr = next;
652         }
653
654         net = dev_net(rt0->dst.dev);
655         return match ? match : net->ipv6.ip6_null_entry;
656 }
657
658 #ifdef CONFIG_IPV6_ROUTE_INFO
659 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
660                   const struct in6_addr *gwaddr)
661 {
662         struct net *net = dev_net(dev);
663         struct route_info *rinfo = (struct route_info *) opt;
664         struct in6_addr prefix_buf, *prefix;
665         unsigned int pref;
666         unsigned long lifetime;
667         struct rt6_info *rt;
668
669         if (len < sizeof(struct route_info)) {
670                 return -EINVAL;
671         }
672
673         /* Sanity check for prefix_len and length */
674         if (rinfo->length > 3) {
675                 return -EINVAL;
676         } else if (rinfo->prefix_len > 128) {
677                 return -EINVAL;
678         } else if (rinfo->prefix_len > 64) {
679                 if (rinfo->length < 2) {
680                         return -EINVAL;
681                 }
682         } else if (rinfo->prefix_len > 0) {
683                 if (rinfo->length < 1) {
684                         return -EINVAL;
685                 }
686         }
687
688         pref = rinfo->route_pref;
689         if (pref == ICMPV6_ROUTER_PREF_INVALID)
690                 return -EINVAL;
691
692         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
693
694         if (rinfo->length == 3)
695                 prefix = (struct in6_addr *)rinfo->prefix;
696         else {
697                 /* this function is safe */
698                 ipv6_addr_prefix(&prefix_buf,
699                                  (struct in6_addr *)rinfo->prefix,
700                                  rinfo->prefix_len);
701                 prefix = &prefix_buf;
702         }
703
704         if (rinfo->prefix_len == 0)
705                 rt = rt6_get_dflt_router(gwaddr, dev);
706         else
707                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
708                                         gwaddr, dev->ifindex);
709
710         if (rt && !lifetime) {
711                 ip6_del_rt(rt);
712                 rt = NULL;
713         }
714
715         if (!rt && lifetime)
716                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
717                                         pref);
718         else if (rt)
719                 rt->rt6i_flags = RTF_ROUTEINFO |
720                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
721
722         if (rt) {
723                 if (!addrconf_finite_timeout(lifetime))
724                         rt6_clean_expires(rt);
725                 else
726                         rt6_set_expires(rt, jiffies + HZ * lifetime);
727
728                 ip6_rt_put(rt);
729         }
730         return 0;
731 }
732 #endif
733
734 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
735                                         struct in6_addr *saddr)
736 {
737         struct fib6_node *pn;
738         while (1) {
739                 if (fn->fn_flags & RTN_TL_ROOT)
740                         return NULL;
741                 pn = fn->parent;
742                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
743                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
744                 else
745                         fn = pn;
746                 if (fn->fn_flags & RTN_RTINFO)
747                         return fn;
748         }
749 }
750
751 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
752                                              struct fib6_table *table,
753                                              struct flowi6 *fl6, int flags)
754 {
755         struct fib6_node *fn;
756         struct rt6_info *rt;
757
758         read_lock_bh(&table->tb6_lock);
759         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
760 restart:
761         rt = fn->leaf;
762         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
763         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
764                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
765         if (rt == net->ipv6.ip6_null_entry) {
766                 fn = fib6_backtrack(fn, &fl6->saddr);
767                 if (fn)
768                         goto restart;
769         }
770         dst_use(&rt->dst, jiffies);
771         read_unlock_bh(&table->tb6_lock);
772         return rt;
773
774 }
775
776 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
777                                     int flags)
778 {
779         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
780 }
781 EXPORT_SYMBOL_GPL(ip6_route_lookup);
782
783 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
784                             const struct in6_addr *saddr, int oif, int strict)
785 {
786         struct flowi6 fl6 = {
787                 .flowi6_oif = oif,
788                 .daddr = *daddr,
789         };
790         struct dst_entry *dst;
791         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
792
793         if (saddr) {
794                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
795                 flags |= RT6_LOOKUP_F_HAS_SADDR;
796         }
797
798         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
799         if (dst->error == 0)
800                 return (struct rt6_info *) dst;
801
802         dst_release(dst);
803
804         return NULL;
805 }
806 EXPORT_SYMBOL(rt6_lookup);
807
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
813
814 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
815                         struct mx6_config *mxc)
816 {
817         int err;
818         struct fib6_table *table;
819
820         table = rt->rt6i_table;
821         write_lock_bh(&table->tb6_lock);
822         err = fib6_add(&table->tb6_root, rt, info, mxc);
823         write_unlock_bh(&table->tb6_lock);
824
825         return err;
826 }
827
828 int ip6_ins_rt(struct rt6_info *rt)
829 {
830         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
831         struct mx6_config mxc = { .mx = NULL, };
832
833         return __ip6_ins_rt(rt, &info, &mxc);
834 }
835
836 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
837                                       const struct in6_addr *daddr,
838                                       const struct in6_addr *saddr)
839 {
840         struct rt6_info *rt;
841
842         /*
843          *      Clone the route.
844          */
845
846         rt = ip6_rt_copy(ort, daddr);
847
848         if (rt) {
849                 if (ort->rt6i_dst.plen != 128 &&
850                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
851                         rt->rt6i_flags |= RTF_ANYCAST;
852
853                 rt->rt6i_flags |= RTF_CACHE;
854
855 #ifdef CONFIG_IPV6_SUBTREES
856                 if (rt->rt6i_src.plen && saddr) {
857                         rt->rt6i_src.addr = *saddr;
858                         rt->rt6i_src.plen = 128;
859                 }
860 #endif
861         }
862
863         return rt;
864 }
865
866 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
867                                         const struct in6_addr *daddr)
868 {
869         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
870
871         if (rt)
872                 rt->rt6i_flags |= RTF_CACHE;
873         return rt;
874 }
875
876 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
877                                       struct flowi6 *fl6, int flags)
878 {
879         struct fib6_node *fn, *saved_fn;
880         struct rt6_info *rt, *nrt;
881         int strict = 0;
882         int attempts = 3;
883         int err;
884
885         strict |= flags & RT6_LOOKUP_F_IFACE;
886         if (net->ipv6.devconf_all->forwarding == 0)
887                 strict |= RT6_LOOKUP_F_REACHABLE;
888
889 redo_fib6_lookup_lock:
890         read_lock_bh(&table->tb6_lock);
891
892         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
893         saved_fn = fn;
894
895 redo_rt6_select:
896         rt = rt6_select(fn, oif, strict);
897         if (rt->rt6i_nsiblings)
898                 rt = rt6_multipath_select(rt, fl6, oif, strict);
899         if (rt == net->ipv6.ip6_null_entry) {
900                 fn = fib6_backtrack(fn, &fl6->saddr);
901                 if (fn)
902                         goto redo_rt6_select;
903                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
904                         /* also consider unreachable route */
905                         strict &= ~RT6_LOOKUP_F_REACHABLE;
906                         fn = saved_fn;
907                         goto redo_rt6_select;
908                 } else {
909                         dst_hold(&rt->dst);
910                         read_unlock_bh(&table->tb6_lock);
911                         goto out2;
912                 }
913         }
914
915         dst_hold(&rt->dst);
916         read_unlock_bh(&table->tb6_lock);
917
918         if (rt->rt6i_flags & RTF_CACHE)
919                 goto out2;
920
921         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
922                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
923         else if (!(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL))
924                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
925         else
926                 goto out2;
927
928         ip6_rt_put(rt);
929         rt = nrt ? : net->ipv6.ip6_null_entry;
930
931         dst_hold(&rt->dst);
932         if (nrt) {
933                 err = ip6_ins_rt(nrt);
934                 if (!err)
935                         goto out2;
936         }
937
938         if (--attempts <= 0)
939                 goto out2;
940
941         /*
942          * Race condition! In the gap, when table->tb6_lock was
943          * released someone could insert this route.  Relookup.
944          */
945         ip6_rt_put(rt);
946         goto redo_fib6_lookup_lock;
947
948 out2:
949         rt6_dst_from_metrics_check(rt);
950         rt->dst.lastuse = jiffies;
951         rt->dst.__use++;
952
953         return rt;
954 }
955
956 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
957                                             struct flowi6 *fl6, int flags)
958 {
959         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
960 }
961
962 static struct dst_entry *ip6_route_input_lookup(struct net *net,
963                                                 struct net_device *dev,
964                                                 struct flowi6 *fl6, int flags)
965 {
966         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
967                 flags |= RT6_LOOKUP_F_IFACE;
968
969         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
970 }
971
972 void ip6_route_input(struct sk_buff *skb)
973 {
974         const struct ipv6hdr *iph = ipv6_hdr(skb);
975         struct net *net = dev_net(skb->dev);
976         int flags = RT6_LOOKUP_F_HAS_SADDR;
977         struct flowi6 fl6 = {
978                 .flowi6_iif = skb->dev->ifindex,
979                 .daddr = iph->daddr,
980                 .saddr = iph->saddr,
981                 .flowlabel = ip6_flowinfo(iph),
982                 .flowi6_mark = skb->mark,
983                 .flowi6_proto = iph->nexthdr,
984         };
985
986         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
987 }
988
989 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
990                                              struct flowi6 *fl6, int flags)
991 {
992         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
993 }
994
995 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
996                                     struct flowi6 *fl6)
997 {
998         int flags = 0;
999
1000         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1001
1002         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1003                 flags |= RT6_LOOKUP_F_IFACE;
1004
1005         if (!ipv6_addr_any(&fl6->saddr))
1006                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1007         else if (sk)
1008                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1009
1010         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1011 }
1012 EXPORT_SYMBOL(ip6_route_output);
1013
1014 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1015 {
1016         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1017         struct dst_entry *new = NULL;
1018
1019         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1020         if (rt) {
1021                 new = &rt->dst;
1022
1023                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1024
1025                 new->__use = 1;
1026                 new->input = dst_discard;
1027                 new->output = dst_discard_sk;
1028
1029                 if (dst_metrics_read_only(&ort->dst))
1030                         new->_metrics = ort->dst._metrics;
1031                 else
1032                         dst_copy_metrics(new, &ort->dst);
1033                 rt->rt6i_idev = ort->rt6i_idev;
1034                 if (rt->rt6i_idev)
1035                         in6_dev_hold(rt->rt6i_idev);
1036
1037                 rt->rt6i_gateway = ort->rt6i_gateway;
1038                 rt->rt6i_flags = ort->rt6i_flags;
1039                 rt->rt6i_metric = 0;
1040
1041                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1042 #ifdef CONFIG_IPV6_SUBTREES
1043                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1044 #endif
1045
1046                 dst_free(new);
1047         }
1048
1049         dst_release(dst_orig);
1050         return new ? new : ERR_PTR(-ENOMEM);
1051 }
1052
1053 /*
1054  *      Destination cache support functions
1055  */
1056
1057 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1058 {
1059         if (rt->dst.from &&
1060             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1061                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1062 }
1063
1064 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1065 {
1066         struct rt6_info *rt;
1067
1068         rt = (struct rt6_info *) dst;
1069
1070         /* All IPV6 dsts are created with ->obsolete set to the value
1071          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1072          * into this function always.
1073          */
1074         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1075                 return NULL;
1076
1077         if (rt6_check_expired(rt))
1078                 return NULL;
1079
1080         rt6_dst_from_metrics_check(rt);
1081
1082         return dst;
1083 }
1084
1085 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1086 {
1087         struct rt6_info *rt = (struct rt6_info *) dst;
1088
1089         if (rt) {
1090                 if (rt->rt6i_flags & RTF_CACHE) {
1091                         if (rt6_check_expired(rt)) {
1092                                 ip6_del_rt(rt);
1093                                 dst = NULL;
1094                         }
1095                 } else {
1096                         dst_release(dst);
1097                         dst = NULL;
1098                 }
1099         }
1100         return dst;
1101 }
1102
1103 static void ip6_link_failure(struct sk_buff *skb)
1104 {
1105         struct rt6_info *rt;
1106
1107         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1108
1109         rt = (struct rt6_info *) skb_dst(skb);
1110         if (rt) {
1111                 if (rt->rt6i_flags & RTF_CACHE) {
1112                         dst_hold(&rt->dst);
1113                         if (ip6_del_rt(rt))
1114                                 dst_free(&rt->dst);
1115                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1116                         rt->rt6i_node->fn_sernum = -1;
1117                 }
1118         }
1119 }
1120
1121 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1122                                struct sk_buff *skb, u32 mtu)
1123 {
1124         struct rt6_info *rt6 = (struct rt6_info *)dst;
1125
1126         dst_confirm(dst);
1127         if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
1128                 struct net *net = dev_net(dst->dev);
1129
1130                 rt6->rt6i_flags |= RTF_MODIFIED;
1131                 if (mtu < IPV6_MIN_MTU)
1132                         mtu = IPV6_MIN_MTU;
1133
1134                 rt6->rt6i_pmtu = mtu;
1135                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1136         }
1137 }
1138
1139 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1140                      int oif, u32 mark)
1141 {
1142         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1143         struct dst_entry *dst;
1144         struct flowi6 fl6;
1145
1146         memset(&fl6, 0, sizeof(fl6));
1147         fl6.flowi6_oif = oif;
1148         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1149         fl6.daddr = iph->daddr;
1150         fl6.saddr = iph->saddr;
1151         fl6.flowlabel = ip6_flowinfo(iph);
1152
1153         dst = ip6_route_output(net, NULL, &fl6);
1154         if (!dst->error)
1155                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1156         dst_release(dst);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1159
1160 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1161 {
1162         ip6_update_pmtu(skb, sock_net(sk), mtu,
1163                         sk->sk_bound_dev_if, sk->sk_mark);
1164 }
1165 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1166
1167 /* Handle redirects */
1168 struct ip6rd_flowi {
1169         struct flowi6 fl6;
1170         struct in6_addr gateway;
1171 };
1172
1173 static struct rt6_info *__ip6_route_redirect(struct net *net,
1174                                              struct fib6_table *table,
1175                                              struct flowi6 *fl6,
1176                                              int flags)
1177 {
1178         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1179         struct rt6_info *rt;
1180         struct fib6_node *fn;
1181
1182         /* Get the "current" route for this destination and
1183          * check if the redirect has come from approriate router.
1184          *
1185          * RFC 4861 specifies that redirects should only be
1186          * accepted if they come from the nexthop to the target.
1187          * Due to the way the routes are chosen, this notion
1188          * is a bit fuzzy and one might need to check all possible
1189          * routes.
1190          */
1191
1192         read_lock_bh(&table->tb6_lock);
1193         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1194 restart:
1195         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1196                 if (rt6_check_expired(rt))
1197                         continue;
1198                 if (rt->dst.error)
1199                         break;
1200                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1201                         continue;
1202                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1203                         continue;
1204                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1205                         continue;
1206                 break;
1207         }
1208
1209         if (!rt)
1210                 rt = net->ipv6.ip6_null_entry;
1211         else if (rt->dst.error) {
1212                 rt = net->ipv6.ip6_null_entry;
1213                 goto out;
1214         }
1215
1216         if (rt == net->ipv6.ip6_null_entry) {
1217                 fn = fib6_backtrack(fn, &fl6->saddr);
1218                 if (fn)
1219                         goto restart;
1220         }
1221
1222 out:
1223         dst_hold(&rt->dst);
1224
1225         read_unlock_bh(&table->tb6_lock);
1226
1227         return rt;
1228 };
1229
1230 static struct dst_entry *ip6_route_redirect(struct net *net,
1231                                         const struct flowi6 *fl6,
1232                                         const struct in6_addr *gateway)
1233 {
1234         int flags = RT6_LOOKUP_F_HAS_SADDR;
1235         struct ip6rd_flowi rdfl;
1236
1237         rdfl.fl6 = *fl6;
1238         rdfl.gateway = *gateway;
1239
1240         return fib6_rule_lookup(net, &rdfl.fl6,
1241                                 flags, __ip6_route_redirect);
1242 }
1243
1244 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1245 {
1246         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1247         struct dst_entry *dst;
1248         struct flowi6 fl6;
1249
1250         memset(&fl6, 0, sizeof(fl6));
1251         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1252         fl6.flowi6_oif = oif;
1253         fl6.flowi6_mark = mark;
1254         fl6.daddr = iph->daddr;
1255         fl6.saddr = iph->saddr;
1256         fl6.flowlabel = ip6_flowinfo(iph);
1257
1258         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1259         rt6_do_redirect(dst, NULL, skb);
1260         dst_release(dst);
1261 }
1262 EXPORT_SYMBOL_GPL(ip6_redirect);
1263
1264 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1265                             u32 mark)
1266 {
1267         const struct ipv6hdr *iph = ipv6_hdr(skb);
1268         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1269         struct dst_entry *dst;
1270         struct flowi6 fl6;
1271
1272         memset(&fl6, 0, sizeof(fl6));
1273         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1274         fl6.flowi6_oif = oif;
1275         fl6.flowi6_mark = mark;
1276         fl6.daddr = msg->dest;
1277         fl6.saddr = iph->daddr;
1278
1279         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1280         rt6_do_redirect(dst, NULL, skb);
1281         dst_release(dst);
1282 }
1283
1284 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1285 {
1286         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1287 }
1288 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1289
1290 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1291 {
1292         struct net_device *dev = dst->dev;
1293         unsigned int mtu = dst_mtu(dst);
1294         struct net *net = dev_net(dev);
1295
1296         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1297
1298         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1299                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1300
1301         /*
1302          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1303          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1304          * IPV6_MAXPLEN is also valid and means: "any MSS,
1305          * rely only on pmtu discovery"
1306          */
1307         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1308                 mtu = IPV6_MAXPLEN;
1309         return mtu;
1310 }
1311
1312 static unsigned int ip6_mtu(const struct dst_entry *dst)
1313 {
1314         const struct rt6_info *rt = (const struct rt6_info *)dst;
1315         unsigned int mtu = rt->rt6i_pmtu;
1316         struct inet6_dev *idev;
1317
1318         if (mtu)
1319                 goto out;
1320
1321         mtu = dst_metric_raw(dst, RTAX_MTU);
1322         if (mtu)
1323                 goto out;
1324
1325         mtu = IPV6_MIN_MTU;
1326
1327         rcu_read_lock();
1328         idev = __in6_dev_get(dst->dev);
1329         if (idev)
1330                 mtu = idev->cnf.mtu6;
1331         rcu_read_unlock();
1332
1333 out:
1334         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1335 }
1336
1337 static struct dst_entry *icmp6_dst_gc_list;
1338 static DEFINE_SPINLOCK(icmp6_dst_lock);
1339
1340 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1341                                   struct flowi6 *fl6)
1342 {
1343         struct dst_entry *dst;
1344         struct rt6_info *rt;
1345         struct inet6_dev *idev = in6_dev_get(dev);
1346         struct net *net = dev_net(dev);
1347
1348         if (unlikely(!idev))
1349                 return ERR_PTR(-ENODEV);
1350
1351         rt = ip6_dst_alloc(net, dev, 0, NULL);
1352         if (unlikely(!rt)) {
1353                 in6_dev_put(idev);
1354                 dst = ERR_PTR(-ENOMEM);
1355                 goto out;
1356         }
1357
1358         rt->dst.flags |= DST_HOST;
1359         rt->dst.output  = ip6_output;
1360         atomic_set(&rt->dst.__refcnt, 1);
1361         rt->rt6i_gateway  = fl6->daddr;
1362         rt->rt6i_dst.addr = fl6->daddr;
1363         rt->rt6i_dst.plen = 128;
1364         rt->rt6i_idev     = idev;
1365         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1366
1367         spin_lock_bh(&icmp6_dst_lock);
1368         rt->dst.next = icmp6_dst_gc_list;
1369         icmp6_dst_gc_list = &rt->dst;
1370         spin_unlock_bh(&icmp6_dst_lock);
1371
1372         fib6_force_start_gc(net);
1373
1374         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1375
1376 out:
1377         return dst;
1378 }
1379
1380 int icmp6_dst_gc(void)
1381 {
1382         struct dst_entry *dst, **pprev;
1383         int more = 0;
1384
1385         spin_lock_bh(&icmp6_dst_lock);
1386         pprev = &icmp6_dst_gc_list;
1387
1388         while ((dst = *pprev) != NULL) {
1389                 if (!atomic_read(&dst->__refcnt)) {
1390                         *pprev = dst->next;
1391                         dst_free(dst);
1392                 } else {
1393                         pprev = &dst->next;
1394                         ++more;
1395                 }
1396         }
1397
1398         spin_unlock_bh(&icmp6_dst_lock);
1399
1400         return more;
1401 }
1402
1403 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1404                             void *arg)
1405 {
1406         struct dst_entry *dst, **pprev;
1407
1408         spin_lock_bh(&icmp6_dst_lock);
1409         pprev = &icmp6_dst_gc_list;
1410         while ((dst = *pprev) != NULL) {
1411                 struct rt6_info *rt = (struct rt6_info *) dst;
1412                 if (func(rt, arg)) {
1413                         *pprev = dst->next;
1414                         dst_free(dst);
1415                 } else {
1416                         pprev = &dst->next;
1417                 }
1418         }
1419         spin_unlock_bh(&icmp6_dst_lock);
1420 }
1421
1422 static int ip6_dst_gc(struct dst_ops *ops)
1423 {
1424         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1425         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1426         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1427         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1428         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1429         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1430         int entries;
1431
1432         entries = dst_entries_get_fast(ops);
1433         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1434             entries <= rt_max_size)
1435                 goto out;
1436
1437         net->ipv6.ip6_rt_gc_expire++;
1438         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1439         entries = dst_entries_get_slow(ops);
1440         if (entries < ops->gc_thresh)
1441                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1442 out:
1443         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1444         return entries > rt_max_size;
1445 }
1446
1447 static int ip6_convert_metrics(struct mx6_config *mxc,
1448                                const struct fib6_config *cfg)
1449 {
1450         struct nlattr *nla;
1451         int remaining;
1452         u32 *mp;
1453
1454         if (!cfg->fc_mx)
1455                 return 0;
1456
1457         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1458         if (unlikely(!mp))
1459                 return -ENOMEM;
1460
1461         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1462                 int type = nla_type(nla);
1463
1464                 if (type) {
1465                         u32 val;
1466
1467                         if (unlikely(type > RTAX_MAX))
1468                                 goto err;
1469                         if (type == RTAX_CC_ALGO) {
1470                                 char tmp[TCP_CA_NAME_MAX];
1471
1472                                 nla_strlcpy(tmp, nla, sizeof(tmp));
1473                                 val = tcp_ca_get_key_by_name(tmp);
1474                                 if (val == TCP_CA_UNSPEC)
1475                                         goto err;
1476                         } else {
1477                                 val = nla_get_u32(nla);
1478                         }
1479
1480                         mp[type - 1] = val;
1481                         __set_bit(type - 1, mxc->mx_valid);
1482                 }
1483         }
1484
1485         mxc->mx = mp;
1486
1487         return 0;
1488  err:
1489         kfree(mp);
1490         return -EINVAL;
1491 }
1492
1493 int ip6_route_add(struct fib6_config *cfg)
1494 {
1495         int err;
1496         struct net *net = cfg->fc_nlinfo.nl_net;
1497         struct rt6_info *rt = NULL;
1498         struct net_device *dev = NULL;
1499         struct inet6_dev *idev = NULL;
1500         struct fib6_table *table;
1501         struct mx6_config mxc = { .mx = NULL, };
1502         int addr_type;
1503
1504         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1505                 return -EINVAL;
1506 #ifndef CONFIG_IPV6_SUBTREES
1507         if (cfg->fc_src_len)
1508                 return -EINVAL;
1509 #endif
1510         if (cfg->fc_ifindex) {
1511                 err = -ENODEV;
1512                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1513                 if (!dev)
1514                         goto out;
1515                 idev = in6_dev_get(dev);
1516                 if (!idev)
1517                         goto out;
1518         }
1519
1520         if (cfg->fc_metric == 0)
1521                 cfg->fc_metric = IP6_RT_PRIO_USER;
1522
1523         err = -ENOBUFS;
1524         if (cfg->fc_nlinfo.nlh &&
1525             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1526                 table = fib6_get_table(net, cfg->fc_table);
1527                 if (!table) {
1528                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1529                         table = fib6_new_table(net, cfg->fc_table);
1530                 }
1531         } else {
1532                 table = fib6_new_table(net, cfg->fc_table);
1533         }
1534
1535         if (!table)
1536                 goto out;
1537
1538         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1539
1540         if (!rt) {
1541                 err = -ENOMEM;
1542                 goto out;
1543         }
1544
1545         if (cfg->fc_flags & RTF_EXPIRES)
1546                 rt6_set_expires(rt, jiffies +
1547                                 clock_t_to_jiffies(cfg->fc_expires));
1548         else
1549                 rt6_clean_expires(rt);
1550
1551         if (cfg->fc_protocol == RTPROT_UNSPEC)
1552                 cfg->fc_protocol = RTPROT_BOOT;
1553         rt->rt6i_protocol = cfg->fc_protocol;
1554
1555         addr_type = ipv6_addr_type(&cfg->fc_dst);
1556
1557         if (addr_type & IPV6_ADDR_MULTICAST)
1558                 rt->dst.input = ip6_mc_input;
1559         else if (cfg->fc_flags & RTF_LOCAL)
1560                 rt->dst.input = ip6_input;
1561         else
1562                 rt->dst.input = ip6_forward;
1563
1564         rt->dst.output = ip6_output;
1565
1566         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1567         rt->rt6i_dst.plen = cfg->fc_dst_len;
1568         if (rt->rt6i_dst.plen == 128)
1569                 rt->dst.flags |= DST_HOST;
1570
1571 #ifdef CONFIG_IPV6_SUBTREES
1572         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1573         rt->rt6i_src.plen = cfg->fc_src_len;
1574 #endif
1575
1576         rt->rt6i_metric = cfg->fc_metric;
1577
1578         /* We cannot add true routes via loopback here,
1579            they would result in kernel looping; promote them to reject routes
1580          */
1581         if ((cfg->fc_flags & RTF_REJECT) ||
1582             (dev && (dev->flags & IFF_LOOPBACK) &&
1583              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1584              !(cfg->fc_flags & RTF_LOCAL))) {
1585                 /* hold loopback dev/idev if we haven't done so. */
1586                 if (dev != net->loopback_dev) {
1587                         if (dev) {
1588                                 dev_put(dev);
1589                                 in6_dev_put(idev);
1590                         }
1591                         dev = net->loopback_dev;
1592                         dev_hold(dev);
1593                         idev = in6_dev_get(dev);
1594                         if (!idev) {
1595                                 err = -ENODEV;
1596                                 goto out;
1597                         }
1598                 }
1599                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1600                 switch (cfg->fc_type) {
1601                 case RTN_BLACKHOLE:
1602                         rt->dst.error = -EINVAL;
1603                         rt->dst.output = dst_discard_sk;
1604                         rt->dst.input = dst_discard;
1605                         break;
1606                 case RTN_PROHIBIT:
1607                         rt->dst.error = -EACCES;
1608                         rt->dst.output = ip6_pkt_prohibit_out;
1609                         rt->dst.input = ip6_pkt_prohibit;
1610                         break;
1611                 case RTN_THROW:
1612                 default:
1613                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1614                                         : -ENETUNREACH;
1615                         rt->dst.output = ip6_pkt_discard_out;
1616                         rt->dst.input = ip6_pkt_discard;
1617                         break;
1618                 }
1619                 goto install_route;
1620         }
1621
1622         if (cfg->fc_flags & RTF_GATEWAY) {
1623                 const struct in6_addr *gw_addr;
1624                 int gwa_type;
1625
1626                 gw_addr = &cfg->fc_gateway;
1627                 rt->rt6i_gateway = *gw_addr;
1628                 gwa_type = ipv6_addr_type(gw_addr);
1629
1630                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1631                         struct rt6_info *grt;
1632
1633                         /* IPv6 strictly inhibits using not link-local
1634                            addresses as nexthop address.
1635                            Otherwise, router will not able to send redirects.
1636                            It is very good, but in some (rare!) circumstances
1637                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1638                            some exceptions. --ANK
1639                          */
1640                         err = -EINVAL;
1641                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1642                                 goto out;
1643
1644                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1645
1646                         err = -EHOSTUNREACH;
1647                         if (!grt)
1648                                 goto out;
1649                         if (dev) {
1650                                 if (dev != grt->dst.dev) {
1651                                         ip6_rt_put(grt);
1652                                         goto out;
1653                                 }
1654                         } else {
1655                                 dev = grt->dst.dev;
1656                                 idev = grt->rt6i_idev;
1657                                 dev_hold(dev);
1658                                 in6_dev_hold(grt->rt6i_idev);
1659                         }
1660                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1661                                 err = 0;
1662                         ip6_rt_put(grt);
1663
1664                         if (err)
1665                                 goto out;
1666                 }
1667                 err = -EINVAL;
1668                 if (!dev || (dev->flags & IFF_LOOPBACK))
1669                         goto out;
1670         }
1671
1672         err = -ENODEV;
1673         if (!dev)
1674                 goto out;
1675
1676         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1677                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1678                         err = -EINVAL;
1679                         goto out;
1680                 }
1681                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1682                 rt->rt6i_prefsrc.plen = 128;
1683         } else
1684                 rt->rt6i_prefsrc.plen = 0;
1685
1686         rt->rt6i_flags = cfg->fc_flags;
1687
1688 install_route:
1689         rt->dst.dev = dev;
1690         rt->rt6i_idev = idev;
1691         rt->rt6i_table = table;
1692
1693         cfg->fc_nlinfo.nl_net = dev_net(dev);
1694
1695         err = ip6_convert_metrics(&mxc, cfg);
1696         if (err)
1697                 goto out;
1698
1699         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1700
1701         kfree(mxc.mx);
1702         return err;
1703 out:
1704         if (dev)
1705                 dev_put(dev);
1706         if (idev)
1707                 in6_dev_put(idev);
1708         if (rt)
1709                 dst_free(&rt->dst);
1710         return err;
1711 }
1712
1713 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1714 {
1715         int err;
1716         struct fib6_table *table;
1717         struct net *net = dev_net(rt->dst.dev);
1718
1719         if (rt == net->ipv6.ip6_null_entry) {
1720                 err = -ENOENT;
1721                 goto out;
1722         }
1723
1724         table = rt->rt6i_table;
1725         write_lock_bh(&table->tb6_lock);
1726         err = fib6_del(rt, info);
1727         write_unlock_bh(&table->tb6_lock);
1728
1729 out:
1730         ip6_rt_put(rt);
1731         return err;
1732 }
1733
1734 int ip6_del_rt(struct rt6_info *rt)
1735 {
1736         struct nl_info info = {
1737                 .nl_net = dev_net(rt->dst.dev),
1738         };
1739         return __ip6_del_rt(rt, &info);
1740 }
1741
1742 static int ip6_route_del(struct fib6_config *cfg)
1743 {
1744         struct fib6_table *table;
1745         struct fib6_node *fn;
1746         struct rt6_info *rt;
1747         int err = -ESRCH;
1748
1749         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1750         if (!table)
1751                 return err;
1752
1753         read_lock_bh(&table->tb6_lock);
1754
1755         fn = fib6_locate(&table->tb6_root,
1756                          &cfg->fc_dst, cfg->fc_dst_len,
1757                          &cfg->fc_src, cfg->fc_src_len);
1758
1759         if (fn) {
1760                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1761                         if ((rt->rt6i_flags & RTF_CACHE) &&
1762                             !(cfg->fc_flags & RTF_CACHE))
1763                                 continue;
1764                         if (cfg->fc_ifindex &&
1765                             (!rt->dst.dev ||
1766                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1767                                 continue;
1768                         if (cfg->fc_flags & RTF_GATEWAY &&
1769                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1770                                 continue;
1771                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1772                                 continue;
1773                         dst_hold(&rt->dst);
1774                         read_unlock_bh(&table->tb6_lock);
1775
1776                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1777                 }
1778         }
1779         read_unlock_bh(&table->tb6_lock);
1780
1781         return err;
1782 }
1783
1784 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1785 {
1786         struct net *net = dev_net(skb->dev);
1787         struct netevent_redirect netevent;
1788         struct rt6_info *rt, *nrt = NULL;
1789         struct ndisc_options ndopts;
1790         struct inet6_dev *in6_dev;
1791         struct neighbour *neigh;
1792         struct rd_msg *msg;
1793         int optlen, on_link;
1794         u8 *lladdr;
1795
1796         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1797         optlen -= sizeof(*msg);
1798
1799         if (optlen < 0) {
1800                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1801                 return;
1802         }
1803
1804         msg = (struct rd_msg *)icmp6_hdr(skb);
1805
1806         if (ipv6_addr_is_multicast(&msg->dest)) {
1807                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1808                 return;
1809         }
1810
1811         on_link = 0;
1812         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1813                 on_link = 1;
1814         } else if (ipv6_addr_type(&msg->target) !=
1815                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1816                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1817                 return;
1818         }
1819
1820         in6_dev = __in6_dev_get(skb->dev);
1821         if (!in6_dev)
1822                 return;
1823         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1824                 return;
1825
1826         /* RFC2461 8.1:
1827          *      The IP source address of the Redirect MUST be the same as the current
1828          *      first-hop router for the specified ICMP Destination Address.
1829          */
1830
1831         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1832                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1833                 return;
1834         }
1835
1836         lladdr = NULL;
1837         if (ndopts.nd_opts_tgt_lladdr) {
1838                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1839                                              skb->dev);
1840                 if (!lladdr) {
1841                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1842                         return;
1843                 }
1844         }
1845
1846         rt = (struct rt6_info *) dst;
1847         if (rt == net->ipv6.ip6_null_entry) {
1848                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1849                 return;
1850         }
1851
1852         /* Redirect received -> path was valid.
1853          * Look, redirects are sent only in response to data packets,
1854          * so that this nexthop apparently is reachable. --ANK
1855          */
1856         dst_confirm(&rt->dst);
1857
1858         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1859         if (!neigh)
1860                 return;
1861
1862         /*
1863          *      We have finally decided to accept it.
1864          */
1865
1866         neigh_update(neigh, lladdr, NUD_STALE,
1867                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1868                      NEIGH_UPDATE_F_OVERRIDE|
1869                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1870                                      NEIGH_UPDATE_F_ISROUTER))
1871                      );
1872
1873         nrt = ip6_rt_copy(rt, &msg->dest);
1874         if (!nrt)
1875                 goto out;
1876
1877         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1878         if (on_link)
1879                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1880
1881         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1882
1883         if (ip6_ins_rt(nrt))
1884                 goto out;
1885
1886         netevent.old = &rt->dst;
1887         netevent.new = &nrt->dst;
1888         netevent.daddr = &msg->dest;
1889         netevent.neigh = neigh;
1890         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1891
1892         if (rt->rt6i_flags & RTF_CACHE) {
1893                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1894                 ip6_del_rt(rt);
1895         }
1896
1897 out:
1898         neigh_release(neigh);
1899 }
1900
1901 /*
1902  *      Misc support functions
1903  */
1904
1905 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1906 {
1907         BUG_ON(from->dst.from);
1908
1909         rt->rt6i_flags &= ~RTF_EXPIRES;
1910         dst_hold(&from->dst);
1911         rt->dst.from = &from->dst;
1912         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1913 }
1914
1915 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1916                                     const struct in6_addr *dest)
1917 {
1918         struct net *net = dev_net(ort->dst.dev);
1919         struct rt6_info *rt;
1920
1921         if (ort->rt6i_flags & RTF_CACHE)
1922                 ort = (struct rt6_info *)ort->dst.from;
1923
1924         rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1925                            ort->rt6i_table);
1926
1927         if (rt) {
1928                 rt->dst.input = ort->dst.input;
1929                 rt->dst.output = ort->dst.output;
1930                 rt->dst.flags |= DST_HOST;
1931
1932                 rt->rt6i_dst.addr = *dest;
1933                 rt->rt6i_dst.plen = 128;
1934                 rt->dst.error = ort->dst.error;
1935                 rt->rt6i_idev = ort->rt6i_idev;
1936                 if (rt->rt6i_idev)
1937                         in6_dev_hold(rt->rt6i_idev);
1938                 rt->dst.lastuse = jiffies;
1939
1940                 if (ort->rt6i_flags & RTF_GATEWAY)
1941                         rt->rt6i_gateway = ort->rt6i_gateway;
1942                 else
1943                         rt->rt6i_gateway = *dest;
1944                 rt->rt6i_flags = ort->rt6i_flags;
1945                 rt6_set_from(rt, ort);
1946                 rt->rt6i_metric = 0;
1947
1948 #ifdef CONFIG_IPV6_SUBTREES
1949                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1950 #endif
1951                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1952                 rt->rt6i_table = ort->rt6i_table;
1953         }
1954         return rt;
1955 }
1956
1957 #ifdef CONFIG_IPV6_ROUTE_INFO
1958 static struct rt6_info *rt6_get_route_info(struct net *net,
1959                                            const struct in6_addr *prefix, int prefixlen,
1960                                            const struct in6_addr *gwaddr, int ifindex)
1961 {
1962         struct fib6_node *fn;
1963         struct rt6_info *rt = NULL;
1964         struct fib6_table *table;
1965
1966         table = fib6_get_table(net, RT6_TABLE_INFO);
1967         if (!table)
1968                 return NULL;
1969
1970         read_lock_bh(&table->tb6_lock);
1971         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1972         if (!fn)
1973                 goto out;
1974
1975         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1976                 if (rt->dst.dev->ifindex != ifindex)
1977                         continue;
1978                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1979                         continue;
1980                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1981                         continue;
1982                 dst_hold(&rt->dst);
1983                 break;
1984         }
1985 out:
1986         read_unlock_bh(&table->tb6_lock);
1987         return rt;
1988 }
1989
1990 static struct rt6_info *rt6_add_route_info(struct net *net,
1991                                            const struct in6_addr *prefix, int prefixlen,
1992                                            const struct in6_addr *gwaddr, int ifindex,
1993                                            unsigned int pref)
1994 {
1995         struct fib6_config cfg = {
1996                 .fc_table       = RT6_TABLE_INFO,
1997                 .fc_metric      = IP6_RT_PRIO_USER,
1998                 .fc_ifindex     = ifindex,
1999                 .fc_dst_len     = prefixlen,
2000                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2001                                   RTF_UP | RTF_PREF(pref),
2002                 .fc_nlinfo.portid = 0,
2003                 .fc_nlinfo.nlh = NULL,
2004                 .fc_nlinfo.nl_net = net,
2005         };
2006
2007         cfg.fc_dst = *prefix;
2008         cfg.fc_gateway = *gwaddr;
2009
2010         /* We should treat it as a default route if prefix length is 0. */
2011         if (!prefixlen)
2012                 cfg.fc_flags |= RTF_DEFAULT;
2013
2014         ip6_route_add(&cfg);
2015
2016         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2017 }
2018 #endif
2019
2020 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2021 {
2022         struct rt6_info *rt;
2023         struct fib6_table *table;
2024
2025         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2026         if (!table)
2027                 return NULL;
2028
2029         read_lock_bh(&table->tb6_lock);
2030         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2031                 if (dev == rt->dst.dev &&
2032                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2033                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2034                         break;
2035         }
2036         if (rt)
2037                 dst_hold(&rt->dst);
2038         read_unlock_bh(&table->tb6_lock);
2039         return rt;
2040 }
2041
2042 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2043                                      struct net_device *dev,
2044                                      unsigned int pref)
2045 {
2046         struct fib6_config cfg = {
2047                 .fc_table       = RT6_TABLE_DFLT,
2048                 .fc_metric      = IP6_RT_PRIO_USER,
2049                 .fc_ifindex     = dev->ifindex,
2050                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2051                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2052                 .fc_nlinfo.portid = 0,
2053                 .fc_nlinfo.nlh = NULL,
2054                 .fc_nlinfo.nl_net = dev_net(dev),
2055         };
2056
2057         cfg.fc_gateway = *gwaddr;
2058
2059         ip6_route_add(&cfg);
2060
2061         return rt6_get_dflt_router(gwaddr, dev);
2062 }
2063
2064 void rt6_purge_dflt_routers(struct net *net)
2065 {
2066         struct rt6_info *rt;
2067         struct fib6_table *table;
2068
2069         /* NOTE: Keep consistent with rt6_get_dflt_router */
2070         table = fib6_get_table(net, RT6_TABLE_DFLT);
2071         if (!table)
2072                 return;
2073
2074 restart:
2075         read_lock_bh(&table->tb6_lock);
2076         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2077                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2078                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2079                         dst_hold(&rt->dst);
2080                         read_unlock_bh(&table->tb6_lock);
2081                         ip6_del_rt(rt);
2082                         goto restart;
2083                 }
2084         }
2085         read_unlock_bh(&table->tb6_lock);
2086 }
2087
2088 static void rtmsg_to_fib6_config(struct net *net,
2089                                  struct in6_rtmsg *rtmsg,
2090                                  struct fib6_config *cfg)
2091 {
2092         memset(cfg, 0, sizeof(*cfg));
2093
2094         cfg->fc_table = RT6_TABLE_MAIN;
2095         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2096         cfg->fc_metric = rtmsg->rtmsg_metric;
2097         cfg->fc_expires = rtmsg->rtmsg_info;
2098         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2099         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2100         cfg->fc_flags = rtmsg->rtmsg_flags;
2101
2102         cfg->fc_nlinfo.nl_net = net;
2103
2104         cfg->fc_dst = rtmsg->rtmsg_dst;
2105         cfg->fc_src = rtmsg->rtmsg_src;
2106         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2107 }
2108
2109 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2110 {
2111         struct fib6_config cfg;
2112         struct in6_rtmsg rtmsg;
2113         int err;
2114
2115         switch (cmd) {
2116         case SIOCADDRT:         /* Add a route */
2117         case SIOCDELRT:         /* Delete a route */
2118                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2119                         return -EPERM;
2120                 err = copy_from_user(&rtmsg, arg,
2121                                      sizeof(struct in6_rtmsg));
2122                 if (err)
2123                         return -EFAULT;
2124
2125                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2126
2127                 rtnl_lock();
2128                 switch (cmd) {
2129                 case SIOCADDRT:
2130                         err = ip6_route_add(&cfg);
2131                         break;
2132                 case SIOCDELRT:
2133                         err = ip6_route_del(&cfg);
2134                         break;
2135                 default:
2136                         err = -EINVAL;
2137                 }
2138                 rtnl_unlock();
2139
2140                 return err;
2141         }
2142
2143         return -EINVAL;
2144 }
2145
2146 /*
2147  *      Drop the packet on the floor
2148  */
2149
2150 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2151 {
2152         int type;
2153         struct dst_entry *dst = skb_dst(skb);
2154         switch (ipstats_mib_noroutes) {
2155         case IPSTATS_MIB_INNOROUTES:
2156                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2157                 if (type == IPV6_ADDR_ANY) {
2158                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2159                                       IPSTATS_MIB_INADDRERRORS);
2160                         break;
2161                 }
2162                 /* FALLTHROUGH */
2163         case IPSTATS_MIB_OUTNOROUTES:
2164                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2165                               ipstats_mib_noroutes);
2166                 break;
2167         }
2168         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2169         kfree_skb(skb);
2170         return 0;
2171 }
2172
2173 static int ip6_pkt_discard(struct sk_buff *skb)
2174 {
2175         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2176 }
2177
2178 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2179 {
2180         skb->dev = skb_dst(skb)->dev;
2181         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2182 }
2183
2184 static int ip6_pkt_prohibit(struct sk_buff *skb)
2185 {
2186         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2187 }
2188
2189 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2190 {
2191         skb->dev = skb_dst(skb)->dev;
2192         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2193 }
2194
2195 /*
2196  *      Allocate a dst for local (unicast / anycast) address.
2197  */
2198
2199 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2200                                     const struct in6_addr *addr,
2201                                     bool anycast)
2202 {
2203         struct net *net = dev_net(idev->dev);
2204         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2205                                             DST_NOCOUNT, NULL);
2206         if (!rt)
2207                 return ERR_PTR(-ENOMEM);
2208
2209         in6_dev_hold(idev);
2210
2211         rt->dst.flags |= DST_HOST;
2212         rt->dst.input = ip6_input;
2213         rt->dst.output = ip6_output;
2214         rt->rt6i_idev = idev;
2215
2216         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2217         if (anycast)
2218                 rt->rt6i_flags |= RTF_ANYCAST;
2219         else
2220                 rt->rt6i_flags |= RTF_LOCAL;
2221
2222         rt->rt6i_gateway  = *addr;
2223         rt->rt6i_dst.addr = *addr;
2224         rt->rt6i_dst.plen = 128;
2225         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2226
2227         atomic_set(&rt->dst.__refcnt, 1);
2228
2229         return rt;
2230 }
2231
2232 int ip6_route_get_saddr(struct net *net,
2233                         struct rt6_info *rt,
2234                         const struct in6_addr *daddr,
2235                         unsigned int prefs,
2236                         struct in6_addr *saddr)
2237 {
2238         struct inet6_dev *idev =
2239                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2240         int err = 0;
2241         if (rt && rt->rt6i_prefsrc.plen)
2242                 *saddr = rt->rt6i_prefsrc.addr;
2243         else
2244                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2245                                          daddr, prefs, saddr);
2246         return err;
2247 }
2248
2249 /* remove deleted ip from prefsrc entries */
2250 struct arg_dev_net_ip {
2251         struct net_device *dev;
2252         struct net *net;
2253         struct in6_addr *addr;
2254 };
2255
2256 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2257 {
2258         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2259         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2260         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2261
2262         if (((void *)rt->dst.dev == dev || !dev) &&
2263             rt != net->ipv6.ip6_null_entry &&
2264             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2265                 /* remove prefsrc entry */
2266                 rt->rt6i_prefsrc.plen = 0;
2267         }
2268         return 0;
2269 }
2270
2271 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2272 {
2273         struct net *net = dev_net(ifp->idev->dev);
2274         struct arg_dev_net_ip adni = {
2275                 .dev = ifp->idev->dev,
2276                 .net = net,
2277                 .addr = &ifp->addr,
2278         };
2279         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2280 }
2281
2282 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2283 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2284
2285 /* Remove routers and update dst entries when gateway turn into host. */
2286 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2287 {
2288         struct in6_addr *gateway = (struct in6_addr *)arg;
2289
2290         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2291              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2292              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2293                 return -1;
2294         }
2295         return 0;
2296 }
2297
2298 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2299 {
2300         fib6_clean_all(net, fib6_clean_tohost, gateway);
2301 }
2302
2303 struct arg_dev_net {
2304         struct net_device *dev;
2305         struct net *net;
2306 };
2307
2308 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2309 {
2310         const struct arg_dev_net *adn = arg;
2311         const struct net_device *dev = adn->dev;
2312
2313         if ((rt->dst.dev == dev || !dev) &&
2314             rt != adn->net->ipv6.ip6_null_entry)
2315                 return -1;
2316
2317         return 0;
2318 }
2319
2320 void rt6_ifdown(struct net *net, struct net_device *dev)
2321 {
2322         struct arg_dev_net adn = {
2323                 .dev = dev,
2324                 .net = net,
2325         };
2326
2327         fib6_clean_all(net, fib6_ifdown, &adn);
2328         icmp6_clean_all(fib6_ifdown, &adn);
2329 }
2330
2331 struct rt6_mtu_change_arg {
2332         struct net_device *dev;
2333         unsigned int mtu;
2334 };
2335
2336 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2337 {
2338         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2339         struct inet6_dev *idev;
2340
2341         /* In IPv6 pmtu discovery is not optional,
2342            so that RTAX_MTU lock cannot disable it.
2343            We still use this lock to block changes
2344            caused by addrconf/ndisc.
2345         */
2346
2347         idev = __in6_dev_get(arg->dev);
2348         if (!idev)
2349                 return 0;
2350
2351         /* For administrative MTU increase, there is no way to discover
2352            IPv6 PMTU increase, so PMTU increase should be updated here.
2353            Since RFC 1981 doesn't include administrative MTU increase
2354            update PMTU increase is a MUST. (i.e. jumbo frame)
2355          */
2356         /*
2357            If new MTU is less than route PMTU, this new MTU will be the
2358            lowest MTU in the path, update the route PMTU to reflect PMTU
2359            decreases; if new MTU is greater than route PMTU, and the
2360            old MTU is the lowest MTU in the path, update the route PMTU
2361            to reflect the increase. In this case if the other nodes' MTU
2362            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2363            PMTU discouvery.
2364          */
2365         if (rt->dst.dev == arg->dev &&
2366             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2367                 if (rt->rt6i_flags & RTF_CACHE) {
2368                         /* For RTF_CACHE with rt6i_pmtu == 0
2369                          * (i.e. a redirected route),
2370                          * the metrics of its rt->dst.from has already
2371                          * been updated.
2372                          */
2373                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2374                                 rt->rt6i_pmtu = arg->mtu;
2375                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2376                            (dst_mtu(&rt->dst) < arg->mtu &&
2377                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2378                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2379                 }
2380         }
2381         return 0;
2382 }
2383
2384 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2385 {
2386         struct rt6_mtu_change_arg arg = {
2387                 .dev = dev,
2388                 .mtu = mtu,
2389         };
2390
2391         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2392 }
2393
2394 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2395         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2396         [RTA_OIF]               = { .type = NLA_U32 },
2397         [RTA_IIF]               = { .type = NLA_U32 },
2398         [RTA_PRIORITY]          = { .type = NLA_U32 },
2399         [RTA_METRICS]           = { .type = NLA_NESTED },
2400         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2401         [RTA_PREF]              = { .type = NLA_U8 },
2402 };
2403
2404 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2405                               struct fib6_config *cfg)
2406 {
2407         struct rtmsg *rtm;
2408         struct nlattr *tb[RTA_MAX+1];
2409         unsigned int pref;
2410         int err;
2411
2412         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2413         if (err < 0)
2414                 goto errout;
2415
2416         err = -EINVAL;
2417         rtm = nlmsg_data(nlh);
2418         memset(cfg, 0, sizeof(*cfg));
2419
2420         cfg->fc_table = rtm->rtm_table;
2421         cfg->fc_dst_len = rtm->rtm_dst_len;
2422         cfg->fc_src_len = rtm->rtm_src_len;
2423         cfg->fc_flags = RTF_UP;
2424         cfg->fc_protocol = rtm->rtm_protocol;
2425         cfg->fc_type = rtm->rtm_type;
2426
2427         if (rtm->rtm_type == RTN_UNREACHABLE ||
2428             rtm->rtm_type == RTN_BLACKHOLE ||
2429             rtm->rtm_type == RTN_PROHIBIT ||
2430             rtm->rtm_type == RTN_THROW)
2431                 cfg->fc_flags |= RTF_REJECT;
2432
2433         if (rtm->rtm_type == RTN_LOCAL)
2434                 cfg->fc_flags |= RTF_LOCAL;
2435
2436         if (rtm->rtm_flags & RTM_F_CLONED)
2437                 cfg->fc_flags |= RTF_CACHE;
2438
2439         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2440         cfg->fc_nlinfo.nlh = nlh;
2441         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2442
2443         if (tb[RTA_GATEWAY]) {
2444                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2445                 cfg->fc_flags |= RTF_GATEWAY;
2446         }
2447
2448         if (tb[RTA_DST]) {
2449                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2450
2451                 if (nla_len(tb[RTA_DST]) < plen)
2452                         goto errout;
2453
2454                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2455         }
2456
2457         if (tb[RTA_SRC]) {
2458                 int plen = (rtm->rtm_src_len + 7) >> 3;
2459
2460                 if (nla_len(tb[RTA_SRC]) < plen)
2461                         goto errout;
2462
2463                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2464         }
2465
2466         if (tb[RTA_PREFSRC])
2467                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2468
2469         if (tb[RTA_OIF])
2470                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2471
2472         if (tb[RTA_PRIORITY])
2473                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2474
2475         if (tb[RTA_METRICS]) {
2476                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2477                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2478         }
2479
2480         if (tb[RTA_TABLE])
2481                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2482
2483         if (tb[RTA_MULTIPATH]) {
2484                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2485                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2486         }
2487
2488         if (tb[RTA_PREF]) {
2489                 pref = nla_get_u8(tb[RTA_PREF]);
2490                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2491                     pref != ICMPV6_ROUTER_PREF_HIGH)
2492                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2493                 cfg->fc_flags |= RTF_PREF(pref);
2494         }
2495
2496         err = 0;
2497 errout:
2498         return err;
2499 }
2500
2501 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2502 {
2503         struct fib6_config r_cfg;
2504         struct rtnexthop *rtnh;
2505         int remaining;
2506         int attrlen;
2507         int err = 0, last_err = 0;
2508
2509 beginning:
2510         rtnh = (struct rtnexthop *)cfg->fc_mp;
2511         remaining = cfg->fc_mp_len;
2512
2513         /* Parse a Multipath Entry */
2514         while (rtnh_ok(rtnh, remaining)) {
2515                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2516                 if (rtnh->rtnh_ifindex)
2517                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2518
2519                 attrlen = rtnh_attrlen(rtnh);
2520                 if (attrlen > 0) {
2521                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2522
2523                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2524                         if (nla) {
2525                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2526                                 r_cfg.fc_flags |= RTF_GATEWAY;
2527                         }
2528                 }
2529                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2530                 if (err) {
2531                         last_err = err;
2532                         /* If we are trying to remove a route, do not stop the
2533                          * loop when ip6_route_del() fails (because next hop is
2534                          * already gone), we should try to remove all next hops.
2535                          */
2536                         if (add) {
2537                                 /* If add fails, we should try to delete all
2538                                  * next hops that have been already added.
2539                                  */
2540                                 add = 0;
2541                                 goto beginning;
2542                         }
2543                 }
2544                 /* Because each route is added like a single route we remove
2545                  * this flag after the first nexthop (if there is a collision,
2546                  * we have already fail to add the first nexthop:
2547                  * fib6_add_rt2node() has reject it).
2548                  */
2549                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2550                 rtnh = rtnh_next(rtnh, &remaining);
2551         }
2552
2553         return last_err;
2554 }
2555
2556 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2557 {
2558         struct fib6_config cfg;
2559         int err;
2560
2561         err = rtm_to_fib6_config(skb, nlh, &cfg);
2562         if (err < 0)
2563                 return err;
2564
2565         if (cfg.fc_mp)
2566                 return ip6_route_multipath(&cfg, 0);
2567         else
2568                 return ip6_route_del(&cfg);
2569 }
2570
2571 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2572 {
2573         struct fib6_config cfg;
2574         int err;
2575
2576         err = rtm_to_fib6_config(skb, nlh, &cfg);
2577         if (err < 0)
2578                 return err;
2579
2580         if (cfg.fc_mp)
2581                 return ip6_route_multipath(&cfg, 1);
2582         else
2583                 return ip6_route_add(&cfg);
2584 }
2585
2586 static inline size_t rt6_nlmsg_size(void)
2587 {
2588         return NLMSG_ALIGN(sizeof(struct rtmsg))
2589                + nla_total_size(16) /* RTA_SRC */
2590                + nla_total_size(16) /* RTA_DST */
2591                + nla_total_size(16) /* RTA_GATEWAY */
2592                + nla_total_size(16) /* RTA_PREFSRC */
2593                + nla_total_size(4) /* RTA_TABLE */
2594                + nla_total_size(4) /* RTA_IIF */
2595                + nla_total_size(4) /* RTA_OIF */
2596                + nla_total_size(4) /* RTA_PRIORITY */
2597                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2598                + nla_total_size(sizeof(struct rta_cacheinfo))
2599                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2600                + nla_total_size(1); /* RTA_PREF */
2601 }
2602
/*
 * Append one netlink route message describing @rt to @skb.
 *
 * @net:    namespace, used for the multicast-route and source-address lookups.
 * @skb:    buffer the message is appended to.
 * @rt:     route to describe.
 * @dst:    if non-NULL, reported as RTA_DST with a /128 length instead of the
 *          route's own destination prefix.
 * @src:    same for RTA_SRC (only emitted with CONFIG_IPV6_SUBTREES).
 * @iif:    if non-zero, reported as RTA_IIF, or - for a multicast destination
 *          with CONFIG_IPV6_MROUTE - the message is handed to
 *          ip6mr_get_route() instead.
 * @type, @portid, @seq, @flags: passed to nlmsg_put() for the message header.
 * @prefix: if non-zero, routes without RTF_PREFIX_RT are skipped.
 * @nowait: forwarded to ip6mr_get_route().
 *
 * Returns 0 on success, 1 if the route was skipped because of @prefix, and
 * -EMSGSIZE if the message could not be built; once the header has been
 * added, a failure cancels the partial message.
 */
static int rt6_fill_node(struct net *net,
                         struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 portid, u32 seq,
                         int prefix, int nowait, unsigned int flags)
{
        u32 metrics[RTAX_MAX];
        struct rtmsg *rtm;
        struct nlmsghdr *nlh;
        long expires;
        u32 table;

        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
                        /* success since this is not a prefix route */
                        return 1;
                }
        }

        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
        if (!nlh)
                return -EMSGSIZE;

        /* Fixed rtmsg header. */
        rtm = nlmsg_data(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
        if (rt->rt6i_table)
                table = rt->rt6i_table->tb6_id;
        else
                table = RT6_TABLE_UNSPEC;
        /* The table id goes both into the header field and into RTA_TABLE. */
        rtm->rtm_table = table;
        if (nla_put_u32(skb, RTA_TABLE, table))
                goto nla_put_failure;
        /* Route type: reject routes are classified by their dst error code. */
        if (rt->rt6i_flags & RTF_REJECT) {
                switch (rt->dst.error) {
                case -EINVAL:
                        rtm->rtm_type = RTN_BLACKHOLE;
                        break;
                case -EACCES:
                        rtm->rtm_type = RTN_PROHIBIT;
                        break;
                case -EAGAIN:
                        rtm->rtm_type = RTN_THROW;
                        break;
                default:
                        rtm->rtm_type = RTN_UNREACHABLE;
                        break;
                }
        }
        else if (rt->rt6i_flags & RTF_LOCAL)
                rtm->rtm_type = RTN_LOCAL;
        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
                rtm->rtm_type = RTN_UNICAST;
        rtm->rtm_flags = 0;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* Protocol: the stored value, overridden for dynamic and
         * addrconf-created routes.
         */
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags & RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
        else if (rt->rt6i_flags & RTF_ADDRCONF) {
                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
                        rtm->rtm_protocol = RTPROT_RA;
                else
                        rtm->rtm_protocol = RTPROT_KERNEL;
        }

        if (rt->rt6i_flags & RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;

        /* Destination: the caller-supplied address (as a /128) wins over the
         * route's own prefix; a zero-length prefix emits no attribute.
         */
        if (dst) {
                if (nla_put_in6_addr(skb, RTA_DST, dst))
                        goto nla_put_failure;
                rtm->rtm_dst_len = 128;
        } else if (rtm->rtm_dst_len)
                if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
                        goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
        /* Source: same rule as for the destination. */
        if (src) {
                if (nla_put_in6_addr(skb, RTA_SRC, src))
                        goto nla_put_failure;
                rtm->rtm_src_len = 128;
        } else if (rtm->rtm_src_len &&
                   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
                goto nla_put_failure;
#endif
        if (iif) {
#ifdef CONFIG_IPV6_MROUTE
                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
                        int err = ip6mr_get_route(net, skb, rtm, nowait);
                        if (err <= 0) {
                                if (!nowait) {
                                        /* NOTE: this returns 0 without
                                         * calling nlmsg_end() on the message.
                                         */
                                        if (err == 0)
                                                return 0;
                                        goto nla_put_failure;
                                } else {
                                        /* With nowait set, only -EMSGSIZE
                                         * aborts; other results fall through.
                                         */
                                        if (err == -EMSGSIZE)
                                                goto nla_put_failure;
                                }
                        }
                } else
#endif
                        if (nla_put_u32(skb, RTA_IIF, iif))
                                goto nla_put_failure;
        } else if (dst) {
                /* No input interface: report the source address selected for
                 * this destination, if the lookup succeeds.
                 */
                struct in6_addr saddr_buf;
                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
                    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
                        goto nla_put_failure;
        }

        /* A preferred source configured on the route is reported as well;
         * this is independent of the branch above, so RTA_PREFSRC may be
         * added twice.
         */
        if (rt->rt6i_prefsrc.plen) {
                struct in6_addr saddr_buf;
                saddr_buf = rt->rt6i_prefsrc.addr;
                if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
                        goto nla_put_failure;
        }

        /* Work on a copy of the metrics so the MTU slot can be replaced by
         * the route's rt6i_pmtu, when that is set, without touching the dst.
         */
        memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
        if (rt->rt6i_pmtu)
                metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
        if (rtnetlink_put_metrics(skb, metrics) < 0)
                goto nla_put_failure;

        if (rt->rt6i_flags & RTF_GATEWAY) {
                if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
                        goto nla_put_failure;
        }

        if (rt->dst.dev &&
            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
                goto nla_put_failure;
        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
                goto nla_put_failure;

        /* Remaining lifetime relative to now; 0 for routes without
         * RTF_EXPIRES.
         */
        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
                goto nla_put_failure;

        if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
                goto nla_put_failure;

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        /* Drop the partially built message from the skb. */
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2755
2756 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2757 {
2758         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2759         int prefix;
2760
2761         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2762                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2763                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2764         } else
2765                 prefix = 0;
2766
2767         return rt6_fill_node(arg->net,
2768                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2769                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2770                      prefix, 0, NLM_F_MULTI);
2771 }
2772
2773 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2774 {
2775         struct net *net = sock_net(in_skb->sk);
2776         struct nlattr *tb[RTA_MAX+1];
2777         struct rt6_info *rt;
2778         struct sk_buff *skb;
2779         struct rtmsg *rtm;
2780         struct flowi6 fl6;
2781         int err, iif = 0, oif = 0;
2782
2783         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2784         if (err < 0)
2785                 goto errout;
2786
2787         err = -EINVAL;
2788         memset(&fl6, 0, sizeof(fl6));
2789
2790         if (tb[RTA_SRC]) {
2791                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2792                         goto errout;
2793
2794                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2795         }
2796
2797         if (tb[RTA_DST]) {
2798                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2799                         goto errout;
2800
2801                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2802         }
2803
2804         if (tb[RTA_IIF])
2805                 iif = nla_get_u32(tb[RTA_IIF]);
2806
2807         if (tb[RTA_OIF])
2808                 oif = nla_get_u32(tb[RTA_OIF]);
2809
2810         if (tb[RTA_MARK])
2811                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2812
2813         if (iif) {
2814                 struct net_device *dev;
2815                 int flags = 0;
2816
2817                 dev = __dev_get_by_index(net, iif);
2818                 if (!dev) {
2819                         err = -ENODEV;
2820                         goto errout;
2821                 }
2822
2823                 fl6.flowi6_iif = iif;
2824
2825                 if (!ipv6_addr_any(&fl6.saddr))
2826                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2827
2828                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2829                                                                flags);
2830         } else {
2831                 fl6.flowi6_oif = oif;
2832
2833                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2834         }
2835
2836         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2837         if (!skb) {
2838                 ip6_rt_put(rt);
2839                 err = -ENOBUFS;
2840                 goto errout;
2841         }
2842
2843         /* Reserve room for dummy headers, this skb can pass
2844            through good chunk of routing engine.
2845          */
2846         skb_reset_mac_header(skb);
2847         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2848
2849         skb_dst_set(skb, &rt->dst);
2850
2851         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2852                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2853                             nlh->nlmsg_seq, 0, 0, 0);
2854         if (err < 0) {
2855                 kfree_skb(skb);
2856                 goto errout;
2857         }
2858
2859         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2860 errout:
2861         return err;
2862 }
2863
2864 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2865 {
2866         struct sk_buff *skb;
2867         struct net *net = info->nl_net;
2868         u32 seq;
2869         int err;
2870
2871         err = -ENOBUFS;
2872         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2873
2874         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2875         if (!skb)
2876                 goto errout;
2877
2878         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2879                                 event, info->portid, seq, 0, 0, 0);
2880         if (err < 0) {
2881                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2882                 WARN_ON(err == -EMSGSIZE);
2883                 kfree_skb(skb);
2884                 goto errout;
2885         }
2886         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2887                     info->nlh, gfp_any());
2888         return;
2889 errout:
2890         if (err < 0)
2891                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2892 }
2893
2894 static int ip6_route_dev_notify(struct notifier_block *this,
2895                                 unsigned long event, void *ptr)
2896 {
2897         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2898         struct net *net = dev_net(dev);
2899
2900         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2901                 net->ipv6.ip6_null_entry->dst.dev = dev;
2902                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2903 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2905                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2906                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2907                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2908 #endif
2909         }
2910
2911         return NOTIFY_OK;
2912 }
2913
2914 /*
2915  *      /proc
2916  */
2917
2918 #ifdef CONFIG_PROC_FS
2919
/*
 * File operations for the per-namespace "ipv6_route" proc entry (created in
 * ip6_route_net_init_late()).  A seq_file whose open routine,
 * ipv6_route_open(), is defined outside this part of the file.
 */
static const struct file_operations ipv6_route_proc_fops = {
        .owner          = THIS_MODULE,
        .open           = ipv6_route_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};
2927
2928 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2929 {
2930         struct net *net = (struct net *)seq->private;
2931         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2932                    net->ipv6.rt6_stats->fib_nodes,
2933                    net->ipv6.rt6_stats->fib_route_nodes,
2934                    net->ipv6.rt6_stats->fib_rt_alloc,
2935                    net->ipv6.rt6_stats->fib_rt_entries,
2936                    net->ipv6.rt6_stats->fib_rt_cache,
2937                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2938                    net->ipv6.rt6_stats->fib_discarded_routes);
2939
2940         return 0;
2941 }
2942
/* Open routine for "rt6_stats": a single-record, per-namespace seq_file
 * rendered by rt6_stats_seq_show().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
        return single_open_net(inode, file, rt6_stats_seq_show);
}
2947
/*
 * File operations for the per-namespace "rt6_stats" proc entry (created in
 * ip6_route_net_init_late()).
 */
static const struct file_operations rt6_stats_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt6_stats_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release_net,
};
2955 #endif  /* CONFIG_PROC_FS */
2956
2957 #ifdef CONFIG_SYSCTL
2958
2959 static
2960 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2961                               void __user *buffer, size_t *lenp, loff_t *ppos)
2962 {
2963         struct net *net;
2964         int delay;
2965         if (!write)
2966                 return -EINVAL;
2967
2968         net = (struct net *)ctl->extra1;
2969         delay = net->ipv6.sysctl.flush_delay;
2970         proc_dointvec(ctl, write, buffer, lenp, ppos);
2971         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2972         return 0;
2973 }
2974
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and re-points each entry's
 * .data at the new namespace's fields BY ARRAY INDEX (table[0] .. table[9]),
 * so the order of the entries here must stay in sync with that function.
 * The .data pointers below refer to init_net / the dst ops template and are
 * only placeholders for the copies.
 */
struct ctl_table ipv6_route_table_template[] = {
        {
                /* [0] write-only trigger, see ipv6_sysctl_rtcache_flush() */
                .procname       =       "flush",
                .data           =       &init_net.ipv6.sysctl.flush_delay,
                .maxlen         =       sizeof(int),
                .mode           =       0200,
                .proc_handler   =       ipv6_sysctl_rtcache_flush
        },
        {
                /* [1] */
                .procname       =       "gc_thresh",
                .data           =       &ip6_dst_ops_template.gc_thresh,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [2] */
                .procname       =       "max_size",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [3] stored in jiffies; same variable as entry [9] */
                .procname       =       "gc_min_interval",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [4] */
                .procname       =       "gc_timeout",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [5] */
                .procname       =       "gc_interval",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [6] */
                .procname       =       "gc_elasticity",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [7] */
                .procname       =       "mtu_expires",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [8] */
                .procname       =       "min_adv_mss",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [9] second view of the variable behind entry [3], using the
                 * millisecond handler
                 */
                .procname       =       "gc_min_interval_ms",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_ms_jiffies,
        },
        { }     /* empty terminating entry */
};
3048
3049 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3050 {
3051         struct ctl_table *table;
3052
3053         table = kmemdup(ipv6_route_table_template,
3054                         sizeof(ipv6_route_table_template),
3055                         GFP_KERNEL);
3056
3057         if (table) {
3058                 table[0].data = &net->ipv6.sysctl.flush_delay;
3059                 table[0].extra1 = net;
3060                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3061                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3062                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3063                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3064                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3065                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3066                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3067                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3068                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3069
3070                 /* Don't export sysctls to unprivileged users */
3071                 if (net->user_ns != &init_user_ns)
3072                         table[0].procname = NULL;
3073         }
3074
3075         return table;
3076 }
3077 #endif
3078
3079 static int __net_init ip6_route_net_init(struct net *net)
3080 {
3081         int ret = -ENOMEM;
3082
3083         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3084                sizeof(net->ipv6.ip6_dst_ops));
3085
3086         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3087                 goto out_ip6_dst_ops;
3088
3089         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3090                                            sizeof(*net->ipv6.ip6_null_entry),
3091                                            GFP_KERNEL);
3092         if (!net->ipv6.ip6_null_entry)
3093                 goto out_ip6_dst_entries;
3094         net->ipv6.ip6_null_entry->dst.path =
3095                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3096         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3097         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3098                          ip6_template_metrics, true);
3099
3100 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3101         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3102                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3103                                                GFP_KERNEL);
3104         if (!net->ipv6.ip6_prohibit_entry)
3105                 goto out_ip6_null_entry;
3106         net->ipv6.ip6_prohibit_entry->dst.path =
3107                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3108         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3109         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3110                          ip6_template_metrics, true);
3111
3112         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3113                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3114                                                GFP_KERNEL);
3115         if (!net->ipv6.ip6_blk_hole_entry)
3116                 goto out_ip6_prohibit_entry;
3117         net->ipv6.ip6_blk_hole_entry->dst.path =
3118                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3119         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3120         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3121                          ip6_template_metrics, true);
3122 #endif
3123
3124         net->ipv6.sysctl.flush_delay = 0;
3125         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3126         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3127         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3128         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3129         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3130         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3131         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3132
3133         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3134
3135         ret = 0;
3136 out:
3137         return ret;
3138
3139 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3140 out_ip6_prohibit_entry:
3141         kfree(net->ipv6.ip6_prohibit_entry);
3142 out_ip6_null_entry:
3143         kfree(net->ipv6.ip6_null_entry);
3144 #endif
3145 out_ip6_dst_entries:
3146         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3147 out_ip6_dst_ops:
3148         goto out;
3149 }
3150
/*
 * Per-namespace teardown matching ip6_route_net_init(): free the special
 * route entries and destroy the namespace's dst entry accounting.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
        kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
        kfree(net->ipv6.ip6_prohibit_entry);
        kfree(net->ipv6.ip6_blk_hole_entry);
#endif
        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3160
/*
 * Late per-namespace setup: create the "ipv6_route" and "rt6_stats" proc
 * entries when CONFIG_PROC_FS is enabled.  The proc_create() results are
 * not checked, so this always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
        proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
        return 0;
}
3169
/* Remove the proc entries created by ip6_route_net_init_late(). */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
        remove_proc_entry("ipv6_route", net->proc_net);
        remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3177
/* Per-namespace hooks for the core routing state (dst ops, special route
 * entries, sysctl defaults).
 */
static struct pernet_operations ip6_route_net_ops = {
        .init = ip6_route_net_init,
        .exit = ip6_route_net_exit,
};
3182
3183 static int __net_init ipv6_inetpeer_init(struct net *net)
3184 {
3185         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3186
3187         if (!bp)
3188                 return -ENOMEM;
3189         inet_peer_base_init(bp);
3190         net->ipv6.peers = bp;
3191         return 0;
3192 }
3193
/*
 * Tear down the namespace's IPv6 inet_peer base allocated by
 * ipv6_inetpeer_init().
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
        struct inet_peer_base *bp = net->ipv6.peers;

        /* Unpublish the base first, then invalidate its tree and free it. */
        net->ipv6.peers = NULL;
        inetpeer_invalidate_tree(bp);
        kfree(bp);
}
3202
/* Per-namespace hooks for the IPv6 inet_peer base. */
static struct pernet_operations ipv6_inetpeer_ops = {
        .init   =       ipv6_inetpeer_init,
        .exit   =       ipv6_inetpeer_exit,
};
3207
/* Per-namespace hooks for the proc entries; ip6_route_init() registers
 * these after the FIB, xfrm6 and fib6 rules have been initialised.
 */
static struct pernet_operations ip6_route_net_late_ops = {
        .init = ip6_route_net_init_late,
        .exit = ip6_route_net_exit_late,
};
3212
/* Netdevice notifier used to bind the special route entries to a newly
 * registered loopback device, see ip6_route_dev_notify().
 */
static struct notifier_block ip6_route_dev_notifier = {
        .notifier_call = ip6_route_dev_notify,
        .priority = 0,
};
3217
3218 int __init ip6_route_init(void)
3219 {
3220         int ret;
3221
3222         ret = -ENOMEM;
3223         ip6_dst_ops_template.kmem_cachep =
3224                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3225                                   SLAB_HWCACHE_ALIGN, NULL);
3226         if (!ip6_dst_ops_template.kmem_cachep)
3227                 goto out;
3228
3229         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3230         if (ret)
3231                 goto out_kmem_cache;
3232
3233         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3234         if (ret)
3235                 goto out_dst_entries;
3236
3237         ret = register_pernet_subsys(&ip6_route_net_ops);
3238         if (ret)
3239                 goto out_register_inetpeer;
3240
3241         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3242
3243         /* Registering of the loopback is done before this portion of code,
3244          * the loopback reference in rt6_info will not be taken, do it
3245          * manually for init_net */
3246         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3247         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3248   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3249         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3250         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3251         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3252         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3253   #endif
3254         ret = fib6_init();
3255         if (ret)
3256                 goto out_register_subsys;
3257
3258         ret = xfrm6_init();
3259         if (ret)
3260                 goto out_fib6_init;
3261
3262         ret = fib6_rules_init();
3263         if (ret)
3264                 goto xfrm6_init;
3265
3266         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3267         if (ret)
3268                 goto fib6_rules_init;
3269
3270         ret = -ENOBUFS;
3271         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3272             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3273             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3274                 goto out_register_late_subsys;
3275
3276         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3277         if (ret)
3278                 goto out_register_late_subsys;
3279
3280 out:
3281         return ret;
3282
3283 out_register_late_subsys:
3284         unregister_pernet_subsys(&ip6_route_net_late_ops);
3285 fib6_rules_init:
3286         fib6_rules_cleanup();
3287 xfrm6_init:
3288         xfrm6_fini();
3289 out_fib6_init:
3290         fib6_gc_cleanup();
3291 out_register_subsys:
3292         unregister_pernet_subsys(&ip6_route_net_ops);
3293 out_register_inetpeer:
3294         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3295 out_dst_entries:
3296         dst_entries_destroy(&ip6_dst_blackhole_ops);
3297 out_kmem_cache:
3298         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3299         goto out;
3300 }
3301
/*
 * Tear down everything set up by ip6_route_init(): notifier, late
 * per-namespace operations, fib6 rules, xfrm6, FIB garbage collection,
 * per-namespace operations, blackhole dst accounting and the slab cache.
 */
void ip6_route_cleanup(void)
{
        unregister_netdevice_notifier(&ip6_route_dev_notifier);
        unregister_pernet_subsys(&ip6_route_net_late_ops);
        fib6_rules_cleanup();
        xfrm6_fini();
        fib6_gc_cleanup();
        /* NOTE(review): ip6_route_init()'s error path unregisters
         * ip6_route_net_ops before ipv6_inetpeer_ops; the order here is the
         * other way round - confirm this is intentional.
         */
        unregister_pernet_subsys(&ipv6_inetpeer_ops);
        unregister_pernet_subsys(&ip6_route_net_ops);
        dst_entries_destroy(&ip6_dst_blackhole_ops);
        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}