]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv6/route.c
ipv6: Stop rt6_info from using inet_peer's metrics
[karo-tx-linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Outcome of evaluating a route's next hop.  Negative values are failures;
 * rt6_check_neigh() and rt6_score_route() produce them and find_match()
 * consumes them.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() asks for round-robin */
	RT6_NUD_SUCCEED		= 1,
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76                                     const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int      ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int              ip6_pkt_prohibit(struct sk_buff *skb);
89 static int              ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92                                            struct sk_buff *skb, u32 mtu);
93 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94                                         struct sk_buff *skb);
95 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
97
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route Information option helpers, used by rt6_route_rcv(). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
107
108 static void rt6_bind_peer(struct rt6_info *rt, int create)
109 {
110         struct inet_peer_base *base;
111         struct inet_peer *peer;
112
113         base = inetpeer_base_ptr(rt->_rt6i_peer);
114         if (!base)
115                 return;
116
117         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
118         if (peer) {
119                 if (!rt6_set_peer(rt, peer))
120                         inet_putpeer(peer);
121         }
122 }
123
124 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
125 {
126         if (rt6_has_peer(rt))
127                 return rt6_peer_ptr(rt);
128
129         rt6_bind_peer(rt, create);
130         return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
131 }
132
/* Same as __rt6_get_peer() with the create argument set to 1. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
137
138 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
139 {
140         struct rt6_info *rt = (struct rt6_info *)dst;
141
142         if (rt->rt6i_flags & RTF_CACHE)
143                 return NULL;
144         else
145                 return dst_cow_metrics_generic(dst, old);
146 }
147
148 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
149                                              struct sk_buff *skb,
150                                              const void *daddr)
151 {
152         struct in6_addr *p = &rt->rt6i_gateway;
153
154         if (!ipv6_addr_any(p))
155                 return (const void *) p;
156         else if (skb)
157                 return &ipv6_hdr(skb)->daddr;
158         return daddr;
159 }
160
161 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
162                                           struct sk_buff *skb,
163                                           const void *daddr)
164 {
165         struct rt6_info *rt = (struct rt6_info *) dst;
166         struct neighbour *n;
167
168         daddr = choose_neigh_daddr(rt, skb, daddr);
169         n = __ipv6_neigh_lookup(dst->dev, daddr);
170         if (n)
171                 return n;
172         return neigh_create(&nd_tbl, daddr, dst->dev);
173 }
174
175 static struct dst_ops ip6_dst_ops_template = {
176         .family                 =       AF_INET6,
177         .gc                     =       ip6_dst_gc,
178         .gc_thresh              =       1024,
179         .check                  =       ip6_dst_check,
180         .default_advmss         =       ip6_default_advmss,
181         .mtu                    =       ip6_mtu,
182         .cow_metrics            =       ipv6_cow_metrics,
183         .destroy                =       ip6_dst_destroy,
184         .ifdown                 =       ip6_dst_ifdown,
185         .negative_advice        =       ip6_negative_advice,
186         .link_failure           =       ip6_link_failure,
187         .update_pmtu            =       ip6_rt_update_pmtu,
188         .redirect               =       rt6_do_redirect,
189         .local_out              =       __ip6_local_out,
190         .neigh_lookup           =       ip6_neigh_lookup,
191 };
192
193 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
194 {
195         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
196
197         return mtu ? : dst->dev->mtu;
198 }
199
200 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
201                                          struct sk_buff *skb, u32 mtu)
202 {
203 }
204
/* dst_ops->redirect callback for blackhole dsts: does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
209
210 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
211                                          unsigned long old)
212 {
213         return NULL;
214 }
215
216 static struct dst_ops ip6_dst_blackhole_ops = {
217         .family                 =       AF_INET6,
218         .destroy                =       ip6_dst_destroy,
219         .check                  =       ip6_dst_check,
220         .mtu                    =       ip6_blackhole_mtu,
221         .default_advmss         =       ip6_default_advmss,
222         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
223         .redirect               =       ip6_rt_blackhole_redirect,
224         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
225         .neigh_lookup           =       ip6_neigh_lookup,
226 };
227
228 static const u32 ip6_template_metrics[RTAX_MAX] = {
229         [RTAX_HOPLIMIT - 1] = 0,
230 };
231
232 static const struct rt6_info ip6_null_entry_template = {
233         .dst = {
234                 .__refcnt       = ATOMIC_INIT(1),
235                 .__use          = 1,
236                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
237                 .error          = -ENETUNREACH,
238                 .input          = ip6_pkt_discard,
239                 .output         = ip6_pkt_discard_out,
240         },
241         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
242         .rt6i_protocol  = RTPROT_KERNEL,
243         .rt6i_metric    = ~(u32) 0,
244         .rt6i_ref       = ATOMIC_INIT(1),
245 };
246
247 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
248
249 static const struct rt6_info ip6_prohibit_entry_template = {
250         .dst = {
251                 .__refcnt       = ATOMIC_INIT(1),
252                 .__use          = 1,
253                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
254                 .error          = -EACCES,
255                 .input          = ip6_pkt_prohibit,
256                 .output         = ip6_pkt_prohibit_out,
257         },
258         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
259         .rt6i_protocol  = RTPROT_KERNEL,
260         .rt6i_metric    = ~(u32) 0,
261         .rt6i_ref       = ATOMIC_INIT(1),
262 };
263
264 static const struct rt6_info ip6_blk_hole_entry_template = {
265         .dst = {
266                 .__refcnt       = ATOMIC_INIT(1),
267                 .__use          = 1,
268                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
269                 .error          = -EINVAL,
270                 .input          = dst_discard,
271                 .output         = dst_discard_sk,
272         },
273         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
274         .rt6i_protocol  = RTPROT_KERNEL,
275         .rt6i_metric    = ~(u32) 0,
276         .rt6i_ref       = ATOMIC_INIT(1),
277 };
278
279 #endif
280
281 /* allocate dst with ip6_dst_ops */
282 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
283                                              struct net_device *dev,
284                                              int flags,
285                                              struct fib6_table *table)
286 {
287         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
288                                         0, DST_OBSOLETE_FORCE_CHK, flags);
289
290         if (rt) {
291                 struct dst_entry *dst = &rt->dst;
292
293                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
294                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
295                 INIT_LIST_HEAD(&rt->rt6i_siblings);
296         }
297         return rt;
298 }
299
300 static void ip6_dst_destroy(struct dst_entry *dst)
301 {
302         struct rt6_info *rt = (struct rt6_info *)dst;
303         struct inet6_dev *idev = rt->rt6i_idev;
304         struct dst_entry *from = dst->from;
305
306         dst_destroy_metrics_generic(dst);
307
308         if (idev) {
309                 rt->rt6i_idev = NULL;
310                 in6_dev_put(idev);
311         }
312
313         dst->from = NULL;
314         dst_release(from);
315 }
316
317 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
318                            int how)
319 {
320         struct rt6_info *rt = (struct rt6_info *)dst;
321         struct inet6_dev *idev = rt->rt6i_idev;
322         struct net_device *loopback_dev =
323                 dev_net(dev)->loopback_dev;
324
325         if (dev != loopback_dev) {
326                 if (idev && idev->dev == dev) {
327                         struct inet6_dev *loopback_idev =
328                                 in6_dev_get(loopback_dev);
329                         if (loopback_idev) {
330                                 rt->rt6i_idev = loopback_idev;
331                                 in6_dev_put(idev);
332                         }
333                 }
334         }
335 }
336
337 static bool rt6_check_expired(const struct rt6_info *rt)
338 {
339         if (rt->rt6i_flags & RTF_EXPIRES) {
340                 if (time_after(jiffies, rt->dst.expires))
341                         return true;
342         } else if (rt->dst.from) {
343                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
344         }
345         return false;
346 }
347
348 /* Multipath route selection:
349  *   Hash based function using packet header and flowlabel.
350  * Adapted from fib_info_hashfn()
351  */
352 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
353                                const struct flowi6 *fl6)
354 {
355         unsigned int val = fl6->flowi6_proto;
356
357         val ^= ipv6_addr_hash(&fl6->daddr);
358         val ^= ipv6_addr_hash(&fl6->saddr);
359
360         /* Work only if this not encapsulated */
361         switch (fl6->flowi6_proto) {
362         case IPPROTO_UDP:
363         case IPPROTO_TCP:
364         case IPPROTO_SCTP:
365                 val ^= (__force u16)fl6->fl6_sport;
366                 val ^= (__force u16)fl6->fl6_dport;
367                 break;
368
369         case IPPROTO_ICMPV6:
370                 val ^= (__force u16)fl6->fl6_icmp_type;
371                 val ^= (__force u16)fl6->fl6_icmp_code;
372                 break;
373         }
374         /* RFC6438 recommands to use flowlabel */
375         val ^= (__force u32)fl6->flowlabel;
376
377         /* Perhaps, we need to tune, this function? */
378         val = val ^ (val >> 7) ^ (val >> 12);
379         return val % candidate_count;
380 }
381
382 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
383                                              struct flowi6 *fl6, int oif,
384                                              int strict)
385 {
386         struct rt6_info *sibling, *next_sibling;
387         int route_choosen;
388
389         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
390         /* Don't change the route, if route_choosen == 0
391          * (siblings does not include ourself)
392          */
393         if (route_choosen)
394                 list_for_each_entry_safe(sibling, next_sibling,
395                                 &match->rt6i_siblings, rt6i_siblings) {
396                         route_choosen--;
397                         if (route_choosen == 0) {
398                                 if (rt6_score_route(sibling, oif, strict) < 0)
399                                         break;
400                                 match = sibling;
401                                 break;
402                         }
403                 }
404         return match;
405 }
406
407 /*
408  *      Route lookup. Any table->tb6_lock is implied.
409  */
410
411 static inline struct rt6_info *rt6_device_match(struct net *net,
412                                                     struct rt6_info *rt,
413                                                     const struct in6_addr *saddr,
414                                                     int oif,
415                                                     int flags)
416 {
417         struct rt6_info *local = NULL;
418         struct rt6_info *sprt;
419
420         if (!oif && ipv6_addr_any(saddr))
421                 goto out;
422
423         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
424                 struct net_device *dev = sprt->dst.dev;
425
426                 if (oif) {
427                         if (dev->ifindex == oif)
428                                 return sprt;
429                         if (dev->flags & IFF_LOOPBACK) {
430                                 if (!sprt->rt6i_idev ||
431                                     sprt->rt6i_idev->dev->ifindex != oif) {
432                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
433                                                 continue;
434                                         if (local && (!oif ||
435                                                       local->rt6i_idev->dev->ifindex == oif))
436                                                 continue;
437                                 }
438                                 local = sprt;
439                         }
440                 } else {
441                         if (ipv6_chk_addr(net, saddr, dev,
442                                           flags & RT6_LOOKUP_F_IFACE))
443                                 return sprt;
444                 }
445         }
446
447         if (oif) {
448                 if (local)
449                         return local;
450
451                 if (flags & RT6_LOOKUP_F_IFACE)
452                         return net->ipv6.ip6_null_entry;
453         }
454 out:
455         return rt;
456 }
457
458 #ifdef CONFIG_IPV6_ROUTER_PREF
459 struct __rt6_probe_work {
460         struct work_struct work;
461         struct in6_addr target;
462         struct net_device *dev;
463 };
464
465 static void rt6_probe_deferred(struct work_struct *w)
466 {
467         struct in6_addr mcaddr;
468         struct __rt6_probe_work *work =
469                 container_of(w, struct __rt6_probe_work, work);
470
471         addrconf_addr_solict_mult(&work->target, &mcaddr);
472         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
473         dev_put(work->dev);
474         kfree(work);
475 }
476
477 static void rt6_probe(struct rt6_info *rt)
478 {
479         struct neighbour *neigh;
480         /*
481          * Okay, this does not seem to be appropriate
482          * for now, however, we need to check if it
483          * is really so; aka Router Reachability Probing.
484          *
485          * Router Reachability Probe MUST be rate-limited
486          * to no more than one per minute.
487          */
488         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
489                 return;
490         rcu_read_lock_bh();
491         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
492         if (neigh) {
493                 write_lock(&neigh->lock);
494                 if (neigh->nud_state & NUD_VALID)
495                         goto out;
496         }
497
498         if (!neigh ||
499             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
500                 struct __rt6_probe_work *work;
501
502                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
503
504                 if (neigh && work)
505                         __neigh_set_probe_once(neigh);
506
507                 if (neigh)
508                         write_unlock(&neigh->lock);
509
510                 if (work) {
511                         INIT_WORK(&work->work, rt6_probe_deferred);
512                         work->target = rt->rt6i_gateway;
513                         dev_hold(rt->dst.dev);
514                         work->dev = rt->dst.dev;
515                         schedule_work(&work->work);
516                 }
517         } else {
518 out:
519                 write_unlock(&neigh->lock);
520         }
521         rcu_read_unlock_bh();
522 }
523 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
527 #endif
528
529 /*
530  * Default Router Selection (RFC 2461 6.3.6)
531  */
532 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
533 {
534         struct net_device *dev = rt->dst.dev;
535         if (!oif || dev->ifindex == oif)
536                 return 2;
537         if ((dev->flags & IFF_LOOPBACK) &&
538             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
539                 return 1;
540         return 0;
541 }
542
543 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
544 {
545         struct neighbour *neigh;
546         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
547
548         if (rt->rt6i_flags & RTF_NONEXTHOP ||
549             !(rt->rt6i_flags & RTF_GATEWAY))
550                 return RT6_NUD_SUCCEED;
551
552         rcu_read_lock_bh();
553         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
554         if (neigh) {
555                 read_lock(&neigh->lock);
556                 if (neigh->nud_state & NUD_VALID)
557                         ret = RT6_NUD_SUCCEED;
558 #ifdef CONFIG_IPV6_ROUTER_PREF
559                 else if (!(neigh->nud_state & NUD_FAILED))
560                         ret = RT6_NUD_SUCCEED;
561                 else
562                         ret = RT6_NUD_FAIL_PROBE;
563 #endif
564                 read_unlock(&neigh->lock);
565         } else {
566                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
567                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
568         }
569         rcu_read_unlock_bh();
570
571         return ret;
572 }
573
574 static int rt6_score_route(struct rt6_info *rt, int oif,
575                            int strict)
576 {
577         int m;
578
579         m = rt6_check_dev(rt, oif);
580         if (!m && (strict & RT6_LOOKUP_F_IFACE))
581                 return RT6_NUD_FAIL_HARD;
582 #ifdef CONFIG_IPV6_ROUTER_PREF
583         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
584 #endif
585         if (strict & RT6_LOOKUP_F_REACHABLE) {
586                 int n = rt6_check_neigh(rt);
587                 if (n < 0)
588                         return n;
589         }
590         return m;
591 }
592
593 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
594                                    int *mpri, struct rt6_info *match,
595                                    bool *do_rr)
596 {
597         int m;
598         bool match_do_rr = false;
599
600         if (rt6_check_expired(rt))
601                 goto out;
602
603         m = rt6_score_route(rt, oif, strict);
604         if (m == RT6_NUD_FAIL_DO_RR) {
605                 match_do_rr = true;
606                 m = 0; /* lowest valid score */
607         } else if (m == RT6_NUD_FAIL_HARD) {
608                 goto out;
609         }
610
611         if (strict & RT6_LOOKUP_F_REACHABLE)
612                 rt6_probe(rt);
613
614         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
615         if (m > *mpri) {
616                 *do_rr = match_do_rr;
617                 *mpri = m;
618                 match = rt;
619         }
620 out:
621         return match;
622 }
623
624 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
625                                      struct rt6_info *rr_head,
626                                      u32 metric, int oif, int strict,
627                                      bool *do_rr)
628 {
629         struct rt6_info *rt, *match, *cont;
630         int mpri = -1;
631
632         match = NULL;
633         cont = NULL;
634         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
635                 if (rt->rt6i_metric != metric) {
636                         cont = rt;
637                         break;
638                 }
639
640                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
641         }
642
643         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
644                 if (rt->rt6i_metric != metric) {
645                         cont = rt;
646                         break;
647                 }
648
649                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
650         }
651
652         if (match || !cont)
653                 return match;
654
655         for (rt = cont; rt; rt = rt->dst.rt6_next)
656                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
657
658         return match;
659 }
660
661 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
662 {
663         struct rt6_info *match, *rt0;
664         struct net *net;
665         bool do_rr = false;
666
667         rt0 = fn->rr_ptr;
668         if (!rt0)
669                 fn->rr_ptr = rt0 = fn->leaf;
670
671         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
672                              &do_rr);
673
674         if (do_rr) {
675                 struct rt6_info *next = rt0->dst.rt6_next;
676
677                 /* no entries matched; do round-robin */
678                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
679                         next = fn->leaf;
680
681                 if (next != rt0)
682                         fn->rr_ptr = next;
683         }
684
685         net = dev_net(rt0->dst.dev);
686         return match ? match : net->ipv6.ip6_null_entry;
687 }
688
689 #ifdef CONFIG_IPV6_ROUTE_INFO
690 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
691                   const struct in6_addr *gwaddr)
692 {
693         struct net *net = dev_net(dev);
694         struct route_info *rinfo = (struct route_info *) opt;
695         struct in6_addr prefix_buf, *prefix;
696         unsigned int pref;
697         unsigned long lifetime;
698         struct rt6_info *rt;
699
700         if (len < sizeof(struct route_info)) {
701                 return -EINVAL;
702         }
703
704         /* Sanity check for prefix_len and length */
705         if (rinfo->length > 3) {
706                 return -EINVAL;
707         } else if (rinfo->prefix_len > 128) {
708                 return -EINVAL;
709         } else if (rinfo->prefix_len > 64) {
710                 if (rinfo->length < 2) {
711                         return -EINVAL;
712                 }
713         } else if (rinfo->prefix_len > 0) {
714                 if (rinfo->length < 1) {
715                         return -EINVAL;
716                 }
717         }
718
719         pref = rinfo->route_pref;
720         if (pref == ICMPV6_ROUTER_PREF_INVALID)
721                 return -EINVAL;
722
723         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
724
725         if (rinfo->length == 3)
726                 prefix = (struct in6_addr *)rinfo->prefix;
727         else {
728                 /* this function is safe */
729                 ipv6_addr_prefix(&prefix_buf,
730                                  (struct in6_addr *)rinfo->prefix,
731                                  rinfo->prefix_len);
732                 prefix = &prefix_buf;
733         }
734
735         if (rinfo->prefix_len == 0)
736                 rt = rt6_get_dflt_router(gwaddr, dev);
737         else
738                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
739                                         gwaddr, dev->ifindex);
740
741         if (rt && !lifetime) {
742                 ip6_del_rt(rt);
743                 rt = NULL;
744         }
745
746         if (!rt && lifetime)
747                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
748                                         pref);
749         else if (rt)
750                 rt->rt6i_flags = RTF_ROUTEINFO |
751                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
752
753         if (rt) {
754                 if (!addrconf_finite_timeout(lifetime))
755                         rt6_clean_expires(rt);
756                 else
757                         rt6_set_expires(rt, jiffies + HZ * lifetime);
758
759                 ip6_rt_put(rt);
760         }
761         return 0;
762 }
763 #endif
764
765 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
766                                         struct in6_addr *saddr)
767 {
768         struct fib6_node *pn;
769         while (1) {
770                 if (fn->fn_flags & RTN_TL_ROOT)
771                         return NULL;
772                 pn = fn->parent;
773                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
774                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
775                 else
776                         fn = pn;
777                 if (fn->fn_flags & RTN_RTINFO)
778                         return fn;
779         }
780 }
781
782 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
783                                              struct fib6_table *table,
784                                              struct flowi6 *fl6, int flags)
785 {
786         struct fib6_node *fn;
787         struct rt6_info *rt;
788
789         read_lock_bh(&table->tb6_lock);
790         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
791 restart:
792         rt = fn->leaf;
793         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
794         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
795                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
796         if (rt == net->ipv6.ip6_null_entry) {
797                 fn = fib6_backtrack(fn, &fl6->saddr);
798                 if (fn)
799                         goto restart;
800         }
801         dst_use(&rt->dst, jiffies);
802         read_unlock_bh(&table->tb6_lock);
803         return rt;
804
805 }
806
807 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
808                                     int flags)
809 {
810         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
811 }
812 EXPORT_SYMBOL_GPL(ip6_route_lookup);
813
814 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
815                             const struct in6_addr *saddr, int oif, int strict)
816 {
817         struct flowi6 fl6 = {
818                 .flowi6_oif = oif,
819                 .daddr = *daddr,
820         };
821         struct dst_entry *dst;
822         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
823
824         if (saddr) {
825                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
826                 flags |= RT6_LOOKUP_F_HAS_SADDR;
827         }
828
829         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
830         if (dst->error == 0)
831                 return (struct rt6_info *) dst;
832
833         dst_release(dst);
834
835         return NULL;
836 }
837 EXPORT_SYMBOL(rt6_lookup);
838
839 /* ip6_ins_rt is called with FREE table->tb6_lock.
840    It takes new route entry, the addition fails by any reason the
841    route is freed. In any case, if caller does not hold it, it may
842    be destroyed.
843  */
844
845 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
846                         struct mx6_config *mxc)
847 {
848         int err;
849         struct fib6_table *table;
850
851         table = rt->rt6i_table;
852         write_lock_bh(&table->tb6_lock);
853         err = fib6_add(&table->tb6_root, rt, info, mxc);
854         write_unlock_bh(&table->tb6_lock);
855
856         return err;
857 }
858
859 int ip6_ins_rt(struct rt6_info *rt)
860 {
861         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
862         struct mx6_config mxc = { .mx = NULL, };
863
864         return __ip6_ins_rt(rt, &info, &mxc);
865 }
866
867 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
868                                       const struct in6_addr *daddr,
869                                       const struct in6_addr *saddr)
870 {
871         struct rt6_info *rt;
872
873         /*
874          *      Clone the route.
875          */
876
877         rt = ip6_rt_copy(ort, daddr);
878
879         if (rt) {
880                 if (ort->rt6i_dst.plen != 128 &&
881                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
882                         rt->rt6i_flags |= RTF_ANYCAST;
883
884                 rt->rt6i_flags |= RTF_CACHE;
885
886 #ifdef CONFIG_IPV6_SUBTREES
887                 if (rt->rt6i_src.plen && saddr) {
888                         rt->rt6i_src.addr = *saddr;
889                         rt->rt6i_src.plen = 128;
890                 }
891 #endif
892         }
893
894         return rt;
895 }
896
897 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
898                                         const struct in6_addr *daddr)
899 {
900         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
901
902         if (rt)
903                 rt->rt6i_flags |= RTF_CACHE;
904         return rt;
905 }
906
907 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
908                                       struct flowi6 *fl6, int flags)
909 {
910         struct fib6_node *fn, *saved_fn;
911         struct rt6_info *rt, *nrt;
912         int strict = 0;
913         int attempts = 3;
914         int err;
915
916         strict |= flags & RT6_LOOKUP_F_IFACE;
917         if (net->ipv6.devconf_all->forwarding == 0)
918                 strict |= RT6_LOOKUP_F_REACHABLE;
919
920 redo_fib6_lookup_lock:
921         read_lock_bh(&table->tb6_lock);
922
923         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
924         saved_fn = fn;
925
926 redo_rt6_select:
927         rt = rt6_select(fn, oif, strict);
928         if (rt->rt6i_nsiblings)
929                 rt = rt6_multipath_select(rt, fl6, oif, strict);
930         if (rt == net->ipv6.ip6_null_entry) {
931                 fn = fib6_backtrack(fn, &fl6->saddr);
932                 if (fn)
933                         goto redo_rt6_select;
934                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
935                         /* also consider unreachable route */
936                         strict &= ~RT6_LOOKUP_F_REACHABLE;
937                         fn = saved_fn;
938                         goto redo_rt6_select;
939                 } else {
940                         dst_hold(&rt->dst);
941                         read_unlock_bh(&table->tb6_lock);
942                         goto out2;
943                 }
944         }
945
946         dst_hold(&rt->dst);
947         read_unlock_bh(&table->tb6_lock);
948
949         if (rt->rt6i_flags & RTF_CACHE)
950                 goto out2;
951
952         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
953                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
954         else if (!(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL))
955                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
956         else
957                 goto out2;
958
959         ip6_rt_put(rt);
960         rt = nrt ? : net->ipv6.ip6_null_entry;
961
962         dst_hold(&rt->dst);
963         if (nrt) {
964                 err = ip6_ins_rt(nrt);
965                 if (!err)
966                         goto out2;
967         }
968
969         if (--attempts <= 0)
970                 goto out2;
971
972         /*
973          * Race condition! In the gap, when table->tb6_lock was
974          * released someone could insert this route.  Relookup.
975          */
976         ip6_rt_put(rt);
977         goto redo_fib6_lookup_lock;
978
979 out2:
980         rt6_dst_from_metrics_check(rt);
981         rt->dst.lastuse = jiffies;
982         rt->dst.__use++;
983
984         return rt;
985 }
986
987 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
988                                             struct flowi6 *fl6, int flags)
989 {
990         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
991 }
992
993 static struct dst_entry *ip6_route_input_lookup(struct net *net,
994                                                 struct net_device *dev,
995                                                 struct flowi6 *fl6, int flags)
996 {
997         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
998                 flags |= RT6_LOOKUP_F_IFACE;
999
1000         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1001 }
1002
1003 void ip6_route_input(struct sk_buff *skb)
1004 {
1005         const struct ipv6hdr *iph = ipv6_hdr(skb);
1006         struct net *net = dev_net(skb->dev);
1007         int flags = RT6_LOOKUP_F_HAS_SADDR;
1008         struct flowi6 fl6 = {
1009                 .flowi6_iif = skb->dev->ifindex,
1010                 .daddr = iph->daddr,
1011                 .saddr = iph->saddr,
1012                 .flowlabel = ip6_flowinfo(iph),
1013                 .flowi6_mark = skb->mark,
1014                 .flowi6_proto = iph->nexthdr,
1015         };
1016
1017         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1018 }
1019
1020 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1021                                              struct flowi6 *fl6, int flags)
1022 {
1023         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1024 }
1025
1026 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1027                                     struct flowi6 *fl6)
1028 {
1029         int flags = 0;
1030
1031         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1032
1033         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1034                 flags |= RT6_LOOKUP_F_IFACE;
1035
1036         if (!ipv6_addr_any(&fl6->saddr))
1037                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1038         else if (sk)
1039                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1040
1041         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1042 }
1043 EXPORT_SYMBOL(ip6_route_output);
1044
1045 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1046 {
1047         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1048         struct dst_entry *new = NULL;
1049
1050         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1051         if (rt) {
1052                 new = &rt->dst;
1053
1054                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1055                 rt6_init_peer(rt, net->ipv6.peers);
1056
1057                 new->__use = 1;
1058                 new->input = dst_discard;
1059                 new->output = dst_discard_sk;
1060
1061                 if (dst_metrics_read_only(&ort->dst))
1062                         new->_metrics = ort->dst._metrics;
1063                 else
1064                         dst_copy_metrics(new, &ort->dst);
1065                 rt->rt6i_idev = ort->rt6i_idev;
1066                 if (rt->rt6i_idev)
1067                         in6_dev_hold(rt->rt6i_idev);
1068
1069                 rt->rt6i_gateway = ort->rt6i_gateway;
1070                 rt->rt6i_flags = ort->rt6i_flags;
1071                 rt->rt6i_metric = 0;
1072
1073                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1074 #ifdef CONFIG_IPV6_SUBTREES
1075                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1076 #endif
1077
1078                 dst_free(new);
1079         }
1080
1081         dst_release(dst_orig);
1082         return new ? new : ERR_PTR(-ENOMEM);
1083 }
1084
1085 /*
1086  *      Destination cache support functions
1087  */
1088
1089 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1090 {
1091         if (rt->dst.from &&
1092             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1093                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1094 }
1095
1096 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1097 {
1098         struct rt6_info *rt;
1099
1100         rt = (struct rt6_info *) dst;
1101
1102         /* All IPV6 dsts are created with ->obsolete set to the value
1103          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1104          * into this function always.
1105          */
1106         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1107                 return NULL;
1108
1109         if (rt6_check_expired(rt))
1110                 return NULL;
1111
1112         rt6_dst_from_metrics_check(rt);
1113
1114         return dst;
1115 }
1116
1117 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1118 {
1119         struct rt6_info *rt = (struct rt6_info *) dst;
1120
1121         if (rt) {
1122                 if (rt->rt6i_flags & RTF_CACHE) {
1123                         if (rt6_check_expired(rt)) {
1124                                 ip6_del_rt(rt);
1125                                 dst = NULL;
1126                         }
1127                 } else {
1128                         dst_release(dst);
1129                         dst = NULL;
1130                 }
1131         }
1132         return dst;
1133 }
1134
1135 static void ip6_link_failure(struct sk_buff *skb)
1136 {
1137         struct rt6_info *rt;
1138
1139         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1140
1141         rt = (struct rt6_info *) skb_dst(skb);
1142         if (rt) {
1143                 if (rt->rt6i_flags & RTF_CACHE) {
1144                         dst_hold(&rt->dst);
1145                         if (ip6_del_rt(rt))
1146                                 dst_free(&rt->dst);
1147                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1148                         rt->rt6i_node->fn_sernum = -1;
1149                 }
1150         }
1151 }
1152
1153 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1154                                struct sk_buff *skb, u32 mtu)
1155 {
1156         struct rt6_info *rt6 = (struct rt6_info *)dst;
1157
1158         dst_confirm(dst);
1159         if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
1160                 struct net *net = dev_net(dst->dev);
1161
1162                 rt6->rt6i_flags |= RTF_MODIFIED;
1163                 if (mtu < IPV6_MIN_MTU)
1164                         mtu = IPV6_MIN_MTU;
1165
1166                 rt6->rt6i_pmtu = mtu;
1167                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1168         }
1169 }
1170
1171 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1172                      int oif, u32 mark)
1173 {
1174         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1175         struct dst_entry *dst;
1176         struct flowi6 fl6;
1177
1178         memset(&fl6, 0, sizeof(fl6));
1179         fl6.flowi6_oif = oif;
1180         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1181         fl6.daddr = iph->daddr;
1182         fl6.saddr = iph->saddr;
1183         fl6.flowlabel = ip6_flowinfo(iph);
1184
1185         dst = ip6_route_output(net, NULL, &fl6);
1186         if (!dst->error)
1187                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1188         dst_release(dst);
1189 }
1190 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1191
1192 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1193 {
1194         ip6_update_pmtu(skb, sock_net(sk), mtu,
1195                         sk->sk_bound_dev_if, sk->sk_mark);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1198
1199 /* Handle redirects */
/*
 * Flow key used for redirect lookups: a regular flowi6 plus the address of
 * the router the redirect came from.
 *
 * fl6 must stay the first member: ip6_route_redirect() passes &rdfl.fl6 to
 * fib6_rule_lookup(), and __ip6_route_redirect() casts the struct flowi6
 * pointer it receives back to struct ip6rd_flowi to reach gateway.
 */
1200 struct ip6rd_flowi {
1201         struct flowi6 fl6;
/* compared against each candidate route's rt6i_gateway */
1202         struct in6_addr gateway;
1203 };
1204
1205 static struct rt6_info *__ip6_route_redirect(struct net *net,
1206                                              struct fib6_table *table,
1207                                              struct flowi6 *fl6,
1208                                              int flags)
1209 {
1210         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1211         struct rt6_info *rt;
1212         struct fib6_node *fn;
1213
1214         /* Get the "current" route for this destination and
1215          * check if the redirect has come from approriate router.
1216          *
1217          * RFC 4861 specifies that redirects should only be
1218          * accepted if they come from the nexthop to the target.
1219          * Due to the way the routes are chosen, this notion
1220          * is a bit fuzzy and one might need to check all possible
1221          * routes.
1222          */
1223
1224         read_lock_bh(&table->tb6_lock);
1225         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1226 restart:
1227         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1228                 if (rt6_check_expired(rt))
1229                         continue;
1230                 if (rt->dst.error)
1231                         break;
1232                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1233                         continue;
1234                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1235                         continue;
1236                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1237                         continue;
1238                 break;
1239         }
1240
1241         if (!rt)
1242                 rt = net->ipv6.ip6_null_entry;
1243         else if (rt->dst.error) {
1244                 rt = net->ipv6.ip6_null_entry;
1245                 goto out;
1246         }
1247
1248         if (rt == net->ipv6.ip6_null_entry) {
1249                 fn = fib6_backtrack(fn, &fl6->saddr);
1250                 if (fn)
1251                         goto restart;
1252         }
1253
1254 out:
1255         dst_hold(&rt->dst);
1256
1257         read_unlock_bh(&table->tb6_lock);
1258
1259         return rt;
1260 };
1261
1262 static struct dst_entry *ip6_route_redirect(struct net *net,
1263                                         const struct flowi6 *fl6,
1264                                         const struct in6_addr *gateway)
1265 {
1266         int flags = RT6_LOOKUP_F_HAS_SADDR;
1267         struct ip6rd_flowi rdfl;
1268
1269         rdfl.fl6 = *fl6;
1270         rdfl.gateway = *gateway;
1271
1272         return fib6_rule_lookup(net, &rdfl.fl6,
1273                                 flags, __ip6_route_redirect);
1274 }
1275
1276 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1277 {
1278         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1279         struct dst_entry *dst;
1280         struct flowi6 fl6;
1281
1282         memset(&fl6, 0, sizeof(fl6));
1283         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1284         fl6.flowi6_oif = oif;
1285         fl6.flowi6_mark = mark;
1286         fl6.daddr = iph->daddr;
1287         fl6.saddr = iph->saddr;
1288         fl6.flowlabel = ip6_flowinfo(iph);
1289
1290         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1291         rt6_do_redirect(dst, NULL, skb);
1292         dst_release(dst);
1293 }
1294 EXPORT_SYMBOL_GPL(ip6_redirect);
1295
1296 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1297                             u32 mark)
1298 {
1299         const struct ipv6hdr *iph = ipv6_hdr(skb);
1300         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1301         struct dst_entry *dst;
1302         struct flowi6 fl6;
1303
1304         memset(&fl6, 0, sizeof(fl6));
1305         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1306         fl6.flowi6_oif = oif;
1307         fl6.flowi6_mark = mark;
1308         fl6.daddr = msg->dest;
1309         fl6.saddr = iph->daddr;
1310
1311         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1312         rt6_do_redirect(dst, NULL, skb);
1313         dst_release(dst);
1314 }
1315
1316 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1317 {
1318         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1319 }
1320 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1321
1322 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1323 {
1324         struct net_device *dev = dst->dev;
1325         unsigned int mtu = dst_mtu(dst);
1326         struct net *net = dev_net(dev);
1327
1328         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1329
1330         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1331                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1332
1333         /*
1334          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1335          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1336          * IPV6_MAXPLEN is also valid and means: "any MSS,
1337          * rely only on pmtu discovery"
1338          */
1339         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1340                 mtu = IPV6_MAXPLEN;
1341         return mtu;
1342 }
1343
1344 static unsigned int ip6_mtu(const struct dst_entry *dst)
1345 {
1346         const struct rt6_info *rt = (const struct rt6_info *)dst;
1347         unsigned int mtu = rt->rt6i_pmtu;
1348         struct inet6_dev *idev;
1349
1350         if (mtu)
1351                 goto out;
1352
1353         mtu = dst_metric_raw(dst, RTAX_MTU);
1354         if (mtu)
1355                 goto out;
1356
1357         mtu = IPV6_MIN_MTU;
1358
1359         rcu_read_lock();
1360         idev = __in6_dev_get(dst->dev);
1361         if (idev)
1362                 mtu = idev->cnf.mtu6;
1363         rcu_read_unlock();
1364
1365 out:
1366         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1367 }
1368
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all() unlink and free
 * entries from it.  Every access in this file is made with icmp6_dst_lock
 * held (spin_lock_bh).
 */
1369 static struct dst_entry *icmp6_dst_gc_list;
1370 static DEFINE_SPINLOCK(icmp6_dst_lock);
1371
1372 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1373                                   struct flowi6 *fl6)
1374 {
1375         struct dst_entry *dst;
1376         struct rt6_info *rt;
1377         struct inet6_dev *idev = in6_dev_get(dev);
1378         struct net *net = dev_net(dev);
1379
1380         if (unlikely(!idev))
1381                 return ERR_PTR(-ENODEV);
1382
1383         rt = ip6_dst_alloc(net, dev, 0, NULL);
1384         if (unlikely(!rt)) {
1385                 in6_dev_put(idev);
1386                 dst = ERR_PTR(-ENOMEM);
1387                 goto out;
1388         }
1389
1390         rt->dst.flags |= DST_HOST;
1391         rt->dst.output  = ip6_output;
1392         atomic_set(&rt->dst.__refcnt, 1);
1393         rt->rt6i_gateway  = fl6->daddr;
1394         rt->rt6i_dst.addr = fl6->daddr;
1395         rt->rt6i_dst.plen = 128;
1396         rt->rt6i_idev     = idev;
1397         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1398
1399         spin_lock_bh(&icmp6_dst_lock);
1400         rt->dst.next = icmp6_dst_gc_list;
1401         icmp6_dst_gc_list = &rt->dst;
1402         spin_unlock_bh(&icmp6_dst_lock);
1403
1404         fib6_force_start_gc(net);
1405
1406         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1407
1408 out:
1409         return dst;
1410 }
1411
1412 int icmp6_dst_gc(void)
1413 {
1414         struct dst_entry *dst, **pprev;
1415         int more = 0;
1416
1417         spin_lock_bh(&icmp6_dst_lock);
1418         pprev = &icmp6_dst_gc_list;
1419
1420         while ((dst = *pprev) != NULL) {
1421                 if (!atomic_read(&dst->__refcnt)) {
1422                         *pprev = dst->next;
1423                         dst_free(dst);
1424                 } else {
1425                         pprev = &dst->next;
1426                         ++more;
1427                 }
1428         }
1429
1430         spin_unlock_bh(&icmp6_dst_lock);
1431
1432         return more;
1433 }
1434
1435 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1436                             void *arg)
1437 {
1438         struct dst_entry *dst, **pprev;
1439
1440         spin_lock_bh(&icmp6_dst_lock);
1441         pprev = &icmp6_dst_gc_list;
1442         while ((dst = *pprev) != NULL) {
1443                 struct rt6_info *rt = (struct rt6_info *) dst;
1444                 if (func(rt, arg)) {
1445                         *pprev = dst->next;
1446                         dst_free(dst);
1447                 } else {
1448                         pprev = &dst->next;
1449                 }
1450         }
1451         spin_unlock_bh(&icmp6_dst_lock);
1452 }
1453
1454 static int ip6_dst_gc(struct dst_ops *ops)
1455 {
1456         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1457         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1458         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1459         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1460         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1461         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1462         int entries;
1463
1464         entries = dst_entries_get_fast(ops);
1465         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1466             entries <= rt_max_size)
1467                 goto out;
1468
1469         net->ipv6.ip6_rt_gc_expire++;
1470         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1471         entries = dst_entries_get_slow(ops);
1472         if (entries < ops->gc_thresh)
1473                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1474 out:
1475         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1476         return entries > rt_max_size;
1477 }
1478
1479 static int ip6_convert_metrics(struct mx6_config *mxc,
1480                                const struct fib6_config *cfg)
1481 {
1482         struct nlattr *nla;
1483         int remaining;
1484         u32 *mp;
1485
1486         if (!cfg->fc_mx)
1487                 return 0;
1488
1489         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1490         if (unlikely(!mp))
1491                 return -ENOMEM;
1492
1493         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1494                 int type = nla_type(nla);
1495
1496                 if (type) {
1497                         u32 val;
1498
1499                         if (unlikely(type > RTAX_MAX))
1500                                 goto err;
1501                         if (type == RTAX_CC_ALGO) {
1502                                 char tmp[TCP_CA_NAME_MAX];
1503
1504                                 nla_strlcpy(tmp, nla, sizeof(tmp));
1505                                 val = tcp_ca_get_key_by_name(tmp);
1506                                 if (val == TCP_CA_UNSPEC)
1507                                         goto err;
1508                         } else {
1509                                 val = nla_get_u32(nla);
1510                         }
1511
1512                         mp[type - 1] = val;
1513                         __set_bit(type - 1, mxc->mx_valid);
1514                 }
1515         }
1516
1517         mxc->mx = mp;
1518
1519         return 0;
1520  err:
1521         kfree(mp);
1522         return -EINVAL;
1523 }
1524
1525 int ip6_route_add(struct fib6_config *cfg)
1526 {
1527         int err;
1528         struct net *net = cfg->fc_nlinfo.nl_net;
1529         struct rt6_info *rt = NULL;
1530         struct net_device *dev = NULL;
1531         struct inet6_dev *idev = NULL;
1532         struct fib6_table *table;
1533         struct mx6_config mxc = { .mx = NULL, };
1534         int addr_type;
1535
1536         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1537                 return -EINVAL;
1538 #ifndef CONFIG_IPV6_SUBTREES
1539         if (cfg->fc_src_len)
1540                 return -EINVAL;
1541 #endif
1542         if (cfg->fc_ifindex) {
1543                 err = -ENODEV;
1544                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1545                 if (!dev)
1546                         goto out;
1547                 idev = in6_dev_get(dev);
1548                 if (!idev)
1549                         goto out;
1550         }
1551
1552         if (cfg->fc_metric == 0)
1553                 cfg->fc_metric = IP6_RT_PRIO_USER;
1554
1555         err = -ENOBUFS;
1556         if (cfg->fc_nlinfo.nlh &&
1557             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1558                 table = fib6_get_table(net, cfg->fc_table);
1559                 if (!table) {
1560                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1561                         table = fib6_new_table(net, cfg->fc_table);
1562                 }
1563         } else {
1564                 table = fib6_new_table(net, cfg->fc_table);
1565         }
1566
1567         if (!table)
1568                 goto out;
1569
1570         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1571
1572         if (!rt) {
1573                 err = -ENOMEM;
1574                 goto out;
1575         }
1576
1577         if (cfg->fc_flags & RTF_EXPIRES)
1578                 rt6_set_expires(rt, jiffies +
1579                                 clock_t_to_jiffies(cfg->fc_expires));
1580         else
1581                 rt6_clean_expires(rt);
1582
1583         if (cfg->fc_protocol == RTPROT_UNSPEC)
1584                 cfg->fc_protocol = RTPROT_BOOT;
1585         rt->rt6i_protocol = cfg->fc_protocol;
1586
1587         addr_type = ipv6_addr_type(&cfg->fc_dst);
1588
1589         if (addr_type & IPV6_ADDR_MULTICAST)
1590                 rt->dst.input = ip6_mc_input;
1591         else if (cfg->fc_flags & RTF_LOCAL)
1592                 rt->dst.input = ip6_input;
1593         else
1594                 rt->dst.input = ip6_forward;
1595
1596         rt->dst.output = ip6_output;
1597
1598         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1599         rt->rt6i_dst.plen = cfg->fc_dst_len;
1600         if (rt->rt6i_dst.plen == 128) {
1601                 rt->dst.flags |= DST_HOST;
1602                 dst_metrics_set_force_overwrite(&rt->dst);
1603         }
1604
1605 #ifdef CONFIG_IPV6_SUBTREES
1606         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1607         rt->rt6i_src.plen = cfg->fc_src_len;
1608 #endif
1609
1610         rt->rt6i_metric = cfg->fc_metric;
1611
1612         /* We cannot add true routes via loopback here,
1613            they would result in kernel looping; promote them to reject routes
1614          */
1615         if ((cfg->fc_flags & RTF_REJECT) ||
1616             (dev && (dev->flags & IFF_LOOPBACK) &&
1617              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1618              !(cfg->fc_flags & RTF_LOCAL))) {
1619                 /* hold loopback dev/idev if we haven't done so. */
1620                 if (dev != net->loopback_dev) {
1621                         if (dev) {
1622                                 dev_put(dev);
1623                                 in6_dev_put(idev);
1624                         }
1625                         dev = net->loopback_dev;
1626                         dev_hold(dev);
1627                         idev = in6_dev_get(dev);
1628                         if (!idev) {
1629                                 err = -ENODEV;
1630                                 goto out;
1631                         }
1632                 }
1633                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1634                 switch (cfg->fc_type) {
1635                 case RTN_BLACKHOLE:
1636                         rt->dst.error = -EINVAL;
1637                         rt->dst.output = dst_discard_sk;
1638                         rt->dst.input = dst_discard;
1639                         break;
1640                 case RTN_PROHIBIT:
1641                         rt->dst.error = -EACCES;
1642                         rt->dst.output = ip6_pkt_prohibit_out;
1643                         rt->dst.input = ip6_pkt_prohibit;
1644                         break;
1645                 case RTN_THROW:
1646                 default:
1647                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1648                                         : -ENETUNREACH;
1649                         rt->dst.output = ip6_pkt_discard_out;
1650                         rt->dst.input = ip6_pkt_discard;
1651                         break;
1652                 }
1653                 goto install_route;
1654         }
1655
1656         if (cfg->fc_flags & RTF_GATEWAY) {
1657                 const struct in6_addr *gw_addr;
1658                 int gwa_type;
1659
1660                 gw_addr = &cfg->fc_gateway;
1661                 rt->rt6i_gateway = *gw_addr;
1662                 gwa_type = ipv6_addr_type(gw_addr);
1663
1664                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1665                         struct rt6_info *grt;
1666
1667                         /* IPv6 strictly inhibits using not link-local
1668                            addresses as nexthop address.
1669                            Otherwise, router will not able to send redirects.
1670                            It is very good, but in some (rare!) circumstances
1671                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1672                            some exceptions. --ANK
1673                          */
1674                         err = -EINVAL;
1675                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1676                                 goto out;
1677
1678                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1679
1680                         err = -EHOSTUNREACH;
1681                         if (!grt)
1682                                 goto out;
1683                         if (dev) {
1684                                 if (dev != grt->dst.dev) {
1685                                         ip6_rt_put(grt);
1686                                         goto out;
1687                                 }
1688                         } else {
1689                                 dev = grt->dst.dev;
1690                                 idev = grt->rt6i_idev;
1691                                 dev_hold(dev);
1692                                 in6_dev_hold(grt->rt6i_idev);
1693                         }
1694                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1695                                 err = 0;
1696                         ip6_rt_put(grt);
1697
1698                         if (err)
1699                                 goto out;
1700                 }
1701                 err = -EINVAL;
1702                 if (!dev || (dev->flags & IFF_LOOPBACK))
1703                         goto out;
1704         }
1705
1706         err = -ENODEV;
1707         if (!dev)
1708                 goto out;
1709
1710         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1711                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1712                         err = -EINVAL;
1713                         goto out;
1714                 }
1715                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1716                 rt->rt6i_prefsrc.plen = 128;
1717         } else
1718                 rt->rt6i_prefsrc.plen = 0;
1719
1720         rt->rt6i_flags = cfg->fc_flags;
1721
1722 install_route:
1723         rt->dst.dev = dev;
1724         rt->rt6i_idev = idev;
1725         rt->rt6i_table = table;
1726
1727         cfg->fc_nlinfo.nl_net = dev_net(dev);
1728
1729         err = ip6_convert_metrics(&mxc, cfg);
1730         if (err)
1731                 goto out;
1732
1733         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1734
1735         kfree(mxc.mx);
1736         return err;
1737 out:
1738         if (dev)
1739                 dev_put(dev);
1740         if (idev)
1741                 in6_dev_put(idev);
1742         if (rt)
1743                 dst_free(&rt->dst);
1744         return err;
1745 }
1746
1747 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1748 {
1749         int err;
1750         struct fib6_table *table;
1751         struct net *net = dev_net(rt->dst.dev);
1752
1753         if (rt == net->ipv6.ip6_null_entry) {
1754                 err = -ENOENT;
1755                 goto out;
1756         }
1757
1758         table = rt->rt6i_table;
1759         write_lock_bh(&table->tb6_lock);
1760         err = fib6_del(rt, info);
1761         write_unlock_bh(&table->tb6_lock);
1762
1763 out:
1764         ip6_rt_put(rt);
1765         return err;
1766 }
1767
1768 int ip6_del_rt(struct rt6_info *rt)
1769 {
1770         struct nl_info info = {
1771                 .nl_net = dev_net(rt->dst.dev),
1772         };
1773         return __ip6_del_rt(rt, &info);
1774 }
1775
1776 static int ip6_route_del(struct fib6_config *cfg)
1777 {
1778         struct fib6_table *table;
1779         struct fib6_node *fn;
1780         struct rt6_info *rt;
1781         int err = -ESRCH;
1782
1783         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1784         if (!table)
1785                 return err;
1786
1787         read_lock_bh(&table->tb6_lock);
1788
1789         fn = fib6_locate(&table->tb6_root,
1790                          &cfg->fc_dst, cfg->fc_dst_len,
1791                          &cfg->fc_src, cfg->fc_src_len);
1792
1793         if (fn) {
1794                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1795                         if ((rt->rt6i_flags & RTF_CACHE) &&
1796                             !(cfg->fc_flags & RTF_CACHE))
1797                                 continue;
1798                         if (cfg->fc_ifindex &&
1799                             (!rt->dst.dev ||
1800                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1801                                 continue;
1802                         if (cfg->fc_flags & RTF_GATEWAY &&
1803                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1804                                 continue;
1805                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1806                                 continue;
1807                         dst_hold(&rt->dst);
1808                         read_unlock_bh(&table->tb6_lock);
1809
1810                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1811                 }
1812         }
1813         read_unlock_bh(&table->tb6_lock);
1814
1815         return err;
1816 }
1817
1818 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1819 {
1820         struct net *net = dev_net(skb->dev);
1821         struct netevent_redirect netevent;
1822         struct rt6_info *rt, *nrt = NULL;
1823         struct ndisc_options ndopts;
1824         struct inet6_dev *in6_dev;
1825         struct neighbour *neigh;
1826         struct rd_msg *msg;
1827         int optlen, on_link;
1828         u8 *lladdr;
1829
1830         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1831         optlen -= sizeof(*msg);
1832
1833         if (optlen < 0) {
1834                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1835                 return;
1836         }
1837
1838         msg = (struct rd_msg *)icmp6_hdr(skb);
1839
1840         if (ipv6_addr_is_multicast(&msg->dest)) {
1841                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1842                 return;
1843         }
1844
1845         on_link = 0;
1846         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1847                 on_link = 1;
1848         } else if (ipv6_addr_type(&msg->target) !=
1849                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1850                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1851                 return;
1852         }
1853
1854         in6_dev = __in6_dev_get(skb->dev);
1855         if (!in6_dev)
1856                 return;
1857         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1858                 return;
1859
1860         /* RFC2461 8.1:
1861          *      The IP source address of the Redirect MUST be the same as the current
1862          *      first-hop router for the specified ICMP Destination Address.
1863          */
1864
1865         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1866                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1867                 return;
1868         }
1869
1870         lladdr = NULL;
1871         if (ndopts.nd_opts_tgt_lladdr) {
1872                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1873                                              skb->dev);
1874                 if (!lladdr) {
1875                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1876                         return;
1877                 }
1878         }
1879
1880         rt = (struct rt6_info *) dst;
1881         if (rt == net->ipv6.ip6_null_entry) {
1882                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1883                 return;
1884         }
1885
1886         /* Redirect received -> path was valid.
1887          * Look, redirects are sent only in response to data packets,
1888          * so that this nexthop apparently is reachable. --ANK
1889          */
1890         dst_confirm(&rt->dst);
1891
1892         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1893         if (!neigh)
1894                 return;
1895
1896         /*
1897          *      We have finally decided to accept it.
1898          */
1899
1900         neigh_update(neigh, lladdr, NUD_STALE,
1901                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1902                      NEIGH_UPDATE_F_OVERRIDE|
1903                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1904                                      NEIGH_UPDATE_F_ISROUTER))
1905                      );
1906
1907         nrt = ip6_rt_copy(rt, &msg->dest);
1908         if (!nrt)
1909                 goto out;
1910
1911         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1912         if (on_link)
1913                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1914
1915         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1916
1917         if (ip6_ins_rt(nrt))
1918                 goto out;
1919
1920         netevent.old = &rt->dst;
1921         netevent.new = &nrt->dst;
1922         netevent.daddr = &msg->dest;
1923         netevent.neigh = neigh;
1924         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1925
1926         if (rt->rt6i_flags & RTF_CACHE) {
1927                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1928                 ip6_del_rt(rt);
1929         }
1930
1931 out:
1932         neigh_release(neigh);
1933 }
1934
1935 /*
1936  *      Misc support functions
1937  */
1938
1939 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1940 {
1941         BUG_ON(from->dst.from);
1942
1943         rt->rt6i_flags &= ~RTF_EXPIRES;
1944         dst_hold(&from->dst);
1945         rt->dst.from = &from->dst;
1946         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1947 }
1948
1949 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1950                                     const struct in6_addr *dest)
1951 {
1952         struct net *net = dev_net(ort->dst.dev);
1953         struct rt6_info *rt;
1954
1955         if (ort->rt6i_flags & RTF_CACHE)
1956                 ort = (struct rt6_info *)ort->dst.from;
1957
1958         rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1959                            ort->rt6i_table);
1960
1961         if (rt) {
1962                 rt->dst.input = ort->dst.input;
1963                 rt->dst.output = ort->dst.output;
1964                 rt->dst.flags |= DST_HOST;
1965
1966                 rt->rt6i_dst.addr = *dest;
1967                 rt->rt6i_dst.plen = 128;
1968                 rt->dst.error = ort->dst.error;
1969                 rt->rt6i_idev = ort->rt6i_idev;
1970                 if (rt->rt6i_idev)
1971                         in6_dev_hold(rt->rt6i_idev);
1972                 rt->dst.lastuse = jiffies;
1973
1974                 if (ort->rt6i_flags & RTF_GATEWAY)
1975                         rt->rt6i_gateway = ort->rt6i_gateway;
1976                 else
1977                         rt->rt6i_gateway = *dest;
1978                 rt->rt6i_flags = ort->rt6i_flags;
1979                 rt6_set_from(rt, ort);
1980                 rt->rt6i_metric = 0;
1981
1982 #ifdef CONFIG_IPV6_SUBTREES
1983                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1984 #endif
1985                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1986                 rt->rt6i_table = ort->rt6i_table;
1987         }
1988         return rt;
1989 }
1990
1991 #ifdef CONFIG_IPV6_ROUTE_INFO
1992 static struct rt6_info *rt6_get_route_info(struct net *net,
1993                                            const struct in6_addr *prefix, int prefixlen,
1994                                            const struct in6_addr *gwaddr, int ifindex)
1995 {
1996         struct fib6_node *fn;
1997         struct rt6_info *rt = NULL;
1998         struct fib6_table *table;
1999
2000         table = fib6_get_table(net, RT6_TABLE_INFO);
2001         if (!table)
2002                 return NULL;
2003
2004         read_lock_bh(&table->tb6_lock);
2005         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2006         if (!fn)
2007                 goto out;
2008
2009         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2010                 if (rt->dst.dev->ifindex != ifindex)
2011                         continue;
2012                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2013                         continue;
2014                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2015                         continue;
2016                 dst_hold(&rt->dst);
2017                 break;
2018         }
2019 out:
2020         read_unlock_bh(&table->tb6_lock);
2021         return rt;
2022 }
2023
2024 static struct rt6_info *rt6_add_route_info(struct net *net,
2025                                            const struct in6_addr *prefix, int prefixlen,
2026                                            const struct in6_addr *gwaddr, int ifindex,
2027                                            unsigned int pref)
2028 {
2029         struct fib6_config cfg = {
2030                 .fc_table       = RT6_TABLE_INFO,
2031                 .fc_metric      = IP6_RT_PRIO_USER,
2032                 .fc_ifindex     = ifindex,
2033                 .fc_dst_len     = prefixlen,
2034                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2035                                   RTF_UP | RTF_PREF(pref),
2036                 .fc_nlinfo.portid = 0,
2037                 .fc_nlinfo.nlh = NULL,
2038                 .fc_nlinfo.nl_net = net,
2039         };
2040
2041         cfg.fc_dst = *prefix;
2042         cfg.fc_gateway = *gwaddr;
2043
2044         /* We should treat it as a default route if prefix length is 0. */
2045         if (!prefixlen)
2046                 cfg.fc_flags |= RTF_DEFAULT;
2047
2048         ip6_route_add(&cfg);
2049
2050         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2051 }
2052 #endif
2053
2054 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2055 {
2056         struct rt6_info *rt;
2057         struct fib6_table *table;
2058
2059         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2060         if (!table)
2061                 return NULL;
2062
2063         read_lock_bh(&table->tb6_lock);
2064         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2065                 if (dev == rt->dst.dev &&
2066                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2067                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2068                         break;
2069         }
2070         if (rt)
2071                 dst_hold(&rt->dst);
2072         read_unlock_bh(&table->tb6_lock);
2073         return rt;
2074 }
2075
2076 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2077                                      struct net_device *dev,
2078                                      unsigned int pref)
2079 {
2080         struct fib6_config cfg = {
2081                 .fc_table       = RT6_TABLE_DFLT,
2082                 .fc_metric      = IP6_RT_PRIO_USER,
2083                 .fc_ifindex     = dev->ifindex,
2084                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2085                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2086                 .fc_nlinfo.portid = 0,
2087                 .fc_nlinfo.nlh = NULL,
2088                 .fc_nlinfo.nl_net = dev_net(dev),
2089         };
2090
2091         cfg.fc_gateway = *gwaddr;
2092
2093         ip6_route_add(&cfg);
2094
2095         return rt6_get_dflt_router(gwaddr, dev);
2096 }
2097
2098 void rt6_purge_dflt_routers(struct net *net)
2099 {
2100         struct rt6_info *rt;
2101         struct fib6_table *table;
2102
2103         /* NOTE: Keep consistent with rt6_get_dflt_router */
2104         table = fib6_get_table(net, RT6_TABLE_DFLT);
2105         if (!table)
2106                 return;
2107
2108 restart:
2109         read_lock_bh(&table->tb6_lock);
2110         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2111                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2112                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2113                         dst_hold(&rt->dst);
2114                         read_unlock_bh(&table->tb6_lock);
2115                         ip6_del_rt(rt);
2116                         goto restart;
2117                 }
2118         }
2119         read_unlock_bh(&table->tb6_lock);
2120 }
2121
2122 static void rtmsg_to_fib6_config(struct net *net,
2123                                  struct in6_rtmsg *rtmsg,
2124                                  struct fib6_config *cfg)
2125 {
2126         memset(cfg, 0, sizeof(*cfg));
2127
2128         cfg->fc_table = RT6_TABLE_MAIN;
2129         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2130         cfg->fc_metric = rtmsg->rtmsg_metric;
2131         cfg->fc_expires = rtmsg->rtmsg_info;
2132         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2133         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2134         cfg->fc_flags = rtmsg->rtmsg_flags;
2135
2136         cfg->fc_nlinfo.nl_net = net;
2137
2138         cfg->fc_dst = rtmsg->rtmsg_dst;
2139         cfg->fc_src = rtmsg->rtmsg_src;
2140         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2141 }
2142
2143 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2144 {
2145         struct fib6_config cfg;
2146         struct in6_rtmsg rtmsg;
2147         int err;
2148
2149         switch (cmd) {
2150         case SIOCADDRT:         /* Add a route */
2151         case SIOCDELRT:         /* Delete a route */
2152                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2153                         return -EPERM;
2154                 err = copy_from_user(&rtmsg, arg,
2155                                      sizeof(struct in6_rtmsg));
2156                 if (err)
2157                         return -EFAULT;
2158
2159                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2160
2161                 rtnl_lock();
2162                 switch (cmd) {
2163                 case SIOCADDRT:
2164                         err = ip6_route_add(&cfg);
2165                         break;
2166                 case SIOCDELRT:
2167                         err = ip6_route_del(&cfg);
2168                         break;
2169                 default:
2170                         err = -EINVAL;
2171                 }
2172                 rtnl_unlock();
2173
2174                 return err;
2175         }
2176
2177         return -EINVAL;
2178 }
2179
2180 /*
2181  *      Drop the packet on the floor
2182  */
2183
2184 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2185 {
2186         int type;
2187         struct dst_entry *dst = skb_dst(skb);
2188         switch (ipstats_mib_noroutes) {
2189         case IPSTATS_MIB_INNOROUTES:
2190                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2191                 if (type == IPV6_ADDR_ANY) {
2192                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2193                                       IPSTATS_MIB_INADDRERRORS);
2194                         break;
2195                 }
2196                 /* FALLTHROUGH */
2197         case IPSTATS_MIB_OUTNOROUTES:
2198                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2199                               ipstats_mib_noroutes);
2200                 break;
2201         }
2202         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2203         kfree_skb(skb);
2204         return 0;
2205 }
2206
2207 static int ip6_pkt_discard(struct sk_buff *skb)
2208 {
2209         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2210 }
2211
2212 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2213 {
2214         skb->dev = skb_dst(skb)->dev;
2215         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2216 }
2217
2218 static int ip6_pkt_prohibit(struct sk_buff *skb)
2219 {
2220         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2221 }
2222
2223 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2224 {
2225         skb->dev = skb_dst(skb)->dev;
2226         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2227 }
2228
2229 /*
2230  *      Allocate a dst for local (unicast / anycast) address.
2231  */
2232
2233 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2234                                     const struct in6_addr *addr,
2235                                     bool anycast)
2236 {
2237         struct net *net = dev_net(idev->dev);
2238         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2239                                             DST_NOCOUNT, NULL);
2240         if (!rt)
2241                 return ERR_PTR(-ENOMEM);
2242
2243         in6_dev_hold(idev);
2244
2245         rt->dst.flags |= DST_HOST;
2246         rt->dst.input = ip6_input;
2247         rt->dst.output = ip6_output;
2248         rt->rt6i_idev = idev;
2249
2250         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2251         if (anycast)
2252                 rt->rt6i_flags |= RTF_ANYCAST;
2253         else
2254                 rt->rt6i_flags |= RTF_LOCAL;
2255
2256         rt->rt6i_gateway  = *addr;
2257         rt->rt6i_dst.addr = *addr;
2258         rt->rt6i_dst.plen = 128;
2259         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2260
2261         atomic_set(&rt->dst.__refcnt, 1);
2262
2263         return rt;
2264 }
2265
2266 int ip6_route_get_saddr(struct net *net,
2267                         struct rt6_info *rt,
2268                         const struct in6_addr *daddr,
2269                         unsigned int prefs,
2270                         struct in6_addr *saddr)
2271 {
2272         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2273         int err = 0;
2274         if (rt->rt6i_prefsrc.plen)
2275                 *saddr = rt->rt6i_prefsrc.addr;
2276         else
2277                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2278                                          daddr, prefs, saddr);
2279         return err;
2280 }
2281
2282 /* remove deleted ip from prefsrc entries */
2283 struct arg_dev_net_ip {
2284         struct net_device *dev;
2285         struct net *net;
2286         struct in6_addr *addr;
2287 };
2288
2289 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2290 {
2291         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2292         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2293         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2294
2295         if (((void *)rt->dst.dev == dev || !dev) &&
2296             rt != net->ipv6.ip6_null_entry &&
2297             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2298                 /* remove prefsrc entry */
2299                 rt->rt6i_prefsrc.plen = 0;
2300         }
2301         return 0;
2302 }
2303
2304 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2305 {
2306         struct net *net = dev_net(ifp->idev->dev);
2307         struct arg_dev_net_ip adni = {
2308                 .dev = ifp->idev->dev,
2309                 .net = net,
2310                 .addr = &ifp->addr,
2311         };
2312         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2313 }
2314
2315 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2316 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2317
2318 /* Remove routers and update dst entries when gateway turn into host. */
2319 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2320 {
2321         struct in6_addr *gateway = (struct in6_addr *)arg;
2322
2323         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2324              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2325              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2326                 return -1;
2327         }
2328         return 0;
2329 }
2330
2331 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2332 {
2333         fib6_clean_all(net, fib6_clean_tohost, gateway);
2334 }
2335
2336 struct arg_dev_net {
2337         struct net_device *dev;
2338         struct net *net;
2339 };
2340
2341 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2342 {
2343         const struct arg_dev_net *adn = arg;
2344         const struct net_device *dev = adn->dev;
2345
2346         if ((rt->dst.dev == dev || !dev) &&
2347             rt != adn->net->ipv6.ip6_null_entry)
2348                 return -1;
2349
2350         return 0;
2351 }
2352
2353 void rt6_ifdown(struct net *net, struct net_device *dev)
2354 {
2355         struct arg_dev_net adn = {
2356                 .dev = dev,
2357                 .net = net,
2358         };
2359
2360         fib6_clean_all(net, fib6_ifdown, &adn);
2361         icmp6_clean_all(fib6_ifdown, &adn);
2362 }
2363
2364 struct rt6_mtu_change_arg {
2365         struct net_device *dev;
2366         unsigned int mtu;
2367 };
2368
2369 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2370 {
2371         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2372         struct inet6_dev *idev;
2373
2374         /* In IPv6 pmtu discovery is not optional,
2375            so that RTAX_MTU lock cannot disable it.
2376            We still use this lock to block changes
2377            caused by addrconf/ndisc.
2378         */
2379
2380         idev = __in6_dev_get(arg->dev);
2381         if (!idev)
2382                 return 0;
2383
2384         /* For administrative MTU increase, there is no way to discover
2385            IPv6 PMTU increase, so PMTU increase should be updated here.
2386            Since RFC 1981 doesn't include administrative MTU increase
2387            update PMTU increase is a MUST. (i.e. jumbo frame)
2388          */
2389         /*
2390            If new MTU is less than route PMTU, this new MTU will be the
2391            lowest MTU in the path, update the route PMTU to reflect PMTU
2392            decreases; if new MTU is greater than route PMTU, and the
2393            old MTU is the lowest MTU in the path, update the route PMTU
2394            to reflect the increase. In this case if the other nodes' MTU
2395            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2396            PMTU discouvery.
2397          */
2398         if (rt->dst.dev == arg->dev &&
2399             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2400                 if (rt->rt6i_flags & RTF_CACHE) {
2401                         /* For RTF_CACHE with rt6i_pmtu == 0
2402                          * (i.e. a redirected route),
2403                          * the metrics of its rt->dst.from has already
2404                          * been updated.
2405                          */
2406                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2407                                 rt->rt6i_pmtu = arg->mtu;
2408                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2409                            (dst_mtu(&rt->dst) < arg->mtu &&
2410                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2411                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2412                 }
2413         }
2414         return 0;
2415 }
2416
2417 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2418 {
2419         struct rt6_mtu_change_arg arg = {
2420                 .dev = dev,
2421                 .mtu = mtu,
2422         };
2423
2424         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2425 }
2426
2427 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2428         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2429         [RTA_OIF]               = { .type = NLA_U32 },
2430         [RTA_IIF]               = { .type = NLA_U32 },
2431         [RTA_PRIORITY]          = { .type = NLA_U32 },
2432         [RTA_METRICS]           = { .type = NLA_NESTED },
2433         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2434         [RTA_PREF]              = { .type = NLA_U8 },
2435 };
2436
2437 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2438                               struct fib6_config *cfg)
2439 {
2440         struct rtmsg *rtm;
2441         struct nlattr *tb[RTA_MAX+1];
2442         unsigned int pref;
2443         int err;
2444
2445         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2446         if (err < 0)
2447                 goto errout;
2448
2449         err = -EINVAL;
2450         rtm = nlmsg_data(nlh);
2451         memset(cfg, 0, sizeof(*cfg));
2452
2453         cfg->fc_table = rtm->rtm_table;
2454         cfg->fc_dst_len = rtm->rtm_dst_len;
2455         cfg->fc_src_len = rtm->rtm_src_len;
2456         cfg->fc_flags = RTF_UP;
2457         cfg->fc_protocol = rtm->rtm_protocol;
2458         cfg->fc_type = rtm->rtm_type;
2459
2460         if (rtm->rtm_type == RTN_UNREACHABLE ||
2461             rtm->rtm_type == RTN_BLACKHOLE ||
2462             rtm->rtm_type == RTN_PROHIBIT ||
2463             rtm->rtm_type == RTN_THROW)
2464                 cfg->fc_flags |= RTF_REJECT;
2465
2466         if (rtm->rtm_type == RTN_LOCAL)
2467                 cfg->fc_flags |= RTF_LOCAL;
2468
2469         if (rtm->rtm_flags & RTM_F_CLONED)
2470                 cfg->fc_flags |= RTF_CACHE;
2471
2472         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2473         cfg->fc_nlinfo.nlh = nlh;
2474         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2475
2476         if (tb[RTA_GATEWAY]) {
2477                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2478                 cfg->fc_flags |= RTF_GATEWAY;
2479         }
2480
2481         if (tb[RTA_DST]) {
2482                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2483
2484                 if (nla_len(tb[RTA_DST]) < plen)
2485                         goto errout;
2486
2487                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2488         }
2489
2490         if (tb[RTA_SRC]) {
2491                 int plen = (rtm->rtm_src_len + 7) >> 3;
2492
2493                 if (nla_len(tb[RTA_SRC]) < plen)
2494                         goto errout;
2495
2496                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2497         }
2498
2499         if (tb[RTA_PREFSRC])
2500                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2501
2502         if (tb[RTA_OIF])
2503                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2504
2505         if (tb[RTA_PRIORITY])
2506                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2507
2508         if (tb[RTA_METRICS]) {
2509                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2510                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2511         }
2512
2513         if (tb[RTA_TABLE])
2514                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2515
2516         if (tb[RTA_MULTIPATH]) {
2517                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2518                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2519         }
2520
2521         if (tb[RTA_PREF]) {
2522                 pref = nla_get_u8(tb[RTA_PREF]);
2523                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2524                     pref != ICMPV6_ROUTER_PREF_HIGH)
2525                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2526                 cfg->fc_flags |= RTF_PREF(pref);
2527         }
2528
2529         err = 0;
2530 errout:
2531         return err;
2532 }
2533
2534 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2535 {
2536         struct fib6_config r_cfg;
2537         struct rtnexthop *rtnh;
2538         int remaining;
2539         int attrlen;
2540         int err = 0, last_err = 0;
2541
2542 beginning:
2543         rtnh = (struct rtnexthop *)cfg->fc_mp;
2544         remaining = cfg->fc_mp_len;
2545
2546         /* Parse a Multipath Entry */
2547         while (rtnh_ok(rtnh, remaining)) {
2548                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2549                 if (rtnh->rtnh_ifindex)
2550                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2551
2552                 attrlen = rtnh_attrlen(rtnh);
2553                 if (attrlen > 0) {
2554                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2555
2556                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2557                         if (nla) {
2558                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2559                                 r_cfg.fc_flags |= RTF_GATEWAY;
2560                         }
2561                 }
2562                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2563                 if (err) {
2564                         last_err = err;
2565                         /* If we are trying to remove a route, do not stop the
2566                          * loop when ip6_route_del() fails (because next hop is
2567                          * already gone), we should try to remove all next hops.
2568                          */
2569                         if (add) {
2570                                 /* If add fails, we should try to delete all
2571                                  * next hops that have been already added.
2572                                  */
2573                                 add = 0;
2574                                 goto beginning;
2575                         }
2576                 }
2577                 /* Because each route is added like a single route we remove
2578                  * this flag after the first nexthop (if there is a collision,
2579                  * we have already fail to add the first nexthop:
2580                  * fib6_add_rt2node() has reject it).
2581                  */
2582                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2583                 rtnh = rtnh_next(rtnh, &remaining);
2584         }
2585
2586         return last_err;
2587 }
2588
2589 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2590 {
2591         struct fib6_config cfg;
2592         int err;
2593
2594         err = rtm_to_fib6_config(skb, nlh, &cfg);
2595         if (err < 0)
2596                 return err;
2597
2598         if (cfg.fc_mp)
2599                 return ip6_route_multipath(&cfg, 0);
2600         else
2601                 return ip6_route_del(&cfg);
2602 }
2603
2604 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2605 {
2606         struct fib6_config cfg;
2607         int err;
2608
2609         err = rtm_to_fib6_config(skb, nlh, &cfg);
2610         if (err < 0)
2611                 return err;
2612
2613         if (cfg.fc_mp)
2614                 return ip6_route_multipath(&cfg, 1);
2615         else
2616                 return ip6_route_add(&cfg);
2617 }
2618
2619 static inline size_t rt6_nlmsg_size(void)
2620 {
2621         return NLMSG_ALIGN(sizeof(struct rtmsg))
2622                + nla_total_size(16) /* RTA_SRC */
2623                + nla_total_size(16) /* RTA_DST */
2624                + nla_total_size(16) /* RTA_GATEWAY */
2625                + nla_total_size(16) /* RTA_PREFSRC */
2626                + nla_total_size(4) /* RTA_TABLE */
2627                + nla_total_size(4) /* RTA_IIF */
2628                + nla_total_size(4) /* RTA_OIF */
2629                + nla_total_size(4) /* RTA_PRIORITY */
2630                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2631                + nla_total_size(sizeof(struct rta_cacheinfo))
2632                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2633                + nla_total_size(1); /* RTA_PREF */
2634 }
2635
2636 static int rt6_fill_node(struct net *net,
2637                          struct sk_buff *skb, struct rt6_info *rt,
2638                          struct in6_addr *dst, struct in6_addr *src,
2639                          int iif, int type, u32 portid, u32 seq,
2640                          int prefix, int nowait, unsigned int flags)
2641 {
2642         u32 metrics[RTAX_MAX];
2643         struct rtmsg *rtm;
2644         struct nlmsghdr *nlh;
2645         long expires;
2646         u32 table;
2647
2648         if (prefix) {   /* user wants prefix routes only */
2649                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2650                         /* success since this is not a prefix route */
2651                         return 1;
2652                 }
2653         }
2654
2655         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2656         if (!nlh)
2657                 return -EMSGSIZE;
2658
2659         rtm = nlmsg_data(nlh);
2660         rtm->rtm_family = AF_INET6;
2661         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2662         rtm->rtm_src_len = rt->rt6i_src.plen;
2663         rtm->rtm_tos = 0;
2664         if (rt->rt6i_table)
2665                 table = rt->rt6i_table->tb6_id;
2666         else
2667                 table = RT6_TABLE_UNSPEC;
2668         rtm->rtm_table = table;
2669         if (nla_put_u32(skb, RTA_TABLE, table))
2670                 goto nla_put_failure;
2671         if (rt->rt6i_flags & RTF_REJECT) {
2672                 switch (rt->dst.error) {
2673                 case -EINVAL:
2674                         rtm->rtm_type = RTN_BLACKHOLE;
2675                         break;
2676                 case -EACCES:
2677                         rtm->rtm_type = RTN_PROHIBIT;
2678                         break;
2679                 case -EAGAIN:
2680                         rtm->rtm_type = RTN_THROW;
2681                         break;
2682                 default:
2683                         rtm->rtm_type = RTN_UNREACHABLE;
2684                         break;
2685                 }
2686         }
2687         else if (rt->rt6i_flags & RTF_LOCAL)
2688                 rtm->rtm_type = RTN_LOCAL;
2689         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2690                 rtm->rtm_type = RTN_LOCAL;
2691         else
2692                 rtm->rtm_type = RTN_UNICAST;
2693         rtm->rtm_flags = 0;
2694         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2695         rtm->rtm_protocol = rt->rt6i_protocol;
2696         if (rt->rt6i_flags & RTF_DYNAMIC)
2697                 rtm->rtm_protocol = RTPROT_REDIRECT;
2698         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2699                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2700                         rtm->rtm_protocol = RTPROT_RA;
2701                 else
2702                         rtm->rtm_protocol = RTPROT_KERNEL;
2703         }
2704
2705         if (rt->rt6i_flags & RTF_CACHE)
2706                 rtm->rtm_flags |= RTM_F_CLONED;
2707
2708         if (dst) {
2709                 if (nla_put_in6_addr(skb, RTA_DST, dst))
2710                         goto nla_put_failure;
2711                 rtm->rtm_dst_len = 128;
2712         } else if (rtm->rtm_dst_len)
2713                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2714                         goto nla_put_failure;
2715 #ifdef CONFIG_IPV6_SUBTREES
2716         if (src) {
2717                 if (nla_put_in6_addr(skb, RTA_SRC, src))
2718                         goto nla_put_failure;
2719                 rtm->rtm_src_len = 128;
2720         } else if (rtm->rtm_src_len &&
2721                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2722                 goto nla_put_failure;
2723 #endif
2724         if (iif) {
2725 #ifdef CONFIG_IPV6_MROUTE
2726                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2727                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2728                         if (err <= 0) {
2729                                 if (!nowait) {
2730                                         if (err == 0)
2731                                                 return 0;
2732                                         goto nla_put_failure;
2733                                 } else {
2734                                         if (err == -EMSGSIZE)
2735                                                 goto nla_put_failure;
2736                                 }
2737                         }
2738                 } else
2739 #endif
2740                         if (nla_put_u32(skb, RTA_IIF, iif))
2741                                 goto nla_put_failure;
2742         } else if (dst) {
2743                 struct in6_addr saddr_buf;
2744                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2745                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2746                         goto nla_put_failure;
2747         }
2748
2749         if (rt->rt6i_prefsrc.plen) {
2750                 struct in6_addr saddr_buf;
2751                 saddr_buf = rt->rt6i_prefsrc.addr;
2752                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2753                         goto nla_put_failure;
2754         }
2755
2756         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2757         if (rt->rt6i_pmtu)
2758                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2759         if (rtnetlink_put_metrics(skb, metrics) < 0)
2760                 goto nla_put_failure;
2761
2762         if (rt->rt6i_flags & RTF_GATEWAY) {
2763                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2764                         goto nla_put_failure;
2765         }
2766
2767         if (rt->dst.dev &&
2768             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2769                 goto nla_put_failure;
2770         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2771                 goto nla_put_failure;
2772
2773         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2774
2775         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2776                 goto nla_put_failure;
2777
2778         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2779                 goto nla_put_failure;
2780
2781         nlmsg_end(skb, nlh);
2782         return 0;
2783
2784 nla_put_failure:
2785         nlmsg_cancel(skb, nlh);
2786         return -EMSGSIZE;
2787 }
2788
2789 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2790 {
2791         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2792         int prefix;
2793
2794         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2795                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2796                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2797         } else
2798                 prefix = 0;
2799
2800         return rt6_fill_node(arg->net,
2801                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2802                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2803                      prefix, 0, NLM_F_MULTI);
2804 }
2805
2806 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2807 {
2808         struct net *net = sock_net(in_skb->sk);
2809         struct nlattr *tb[RTA_MAX+1];
2810         struct rt6_info *rt;
2811         struct sk_buff *skb;
2812         struct rtmsg *rtm;
2813         struct flowi6 fl6;
2814         int err, iif = 0, oif = 0;
2815
2816         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2817         if (err < 0)
2818                 goto errout;
2819
2820         err = -EINVAL;
2821         memset(&fl6, 0, sizeof(fl6));
2822
2823         if (tb[RTA_SRC]) {
2824                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2825                         goto errout;
2826
2827                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2828         }
2829
2830         if (tb[RTA_DST]) {
2831                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2832                         goto errout;
2833
2834                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2835         }
2836
2837         if (tb[RTA_IIF])
2838                 iif = nla_get_u32(tb[RTA_IIF]);
2839
2840         if (tb[RTA_OIF])
2841                 oif = nla_get_u32(tb[RTA_OIF]);
2842
2843         if (tb[RTA_MARK])
2844                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2845
2846         if (iif) {
2847                 struct net_device *dev;
2848                 int flags = 0;
2849
2850                 dev = __dev_get_by_index(net, iif);
2851                 if (!dev) {
2852                         err = -ENODEV;
2853                         goto errout;
2854                 }
2855
2856                 fl6.flowi6_iif = iif;
2857
2858                 if (!ipv6_addr_any(&fl6.saddr))
2859                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2860
2861                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2862                                                                flags);
2863         } else {
2864                 fl6.flowi6_oif = oif;
2865
2866                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2867         }
2868
2869         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2870         if (!skb) {
2871                 ip6_rt_put(rt);
2872                 err = -ENOBUFS;
2873                 goto errout;
2874         }
2875
2876         /* Reserve room for dummy headers, this skb can pass
2877            through good chunk of routing engine.
2878          */
2879         skb_reset_mac_header(skb);
2880         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2881
2882         skb_dst_set(skb, &rt->dst);
2883
2884         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2885                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2886                             nlh->nlmsg_seq, 0, 0, 0);
2887         if (err < 0) {
2888                 kfree_skb(skb);
2889                 goto errout;
2890         }
2891
2892         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2893 errout:
2894         return err;
2895 }
2896
2897 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2898 {
2899         struct sk_buff *skb;
2900         struct net *net = info->nl_net;
2901         u32 seq;
2902         int err;
2903
2904         err = -ENOBUFS;
2905         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2906
2907         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2908         if (!skb)
2909                 goto errout;
2910
2911         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2912                                 event, info->portid, seq, 0, 0, 0);
2913         if (err < 0) {
2914                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2915                 WARN_ON(err == -EMSGSIZE);
2916                 kfree_skb(skb);
2917                 goto errout;
2918         }
2919         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2920                     info->nlh, gfp_any());
2921         return;
2922 errout:
2923         if (err < 0)
2924                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2925 }
2926
2927 static int ip6_route_dev_notify(struct notifier_block *this,
2928                                 unsigned long event, void *ptr)
2929 {
2930         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2931         struct net *net = dev_net(dev);
2932
2933         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2934                 net->ipv6.ip6_null_entry->dst.dev = dev;
2935                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2936 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2937                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2938                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2939                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2940                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2941 #endif
2942         }
2943
2944         return NOTIFY_OK;
2945 }
2946
2947 /*
2948  *      /proc
2949  */
2950
2951 #ifdef CONFIG_PROC_FS
2952
2953 static const struct file_operations ipv6_route_proc_fops = {
2954         .owner          = THIS_MODULE,
2955         .open           = ipv6_route_open,
2956         .read           = seq_read,
2957         .llseek         = seq_lseek,
2958         .release        = seq_release_net,
2959 };
2960
2961 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2962 {
2963         struct net *net = (struct net *)seq->private;
2964         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2965                    net->ipv6.rt6_stats->fib_nodes,
2966                    net->ipv6.rt6_stats->fib_route_nodes,
2967                    net->ipv6.rt6_stats->fib_rt_alloc,
2968                    net->ipv6.rt6_stats->fib_rt_entries,
2969                    net->ipv6.rt6_stats->fib_rt_cache,
2970                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2971                    net->ipv6.rt6_stats->fib_discarded_routes);
2972
2973         return 0;
2974 }
2975
2976 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2977 {
2978         return single_open_net(inode, file, rt6_stats_seq_show);
2979 }
2980
2981 static const struct file_operations rt6_stats_seq_fops = {
2982         .owner   = THIS_MODULE,
2983         .open    = rt6_stats_seq_open,
2984         .read    = seq_read,
2985         .llseek  = seq_lseek,
2986         .release = single_release_net,
2987 };
2988 #endif  /* CONFIG_PROC_FS */
2989
2990 #ifdef CONFIG_SYSCTL
2991
2992 static
2993 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2994                               void __user *buffer, size_t *lenp, loff_t *ppos)
2995 {
2996         struct net *net;
2997         int delay;
2998         if (!write)
2999                 return -EINVAL;
3000
3001         net = (struct net *)ctl->extra1;
3002         delay = net->ipv6.sysctl.flush_delay;
3003         proc_dointvec(ctl, write, buffer, lenp, ppos);
3004         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3005         return 0;
3006 }
3007
3008 struct ctl_table ipv6_route_table_template[] = {
3009         {
3010                 .procname       =       "flush",
3011                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3012                 .maxlen         =       sizeof(int),
3013                 .mode           =       0200,
3014                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3015         },
3016         {
3017                 .procname       =       "gc_thresh",
3018                 .data           =       &ip6_dst_ops_template.gc_thresh,
3019                 .maxlen         =       sizeof(int),
3020                 .mode           =       0644,
3021                 .proc_handler   =       proc_dointvec,
3022         },
3023         {
3024                 .procname       =       "max_size",
3025                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3026                 .maxlen         =       sizeof(int),
3027                 .mode           =       0644,
3028                 .proc_handler   =       proc_dointvec,
3029         },
3030         {
3031                 .procname       =       "gc_min_interval",
3032                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3033                 .maxlen         =       sizeof(int),
3034                 .mode           =       0644,
3035                 .proc_handler   =       proc_dointvec_jiffies,
3036         },
3037         {
3038                 .procname       =       "gc_timeout",
3039                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3040                 .maxlen         =       sizeof(int),
3041                 .mode           =       0644,
3042                 .proc_handler   =       proc_dointvec_jiffies,
3043         },
3044         {
3045                 .procname       =       "gc_interval",
3046                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3047                 .maxlen         =       sizeof(int),
3048                 .mode           =       0644,
3049                 .proc_handler   =       proc_dointvec_jiffies,
3050         },
3051         {
3052                 .procname       =       "gc_elasticity",
3053                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3054                 .maxlen         =       sizeof(int),
3055                 .mode           =       0644,
3056                 .proc_handler   =       proc_dointvec,
3057         },
3058         {
3059                 .procname       =       "mtu_expires",
3060                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3061                 .maxlen         =       sizeof(int),
3062                 .mode           =       0644,
3063                 .proc_handler   =       proc_dointvec_jiffies,
3064         },
3065         {
3066                 .procname       =       "min_adv_mss",
3067                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3068                 .maxlen         =       sizeof(int),
3069                 .mode           =       0644,
3070                 .proc_handler   =       proc_dointvec,
3071         },
3072         {
3073                 .procname       =       "gc_min_interval_ms",
3074                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3075                 .maxlen         =       sizeof(int),
3076                 .mode           =       0644,
3077                 .proc_handler   =       proc_dointvec_ms_jiffies,
3078         },
3079         { }
3080 };
3081
3082 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3083 {
3084         struct ctl_table *table;
3085
3086         table = kmemdup(ipv6_route_table_template,
3087                         sizeof(ipv6_route_table_template),
3088                         GFP_KERNEL);
3089
3090         if (table) {
3091                 table[0].data = &net->ipv6.sysctl.flush_delay;
3092                 table[0].extra1 = net;
3093                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3094                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3095                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3096                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3097                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3098                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3099                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3100                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3101                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3102
3103                 /* Don't export sysctls to unprivileged users */
3104                 if (net->user_ns != &init_user_ns)
3105                         table[0].procname = NULL;
3106         }
3107
3108         return table;
3109 }
3110 #endif
3111
3112 static int __net_init ip6_route_net_init(struct net *net)
3113 {
3114         int ret = -ENOMEM;
3115
3116         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3117                sizeof(net->ipv6.ip6_dst_ops));
3118
3119         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3120                 goto out_ip6_dst_ops;
3121
3122         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3123                                            sizeof(*net->ipv6.ip6_null_entry),
3124                                            GFP_KERNEL);
3125         if (!net->ipv6.ip6_null_entry)
3126                 goto out_ip6_dst_entries;
3127         net->ipv6.ip6_null_entry->dst.path =
3128                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3129         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3130         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3131                          ip6_template_metrics, true);
3132
3133 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3134         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3135                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3136                                                GFP_KERNEL);
3137         if (!net->ipv6.ip6_prohibit_entry)
3138                 goto out_ip6_null_entry;
3139         net->ipv6.ip6_prohibit_entry->dst.path =
3140                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3141         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3142         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3143                          ip6_template_metrics, true);
3144
3145         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3146                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3147                                                GFP_KERNEL);
3148         if (!net->ipv6.ip6_blk_hole_entry)
3149                 goto out_ip6_prohibit_entry;
3150         net->ipv6.ip6_blk_hole_entry->dst.path =
3151                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3152         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3153         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3154                          ip6_template_metrics, true);
3155 #endif
3156
3157         net->ipv6.sysctl.flush_delay = 0;
3158         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3159         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3160         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3161         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3162         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3163         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3164         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3165
3166         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3167
3168         ret = 0;
3169 out:
3170         return ret;
3171
3172 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3173 out_ip6_prohibit_entry:
3174         kfree(net->ipv6.ip6_prohibit_entry);
3175 out_ip6_null_entry:
3176         kfree(net->ipv6.ip6_null_entry);
3177 #endif
3178 out_ip6_dst_entries:
3179         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3180 out_ip6_dst_ops:
3181         goto out;
3182 }
3183
3184 static void __net_exit ip6_route_net_exit(struct net *net)
3185 {
3186         kfree(net->ipv6.ip6_null_entry);
3187 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3188         kfree(net->ipv6.ip6_prohibit_entry);
3189         kfree(net->ipv6.ip6_blk_hole_entry);
3190 #endif
3191         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3192 }
3193
3194 static int __net_init ip6_route_net_init_late(struct net *net)
3195 {
3196 #ifdef CONFIG_PROC_FS
3197         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3198         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3199 #endif
3200         return 0;
3201 }
3202
3203 static void __net_exit ip6_route_net_exit_late(struct net *net)
3204 {
3205 #ifdef CONFIG_PROC_FS
3206         remove_proc_entry("ipv6_route", net->proc_net);
3207         remove_proc_entry("rt6_stats", net->proc_net);
3208 #endif
3209 }
3210
3211 static struct pernet_operations ip6_route_net_ops = {
3212         .init = ip6_route_net_init,
3213         .exit = ip6_route_net_exit,
3214 };
3215
3216 static int __net_init ipv6_inetpeer_init(struct net *net)
3217 {
3218         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3219
3220         if (!bp)
3221                 return -ENOMEM;
3222         inet_peer_base_init(bp);
3223         net->ipv6.peers = bp;
3224         return 0;
3225 }
3226
3227 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3228 {
3229         struct inet_peer_base *bp = net->ipv6.peers;
3230
3231         net->ipv6.peers = NULL;
3232         inetpeer_invalidate_tree(bp);
3233         kfree(bp);
3234 }
3235
3236 static struct pernet_operations ipv6_inetpeer_ops = {
3237         .init   =       ipv6_inetpeer_init,
3238         .exit   =       ipv6_inetpeer_exit,
3239 };
3240
3241 static struct pernet_operations ip6_route_net_late_ops = {
3242         .init = ip6_route_net_init_late,
3243         .exit = ip6_route_net_exit_late,
3244 };
3245
3246 static struct notifier_block ip6_route_dev_notifier = {
3247         .notifier_call = ip6_route_dev_notify,
3248         .priority = 0,
3249 };
3250
3251 int __init ip6_route_init(void)
3252 {
3253         int ret;
3254
3255         ret = -ENOMEM;
3256         ip6_dst_ops_template.kmem_cachep =
3257                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3258                                   SLAB_HWCACHE_ALIGN, NULL);
3259         if (!ip6_dst_ops_template.kmem_cachep)
3260                 goto out;
3261
3262         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3263         if (ret)
3264                 goto out_kmem_cache;
3265
3266         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3267         if (ret)
3268                 goto out_dst_entries;
3269
3270         ret = register_pernet_subsys(&ip6_route_net_ops);
3271         if (ret)
3272                 goto out_register_inetpeer;
3273
3274         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3275
3276         /* Registering of the loopback is done before this portion of code,
3277          * the loopback reference in rt6_info will not be taken, do it
3278          * manually for init_net */
3279         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3280         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3281   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3282         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3283         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3284         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3285         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3286   #endif
3287         ret = fib6_init();
3288         if (ret)
3289                 goto out_register_subsys;
3290
3291         ret = xfrm6_init();
3292         if (ret)
3293                 goto out_fib6_init;
3294
3295         ret = fib6_rules_init();
3296         if (ret)
3297                 goto xfrm6_init;
3298
3299         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3300         if (ret)
3301                 goto fib6_rules_init;
3302
3303         ret = -ENOBUFS;
3304         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3305             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3306             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3307                 goto out_register_late_subsys;
3308
3309         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3310         if (ret)
3311                 goto out_register_late_subsys;
3312
3313 out:
3314         return ret;
3315
3316 out_register_late_subsys:
3317         unregister_pernet_subsys(&ip6_route_net_late_ops);
3318 fib6_rules_init:
3319         fib6_rules_cleanup();
3320 xfrm6_init:
3321         xfrm6_fini();
3322 out_fib6_init:
3323         fib6_gc_cleanup();
3324 out_register_subsys:
3325         unregister_pernet_subsys(&ip6_route_net_ops);
3326 out_register_inetpeer:
3327         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3328 out_dst_entries:
3329         dst_entries_destroy(&ip6_dst_blackhole_ops);
3330 out_kmem_cache:
3331         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3332         goto out;
3333 }
3334
/*
 * Tear down the IPv6 routing subsystem: unregister the netdevice
 * notifier and the per-namespace operations, clean up policy rules,
 * xfrm6 and the FIB garbage collector, and destroy the blackhole dst
 * counter and the dst slab cache set up by ip6_route_init().
 *
 * The rtnetlink handlers registered by ip6_route_init() are not
 * unregistered in this function.
 */
3335 void ip6_route_cleanup(void)
3336 {
3337         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3338         unregister_pernet_subsys(&ip6_route_net_late_ops);
3339         fib6_rules_cleanup();
3340         xfrm6_fini();
3341         fib6_gc_cleanup();
/*
 * NOTE(review): ip6_route_init()'s error path unregisters
 * ip6_route_net_ops before ipv6_inetpeer_ops; here the order is the
 * other way round - confirm this is intentional.
 */
3342         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3343         unregister_pernet_subsys(&ip6_route_net_ops);
3344         dst_entries_destroy(&ip6_dst_blackhole_ops);
3345         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3346 }