]> git.karo-electronics.de Git - linux-beck.git/blob - net/ipv6/route.c
ipv6: remove rt6i_peer_genid from rt6_info and its handler
[linux-beck.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* dst_ops->redirect handler for blackhole dsts: a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 255,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= (__force u32)fl6->daddr.s6_addr32[0];
392         val ^= (__force u32)fl6->daddr.s6_addr32[1];
393         val ^= (__force u32)fl6->daddr.s6_addr32[2];
394         val ^= (__force u32)fl6->daddr.s6_addr32[3];
395
396         val ^= (__force u32)fl6->saddr.s6_addr32[0];
397         val ^= (__force u32)fl6->saddr.s6_addr32[1];
398         val ^= (__force u32)fl6->saddr.s6_addr32[2];
399         val ^= (__force u32)fl6->saddr.s6_addr32[3];
400
401         /* Work only if this not encapsulated */
402         switch (fl6->flowi6_proto) {
403         case IPPROTO_UDP:
404         case IPPROTO_TCP:
405         case IPPROTO_SCTP:
406                 val ^= (__force u16)fl6->fl6_sport;
407                 val ^= (__force u16)fl6->fl6_dport;
408                 break;
409
410         case IPPROTO_ICMPV6:
411                 val ^= (__force u16)fl6->fl6_icmp_type;
412                 val ^= (__force u16)fl6->fl6_icmp_code;
413                 break;
414         }
415         /* RFC6438 recommands to use flowlabel */
416         val ^= (__force u32)fl6->flowlabel;
417
418         /* Perhaps, we need to tune, this function? */
419         val = val ^ (val >> 7) ^ (val >> 12);
420         return val % candidate_count;
421 }
422
423 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424                                              struct flowi6 *fl6)
425 {
426         struct rt6_info *sibling, *next_sibling;
427         int route_choosen;
428
429         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430         /* Don't change the route, if route_choosen == 0
431          * (siblings does not include ourself)
432          */
433         if (route_choosen)
434                 list_for_each_entry_safe(sibling, next_sibling,
435                                 &match->rt6i_siblings, rt6i_siblings) {
436                         route_choosen--;
437                         if (route_choosen == 0) {
438                                 match = sibling;
439                                 break;
440                         }
441                 }
442         return match;
443 }
444
445 /*
446  *      Route lookup. Any table->tb6_lock is implied.
447  */
448
449 static inline struct rt6_info *rt6_device_match(struct net *net,
450                                                     struct rt6_info *rt,
451                                                     const struct in6_addr *saddr,
452                                                     int oif,
453                                                     int flags)
454 {
455         struct rt6_info *local = NULL;
456         struct rt6_info *sprt;
457
458         if (!oif && ipv6_addr_any(saddr))
459                 goto out;
460
461         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462                 struct net_device *dev = sprt->dst.dev;
463
464                 if (oif) {
465                         if (dev->ifindex == oif)
466                                 return sprt;
467                         if (dev->flags & IFF_LOOPBACK) {
468                                 if (!sprt->rt6i_idev ||
469                                     sprt->rt6i_idev->dev->ifindex != oif) {
470                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
471                                                 continue;
472                                         if (local && (!oif ||
473                                                       local->rt6i_idev->dev->ifindex == oif))
474                                                 continue;
475                                 }
476                                 local = sprt;
477                         }
478                 } else {
479                         if (ipv6_chk_addr(net, saddr, dev,
480                                           flags & RT6_LOOKUP_F_IFACE))
481                                 return sprt;
482                 }
483         }
484
485         if (oif) {
486                 if (local)
487                         return local;
488
489                 if (flags & RT6_LOOKUP_F_IFACE)
490                         return net->ipv6.ip6_null_entry;
491         }
492 out:
493         return rt;
494 }
495
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * When @rt has a neighbour that is not NUD_VALID and more than
 * rtr_probe_interval jiffies have passed since neigh->updated, the
 * timestamp is refreshed and a solicitation for the neighbour's address
 * is sent to its solicited-node multicast address via ndisc_send_ns().
 * A NULL @rt or a route without a neighbour is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->n : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	bool due;

	/* Cheap unlocked check first; it is repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due) {
		/*
		 * NOTE(review): neigh->updated is written while only the
		 * read side of neigh->lock is held - confirm this is
		 * intended.
		 */
		neigh->updated = jiffies;
	}
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
532
533 /*
534  * Default Router Selection (RFC 2461 6.3.6)
535  */
536 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537 {
538         struct net_device *dev = rt->dst.dev;
539         if (!oif || dev->ifindex == oif)
540                 return 2;
541         if ((dev->flags & IFF_LOOPBACK) &&
542             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543                 return 1;
544         return 0;
545 }
546
547 static inline int rt6_check_neigh(struct rt6_info *rt)
548 {
549         struct neighbour *neigh;
550         int m;
551
552         neigh = rt->n;
553         if (rt->rt6i_flags & RTF_NONEXTHOP ||
554             !(rt->rt6i_flags & RTF_GATEWAY))
555                 m = 1;
556         else if (neigh) {
557                 read_lock_bh(&neigh->lock);
558                 if (neigh->nud_state & NUD_VALID)
559                         m = 2;
560 #ifdef CONFIG_IPV6_ROUTER_PREF
561                 else if (neigh->nud_state & NUD_FAILED)
562                         m = 0;
563 #endif
564                 else
565                         m = 1;
566                 read_unlock_bh(&neigh->lock);
567         } else
568                 m = 0;
569         return m;
570 }
571
572 static int rt6_score_route(struct rt6_info *rt, int oif,
573                            int strict)
574 {
575         int m, n;
576
577         m = rt6_check_dev(rt, oif);
578         if (!m && (strict & RT6_LOOKUP_F_IFACE))
579                 return -1;
580 #ifdef CONFIG_IPV6_ROUTER_PREF
581         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
582 #endif
583         n = rt6_check_neigh(rt);
584         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
585                 return -1;
586         return m;
587 }
588
589 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
590                                    int *mpri, struct rt6_info *match)
591 {
592         int m;
593
594         if (rt6_check_expired(rt))
595                 goto out;
596
597         m = rt6_score_route(rt, oif, strict);
598         if (m < 0)
599                 goto out;
600
601         if (m > *mpri) {
602                 if (strict & RT6_LOOKUP_F_REACHABLE)
603                         rt6_probe(match);
604                 *mpri = m;
605                 match = rt;
606         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
607                 rt6_probe(rt);
608         }
609
610 out:
611         return match;
612 }
613
614 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
615                                      struct rt6_info *rr_head,
616                                      u32 metric, int oif, int strict)
617 {
618         struct rt6_info *rt, *match;
619         int mpri = -1;
620
621         match = NULL;
622         for (rt = rr_head; rt && rt->rt6i_metric == metric;
623              rt = rt->dst.rt6_next)
624                 match = find_match(rt, oif, strict, &mpri, match);
625         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
626              rt = rt->dst.rt6_next)
627                 match = find_match(rt, oif, strict, &mpri, match);
628
629         return match;
630 }
631
632 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
633 {
634         struct rt6_info *match, *rt0;
635         struct net *net;
636
637         rt0 = fn->rr_ptr;
638         if (!rt0)
639                 fn->rr_ptr = rt0 = fn->leaf;
640
641         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
642
643         if (!match &&
644             (strict & RT6_LOOKUP_F_REACHABLE)) {
645                 struct rt6_info *next = rt0->dst.rt6_next;
646
647                 /* no entries matched; do round-robin */
648                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
649                         next = fn->leaf;
650
651                 if (next != rt0)
652                         fn->rr_ptr = next;
653         }
654
655         net = dev_net(rt0->dst.dev);
656         return match ? match : net->ipv6.ip6_null_entry;
657 }
658
659 #ifdef CONFIG_IPV6_ROUTE_INFO
660 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
661                   const struct in6_addr *gwaddr)
662 {
663         struct net *net = dev_net(dev);
664         struct route_info *rinfo = (struct route_info *) opt;
665         struct in6_addr prefix_buf, *prefix;
666         unsigned int pref;
667         unsigned long lifetime;
668         struct rt6_info *rt;
669
670         if (len < sizeof(struct route_info)) {
671                 return -EINVAL;
672         }
673
674         /* Sanity check for prefix_len and length */
675         if (rinfo->length > 3) {
676                 return -EINVAL;
677         } else if (rinfo->prefix_len > 128) {
678                 return -EINVAL;
679         } else if (rinfo->prefix_len > 64) {
680                 if (rinfo->length < 2) {
681                         return -EINVAL;
682                 }
683         } else if (rinfo->prefix_len > 0) {
684                 if (rinfo->length < 1) {
685                         return -EINVAL;
686                 }
687         }
688
689         pref = rinfo->route_pref;
690         if (pref == ICMPV6_ROUTER_PREF_INVALID)
691                 return -EINVAL;
692
693         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
694
695         if (rinfo->length == 3)
696                 prefix = (struct in6_addr *)rinfo->prefix;
697         else {
698                 /* this function is safe */
699                 ipv6_addr_prefix(&prefix_buf,
700                                  (struct in6_addr *)rinfo->prefix,
701                                  rinfo->prefix_len);
702                 prefix = &prefix_buf;
703         }
704
705         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
706                                 dev->ifindex);
707
708         if (rt && !lifetime) {
709                 ip6_del_rt(rt);
710                 rt = NULL;
711         }
712
713         if (!rt && lifetime)
714                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
715                                         pref);
716         else if (rt)
717                 rt->rt6i_flags = RTF_ROUTEINFO |
718                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
719
720         if (rt) {
721                 if (!addrconf_finite_timeout(lifetime))
722                         rt6_clean_expires(rt);
723                 else
724                         rt6_set_expires(rt, jiffies + HZ * lifetime);
725
726                 ip6_rt_put(rt);
727         }
728         return 0;
729 }
730 #endif
731
/*
 * BACKTRACK - climb the fib6 tree when the lookup ended on the null entry.
 *
 * Must be expanded inside a function that has local variables "rt" and
 * "fn" and the labels "out" and "restart".  If rt is the namespace's
 * ip6_null_entry, fn is moved towards the root (descending into a
 * parent's source subtree via fib6_lookup() where one exists) until a
 * node flagged RTN_RTINFO is found (goto restart) or the top-level root
 * is reached (goto out).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) &&				\
			    FIB6_SUBTREE(pn) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
749
750 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
751                                              struct fib6_table *table,
752                                              struct flowi6 *fl6, int flags)
753 {
754         struct fib6_node *fn;
755         struct rt6_info *rt;
756
757         read_lock_bh(&table->tb6_lock);
758         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
759 restart:
760         rt = fn->leaf;
761         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
762         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
763                 rt = rt6_multipath_select(rt, fl6);
764         BACKTRACK(net, &fl6->saddr);
765 out:
766         dst_use(&rt->dst, jiffies);
767         read_unlock_bh(&table->tb6_lock);
768         return rt;
769
770 }
771
772 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
773                                     int flags)
774 {
775         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
776 }
777 EXPORT_SYMBOL_GPL(ip6_route_lookup);
778
779 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
780                             const struct in6_addr *saddr, int oif, int strict)
781 {
782         struct flowi6 fl6 = {
783                 .flowi6_oif = oif,
784                 .daddr = *daddr,
785         };
786         struct dst_entry *dst;
787         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
788
789         if (saddr) {
790                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
791                 flags |= RT6_LOOKUP_F_HAS_SADDR;
792         }
793
794         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
795         if (dst->error == 0)
796                 return (struct rt6_info *) dst;
797
798         dst_release(dst);
799
800         return NULL;
801 }
802
803 EXPORT_SYMBOL(rt6_lookup);
804
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
810
811 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
812 {
813         int err;
814         struct fib6_table *table;
815
816         table = rt->rt6i_table;
817         write_lock_bh(&table->tb6_lock);
818         err = fib6_add(&table->tb6_root, rt, info);
819         write_unlock_bh(&table->tb6_lock);
820
821         return err;
822 }
823
824 int ip6_ins_rt(struct rt6_info *rt)
825 {
826         struct nl_info info = {
827                 .nl_net = dev_net(rt->dst.dev),
828         };
829         return __ip6_ins_rt(rt, &info);
830 }
831
832 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
833                                       const struct in6_addr *daddr,
834                                       const struct in6_addr *saddr)
835 {
836         struct rt6_info *rt;
837
838         /*
839          *      Clone the route.
840          */
841
842         rt = ip6_rt_copy(ort, daddr);
843
844         if (rt) {
845                 int attempts = !in_softirq();
846
847                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
848                         if (ort->rt6i_dst.plen != 128 &&
849                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
850                                 rt->rt6i_flags |= RTF_ANYCAST;
851                         rt->rt6i_gateway = *daddr;
852                 }
853
854                 rt->rt6i_flags |= RTF_CACHE;
855
856 #ifdef CONFIG_IPV6_SUBTREES
857                 if (rt->rt6i_src.plen && saddr) {
858                         rt->rt6i_src.addr = *saddr;
859                         rt->rt6i_src.plen = 128;
860                 }
861 #endif
862
863         retry:
864                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
865                         struct net *net = dev_net(rt->dst.dev);
866                         int saved_rt_min_interval =
867                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
868                         int saved_rt_elasticity =
869                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
870
871                         if (attempts-- > 0) {
872                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
873                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
874
875                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
876
877                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
878                                         saved_rt_elasticity;
879                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
880                                         saved_rt_min_interval;
881                                 goto retry;
882                         }
883
884                         net_warn_ratelimited("Neighbour table overflow\n");
885                         dst_free(&rt->dst);
886                         return NULL;
887                 }
888         }
889
890         return rt;
891 }
892
893 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
894                                         const struct in6_addr *daddr)
895 {
896         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
897
898         if (rt) {
899                 rt->rt6i_flags |= RTF_CACHE;
900                 rt->n = neigh_clone(ort->n);
901         }
902         return rt;
903 }
904
/*
 * ip6_pol_route - look up a route for @fl6 in @table, creating a cached
 * copy of the selected route when one is needed.
 *
 * @oif is the interface index handed to rt6_select(); @flags contributes
 * only RT6_LOOKUP_F_IFACE (strict interface matching).
 *
 * Unless forwarding is enabled in devconf_all, the first selection pass
 * also asks for RT6_LOOKUP_F_REACHABLE; if that pass ends at the "out"
 * label the flag is dropped and the lookup is repeated once without it.
 *
 * Every return path takes a reference on the returned route with
 * dst_hold() and updates its lastuse/__use counters.  When nothing
 * matches, net->ipv6.ip6_null_entry is returned.
 */
905 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
906                                       struct flowi6 *fl6, int flags)
907 {
908         struct fib6_node *fn;
909         struct rt6_info *rt, *nrt;
910         int strict = 0;
        /* Upper bound on relookups after a failed clone/insert. */
911         int attempts = 3;
912         int err;
913         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
914
915         strict |= flags & RT6_LOOKUP_F_IFACE;
916
917 relookup:
918         read_lock_bh(&table->tb6_lock);
919
920 restart_2:
921         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
922
923 restart:
924         rt = rt6_select(fn, oif, strict | reachable);
        /* Multipath selection is only applied when no output interface was given. */
925         if (rt->rt6i_nsiblings && oif == 0)
926                 rt = rt6_multipath_select(rt, fl6);
        /*
         * NOTE(review): BACKTRACK is a macro defined outside this chunk;
         * presumably it walks fn upwards and jumps to the "restart"/"out"
         * labels using the local names fn and rt -- confirm before renaming
         * any local or label in this function.
         */
927         BACKTRACK(net, &fl6->saddr);
        /* Nothing found, or already a cached entry: no clone is needed. */
928         if (rt == net->ipv6.ip6_null_entry ||
929             rt->rt6i_flags & RTF_CACHE)
930                 goto out;
931
        /* Pin the selected route; the table lock is dropped while cloning. */
932         dst_hold(&rt->dst);
933         read_unlock_bh(&table->tb6_lock);
934
        /*
         * No neighbour bound and a nexthop is required: copy with
         * rt6_alloc_cow().  Otherwise a non-host route is copied with
         * rt6_alloc_clone(), and a host route is returned as it is.
         */
935         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
936                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
937         else if (!(rt->dst.flags & DST_HOST))
938                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
939         else
940                 goto out2;
941
        /* Swap the original for the copy, or the null entry if copying failed. */
942         ip6_rt_put(rt);
943         rt = nrt ? : net->ipv6.ip6_null_entry;
944
945         dst_hold(&rt->dst);
946         if (nrt) {
947                 err = ip6_ins_rt(nrt);
948                 if (!err)
949                         goto out2;
950         }
951
        /* Out of retries: return what we have (the copy or the null entry). */
952         if (--attempts <= 0)
953                 goto out2;
954
955         /*
956          * Race condition! In the gap, when table->tb6_lock was
957          * released someone could insert this route.  Relookup.
958          */
959         ip6_rt_put(rt);
960         goto relookup;
961
962 out:
        /* First pass required a reachable router; retry once without that. */
963         if (reachable) {
964                 reachable = 0;
965                 goto restart_2;
966         }
967         dst_hold(&rt->dst);
968         read_unlock_bh(&table->tb6_lock);
969 out2:
970         rt->dst.lastuse = jiffies;
971         rt->dst.__use++;
972
973         return rt;
974 }
975
976 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
977                                             struct flowi6 *fl6, int flags)
978 {
979         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
980 }
981
982 static struct dst_entry *ip6_route_input_lookup(struct net *net,
983                                                 struct net_device *dev,
984                                                 struct flowi6 *fl6, int flags)
985 {
986         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
987                 flags |= RT6_LOOKUP_F_IFACE;
988
989         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
990 }
991
992 void ip6_route_input(struct sk_buff *skb)
993 {
994         const struct ipv6hdr *iph = ipv6_hdr(skb);
995         struct net *net = dev_net(skb->dev);
996         int flags = RT6_LOOKUP_F_HAS_SADDR;
997         struct flowi6 fl6 = {
998                 .flowi6_iif = skb->dev->ifindex,
999                 .daddr = iph->daddr,
1000                 .saddr = iph->saddr,
1001                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1002                 .flowi6_mark = skb->mark,
1003                 .flowi6_proto = iph->nexthdr,
1004         };
1005
1006         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1007 }
1008
1009 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1010                                              struct flowi6 *fl6, int flags)
1011 {
1012         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1013 }
1014
1015 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1016                                     struct flowi6 *fl6)
1017 {
1018         int flags = 0;
1019
1020         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1021
1022         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1023                 flags |= RT6_LOOKUP_F_IFACE;
1024
1025         if (!ipv6_addr_any(&fl6->saddr))
1026                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1027         else if (sk)
1028                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1029
1030         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1031 }
1032
1033 EXPORT_SYMBOL(ip6_route_output);
1034
1035 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1036 {
1037         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1038         struct dst_entry *new = NULL;
1039
1040         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1041         if (rt) {
1042                 new = &rt->dst;
1043
1044                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1045                 rt6_init_peer(rt, net->ipv6.peers);
1046
1047                 new->__use = 1;
1048                 new->input = dst_discard;
1049                 new->output = dst_discard;
1050
1051                 if (dst_metrics_read_only(&ort->dst))
1052                         new->_metrics = ort->dst._metrics;
1053                 else
1054                         dst_copy_metrics(new, &ort->dst);
1055                 rt->rt6i_idev = ort->rt6i_idev;
1056                 if (rt->rt6i_idev)
1057                         in6_dev_hold(rt->rt6i_idev);
1058
1059                 rt->rt6i_gateway = ort->rt6i_gateway;
1060                 rt->rt6i_flags = ort->rt6i_flags;
1061                 rt6_clean_expires(rt);
1062                 rt->rt6i_metric = 0;
1063
1064                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1065 #ifdef CONFIG_IPV6_SUBTREES
1066                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1067 #endif
1068
1069                 dst_free(new);
1070         }
1071
1072         dst_release(dst_orig);
1073         return new ? new : ERR_PTR(-ENOMEM);
1074 }
1075
1076 /*
1077  *      Destination cache support functions
1078  */
1079
1080 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1081 {
1082         struct rt6_info *rt;
1083
1084         rt = (struct rt6_info *) dst;
1085
1086         /* All IPV6 dsts are created with ->obsolete set to the value
1087          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1088          * into this function always.
1089          */
1090         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1091                 return NULL;
1092
1093         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1094                 return dst;
1095
1096         return NULL;
1097 }
1098
1099 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1100 {
1101         struct rt6_info *rt = (struct rt6_info *) dst;
1102
1103         if (rt) {
1104                 if (rt->rt6i_flags & RTF_CACHE) {
1105                         if (rt6_check_expired(rt)) {
1106                                 ip6_del_rt(rt);
1107                                 dst = NULL;
1108                         }
1109                 } else {
1110                         dst_release(dst);
1111                         dst = NULL;
1112                 }
1113         }
1114         return dst;
1115 }
1116
1117 static void ip6_link_failure(struct sk_buff *skb)
1118 {
1119         struct rt6_info *rt;
1120
1121         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1122
1123         rt = (struct rt6_info *) skb_dst(skb);
1124         if (rt) {
1125                 if (rt->rt6i_flags & RTF_CACHE)
1126                         rt6_update_expires(rt, 0);
1127                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1128                         rt->rt6i_node->fn_sernum = -1;
1129         }
1130 }
1131
1132 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1133                                struct sk_buff *skb, u32 mtu)
1134 {
1135         struct rt6_info *rt6 = (struct rt6_info*)dst;
1136
1137         dst_confirm(dst);
1138         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1139                 struct net *net = dev_net(dst->dev);
1140
1141                 rt6->rt6i_flags |= RTF_MODIFIED;
1142                 if (mtu < IPV6_MIN_MTU) {
1143                         u32 features = dst_metric(dst, RTAX_FEATURES);
1144                         mtu = IPV6_MIN_MTU;
1145                         features |= RTAX_FEATURE_ALLFRAG;
1146                         dst_metric_set(dst, RTAX_FEATURES, features);
1147                 }
1148                 dst_metric_set(dst, RTAX_MTU, mtu);
1149                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1150         }
1151 }
1152
1153 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1154                      int oif, u32 mark)
1155 {
1156         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1157         struct dst_entry *dst;
1158         struct flowi6 fl6;
1159
1160         memset(&fl6, 0, sizeof(fl6));
1161         fl6.flowi6_oif = oif;
1162         fl6.flowi6_mark = mark;
1163         fl6.flowi6_flags = 0;
1164         fl6.daddr = iph->daddr;
1165         fl6.saddr = iph->saddr;
1166         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1167
1168         dst = ip6_route_output(net, NULL, &fl6);
1169         if (!dst->error)
1170                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1171         dst_release(dst);
1172 }
1173 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1174
1175 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1176 {
1177         ip6_update_pmtu(skb, sock_net(sk), mtu,
1178                         sk->sk_bound_dev_if, sk->sk_mark);
1179 }
1180 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1181
1182 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1183 {
1184         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1185         struct dst_entry *dst;
1186         struct flowi6 fl6;
1187
1188         memset(&fl6, 0, sizeof(fl6));
1189         fl6.flowi6_oif = oif;
1190         fl6.flowi6_mark = mark;
1191         fl6.flowi6_flags = 0;
1192         fl6.daddr = iph->daddr;
1193         fl6.saddr = iph->saddr;
1194         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1195
1196         dst = ip6_route_output(net, NULL, &fl6);
1197         if (!dst->error)
1198                 rt6_do_redirect(dst, NULL, skb);
1199         dst_release(dst);
1200 }
1201 EXPORT_SYMBOL_GPL(ip6_redirect);
1202
1203 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1204 {
1205         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1206 }
1207 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1208
1209 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1210 {
1211         struct net_device *dev = dst->dev;
1212         unsigned int mtu = dst_mtu(dst);
1213         struct net *net = dev_net(dev);
1214
1215         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1216
1217         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1218                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1219
1220         /*
1221          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1222          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1223          * IPV6_MAXPLEN is also valid and means: "any MSS,
1224          * rely only on pmtu discovery"
1225          */
1226         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1227                 mtu = IPV6_MAXPLEN;
1228         return mtu;
1229 }
1230
1231 static unsigned int ip6_mtu(const struct dst_entry *dst)
1232 {
1233         struct inet6_dev *idev;
1234         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1235
1236         if (mtu)
1237                 return mtu;
1238
1239         mtu = IPV6_MIN_MTU;
1240
1241         rcu_read_lock();
1242         idev = __in6_dev_get(dst->dev);
1243         if (idev)
1244                 mtu = idev->cnf.mtu6;
1245         rcu_read_unlock();
1246
1247         return mtu;
1248 }
1249
1250 static struct dst_entry *icmp6_dst_gc_list;
1251 static DEFINE_SPINLOCK(icmp6_dst_lock);
1252
1253 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1254                                   struct neighbour *neigh,
1255                                   struct flowi6 *fl6)
1256 {
1257         struct dst_entry *dst;
1258         struct rt6_info *rt;
1259         struct inet6_dev *idev = in6_dev_get(dev);
1260         struct net *net = dev_net(dev);
1261
1262         if (unlikely(!idev))
1263                 return ERR_PTR(-ENODEV);
1264
1265         rt = ip6_dst_alloc(net, dev, 0, NULL);
1266         if (unlikely(!rt)) {
1267                 in6_dev_put(idev);
1268                 dst = ERR_PTR(-ENOMEM);
1269                 goto out;
1270         }
1271
1272         if (neigh)
1273                 neigh_hold(neigh);
1274         else {
1275                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1276                 if (IS_ERR(neigh)) {
1277                         in6_dev_put(idev);
1278                         dst_free(&rt->dst);
1279                         return ERR_CAST(neigh);
1280                 }
1281         }
1282
1283         rt->dst.flags |= DST_HOST;
1284         rt->dst.output  = ip6_output;
1285         rt->n = neigh;
1286         atomic_set(&rt->dst.__refcnt, 1);
1287         rt->rt6i_dst.addr = fl6->daddr;
1288         rt->rt6i_dst.plen = 128;
1289         rt->rt6i_idev     = idev;
1290         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1291
1292         spin_lock_bh(&icmp6_dst_lock);
1293         rt->dst.next = icmp6_dst_gc_list;
1294         icmp6_dst_gc_list = &rt->dst;
1295         spin_unlock_bh(&icmp6_dst_lock);
1296
1297         fib6_force_start_gc(net);
1298
1299         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1300
1301 out:
1302         return dst;
1303 }
1304
1305 int icmp6_dst_gc(void)
1306 {
1307         struct dst_entry *dst, **pprev;
1308         int more = 0;
1309
1310         spin_lock_bh(&icmp6_dst_lock);
1311         pprev = &icmp6_dst_gc_list;
1312
1313         while ((dst = *pprev) != NULL) {
1314                 if (!atomic_read(&dst->__refcnt)) {
1315                         *pprev = dst->next;
1316                         dst_free(dst);
1317                 } else {
1318                         pprev = &dst->next;
1319                         ++more;
1320                 }
1321         }
1322
1323         spin_unlock_bh(&icmp6_dst_lock);
1324
1325         return more;
1326 }
1327
1328 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1329                             void *arg)
1330 {
1331         struct dst_entry *dst, **pprev;
1332
1333         spin_lock_bh(&icmp6_dst_lock);
1334         pprev = &icmp6_dst_gc_list;
1335         while ((dst = *pprev) != NULL) {
1336                 struct rt6_info *rt = (struct rt6_info *) dst;
1337                 if (func(rt, arg)) {
1338                         *pprev = dst->next;
1339                         dst_free(dst);
1340                 } else {
1341                         pprev = &dst->next;
1342                 }
1343         }
1344         spin_unlock_bh(&icmp6_dst_lock);
1345 }
1346
1347 static int ip6_dst_gc(struct dst_ops *ops)
1348 {
1349         unsigned long now = jiffies;
1350         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1351         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1352         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1353         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1354         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1355         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1356         int entries;
1357
1358         entries = dst_entries_get_fast(ops);
1359         if (time_after(rt_last_gc + rt_min_interval, now) &&
1360             entries <= rt_max_size)
1361                 goto out;
1362
1363         net->ipv6.ip6_rt_gc_expire++;
1364         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1365         net->ipv6.ip6_rt_last_gc = now;
1366         entries = dst_entries_get_slow(ops);
1367         if (entries < ops->gc_thresh)
1368                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1369 out:
1370         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1371         return entries > rt_max_size;
1372 }
1373
1374 /* Clean host part of a prefix. Not necessary in radix tree,
1375    but results in cleaner routing tables.
1376
1377    Remove it only when all the things will work!
1378  */
1379
1380 int ip6_dst_hoplimit(struct dst_entry *dst)
1381 {
1382         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1383         if (hoplimit == 0) {
1384                 struct net_device *dev = dst->dev;
1385                 struct inet6_dev *idev;
1386
1387                 rcu_read_lock();
1388                 idev = __in6_dev_get(dev);
1389                 if (idev)
1390                         hoplimit = idev->cnf.hop_limit;
1391                 else
1392                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1393                 rcu_read_unlock();
1394         }
1395         return hoplimit;
1396 }
1397 EXPORT_SYMBOL(ip6_dst_hoplimit);
1398
1399 /*
1400  *
1401  */
1402
1403 int ip6_route_add(struct fib6_config *cfg)
1404 {
1405         int err;
1406         struct net *net = cfg->fc_nlinfo.nl_net;
1407         struct rt6_info *rt = NULL;
1408         struct net_device *dev = NULL;
1409         struct inet6_dev *idev = NULL;
1410         struct fib6_table *table;
1411         int addr_type;
1412
1413         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1414                 return -EINVAL;
1415 #ifndef CONFIG_IPV6_SUBTREES
1416         if (cfg->fc_src_len)
1417                 return -EINVAL;
1418 #endif
1419         if (cfg->fc_ifindex) {
1420                 err = -ENODEV;
1421                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1422                 if (!dev)
1423                         goto out;
1424                 idev = in6_dev_get(dev);
1425                 if (!idev)
1426                         goto out;
1427         }
1428
1429         if (cfg->fc_metric == 0)
1430                 cfg->fc_metric = IP6_RT_PRIO_USER;
1431
1432         err = -ENOBUFS;
1433         if (cfg->fc_nlinfo.nlh &&
1434             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1435                 table = fib6_get_table(net, cfg->fc_table);
1436                 if (!table) {
1437                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1438                         table = fib6_new_table(net, cfg->fc_table);
1439                 }
1440         } else {
1441                 table = fib6_new_table(net, cfg->fc_table);
1442         }
1443
1444         if (!table)
1445                 goto out;
1446
1447         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1448
1449         if (!rt) {
1450                 err = -ENOMEM;
1451                 goto out;
1452         }
1453
1454         if (cfg->fc_flags & RTF_EXPIRES)
1455                 rt6_set_expires(rt, jiffies +
1456                                 clock_t_to_jiffies(cfg->fc_expires));
1457         else
1458                 rt6_clean_expires(rt);
1459
1460         if (cfg->fc_protocol == RTPROT_UNSPEC)
1461                 cfg->fc_protocol = RTPROT_BOOT;
1462         rt->rt6i_protocol = cfg->fc_protocol;
1463
1464         addr_type = ipv6_addr_type(&cfg->fc_dst);
1465
1466         if (addr_type & IPV6_ADDR_MULTICAST)
1467                 rt->dst.input = ip6_mc_input;
1468         else if (cfg->fc_flags & RTF_LOCAL)
1469                 rt->dst.input = ip6_input;
1470         else
1471                 rt->dst.input = ip6_forward;
1472
1473         rt->dst.output = ip6_output;
1474
1475         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1476         rt->rt6i_dst.plen = cfg->fc_dst_len;
1477         if (rt->rt6i_dst.plen == 128)
1478                rt->dst.flags |= DST_HOST;
1479
1480         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1481                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1482                 if (!metrics) {
1483                         err = -ENOMEM;
1484                         goto out;
1485                 }
1486                 dst_init_metrics(&rt->dst, metrics, 0);
1487         }
1488 #ifdef CONFIG_IPV6_SUBTREES
1489         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1490         rt->rt6i_src.plen = cfg->fc_src_len;
1491 #endif
1492
1493         rt->rt6i_metric = cfg->fc_metric;
1494
1495         /* We cannot add true routes via loopback here,
1496            they would result in kernel looping; promote them to reject routes
1497          */
1498         if ((cfg->fc_flags & RTF_REJECT) ||
1499             (dev && (dev->flags & IFF_LOOPBACK) &&
1500              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1501              !(cfg->fc_flags & RTF_LOCAL))) {
1502                 /* hold loopback dev/idev if we haven't done so. */
1503                 if (dev != net->loopback_dev) {
1504                         if (dev) {
1505                                 dev_put(dev);
1506                                 in6_dev_put(idev);
1507                         }
1508                         dev = net->loopback_dev;
1509                         dev_hold(dev);
1510                         idev = in6_dev_get(dev);
1511                         if (!idev) {
1512                                 err = -ENODEV;
1513                                 goto out;
1514                         }
1515                 }
1516                 rt->dst.output = ip6_pkt_discard_out;
1517                 rt->dst.input = ip6_pkt_discard;
1518                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1519                 switch (cfg->fc_type) {
1520                 case RTN_BLACKHOLE:
1521                         rt->dst.error = -EINVAL;
1522                         break;
1523                 case RTN_PROHIBIT:
1524                         rt->dst.error = -EACCES;
1525                         break;
1526                 case RTN_THROW:
1527                         rt->dst.error = -EAGAIN;
1528                         break;
1529                 default:
1530                         rt->dst.error = -ENETUNREACH;
1531                         break;
1532                 }
1533                 goto install_route;
1534         }
1535
1536         if (cfg->fc_flags & RTF_GATEWAY) {
1537                 const struct in6_addr *gw_addr;
1538                 int gwa_type;
1539
1540                 gw_addr = &cfg->fc_gateway;
1541                 rt->rt6i_gateway = *gw_addr;
1542                 gwa_type = ipv6_addr_type(gw_addr);
1543
1544                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1545                         struct rt6_info *grt;
1546
1547                         /* IPv6 strictly inhibits using not link-local
1548                            addresses as nexthop address.
1549                            Otherwise, router will not able to send redirects.
1550                            It is very good, but in some (rare!) circumstances
1551                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1552                            some exceptions. --ANK
1553                          */
1554                         err = -EINVAL;
1555                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1556                                 goto out;
1557
1558                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1559
1560                         err = -EHOSTUNREACH;
1561                         if (!grt)
1562                                 goto out;
1563                         if (dev) {
1564                                 if (dev != grt->dst.dev) {
1565                                         ip6_rt_put(grt);
1566                                         goto out;
1567                                 }
1568                         } else {
1569                                 dev = grt->dst.dev;
1570                                 idev = grt->rt6i_idev;
1571                                 dev_hold(dev);
1572                                 in6_dev_hold(grt->rt6i_idev);
1573                         }
1574                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1575                                 err = 0;
1576                         ip6_rt_put(grt);
1577
1578                         if (err)
1579                                 goto out;
1580                 }
1581                 err = -EINVAL;
1582                 if (!dev || (dev->flags & IFF_LOOPBACK))
1583                         goto out;
1584         }
1585
1586         err = -ENODEV;
1587         if (!dev)
1588                 goto out;
1589
1590         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1591                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1592                         err = -EINVAL;
1593                         goto out;
1594                 }
1595                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1596                 rt->rt6i_prefsrc.plen = 128;
1597         } else
1598                 rt->rt6i_prefsrc.plen = 0;
1599
1600         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1601                 err = rt6_bind_neighbour(rt, dev);
1602                 if (err)
1603                         goto out;
1604         }
1605
1606         rt->rt6i_flags = cfg->fc_flags;
1607
1608 install_route:
1609         if (cfg->fc_mx) {
1610                 struct nlattr *nla;
1611                 int remaining;
1612
1613                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1614                         int type = nla_type(nla);
1615
1616                         if (type) {
1617                                 if (type > RTAX_MAX) {
1618                                         err = -EINVAL;
1619                                         goto out;
1620                                 }
1621
1622                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1623                         }
1624                 }
1625         }
1626
1627         rt->dst.dev = dev;
1628         rt->rt6i_idev = idev;
1629         rt->rt6i_table = table;
1630
1631         cfg->fc_nlinfo.nl_net = dev_net(dev);
1632
1633         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1634
1635 out:
1636         if (dev)
1637                 dev_put(dev);
1638         if (idev)
1639                 in6_dev_put(idev);
1640         if (rt)
1641                 dst_free(&rt->dst);
1642         return err;
1643 }
1644
1645 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1646 {
1647         int err;
1648         struct fib6_table *table;
1649         struct net *net = dev_net(rt->dst.dev);
1650
1651         if (rt == net->ipv6.ip6_null_entry) {
1652                 err = -ENOENT;
1653                 goto out;
1654         }
1655
1656         table = rt->rt6i_table;
1657         write_lock_bh(&table->tb6_lock);
1658         err = fib6_del(rt, info);
1659         write_unlock_bh(&table->tb6_lock);
1660
1661 out:
1662         ip6_rt_put(rt);
1663         return err;
1664 }
1665
1666 int ip6_del_rt(struct rt6_info *rt)
1667 {
1668         struct nl_info info = {
1669                 .nl_net = dev_net(rt->dst.dev),
1670         };
1671         return __ip6_del_rt(rt, &info);
1672 }
1673
1674 static int ip6_route_del(struct fib6_config *cfg)
1675 {
1676         struct fib6_table *table;
1677         struct fib6_node *fn;
1678         struct rt6_info *rt;
1679         int err = -ESRCH;
1680
1681         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1682         if (!table)
1683                 return err;
1684
1685         read_lock_bh(&table->tb6_lock);
1686
1687         fn = fib6_locate(&table->tb6_root,
1688                          &cfg->fc_dst, cfg->fc_dst_len,
1689                          &cfg->fc_src, cfg->fc_src_len);
1690
1691         if (fn) {
1692                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1693                         if (cfg->fc_ifindex &&
1694                             (!rt->dst.dev ||
1695                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1696                                 continue;
1697                         if (cfg->fc_flags & RTF_GATEWAY &&
1698                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1699                                 continue;
1700                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1701                                 continue;
1702                         dst_hold(&rt->dst);
1703                         read_unlock_bh(&table->tb6_lock);
1704
1705                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1706                 }
1707         }
1708         read_unlock_bh(&table->tb6_lock);
1709
1710         return err;
1711 }
1712
1713 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1714 {
1715         struct net *net = dev_net(skb->dev);
1716         struct netevent_redirect netevent;
1717         struct rt6_info *rt, *nrt = NULL;
1718         const struct in6_addr *target;
1719         struct ndisc_options ndopts;
1720         const struct in6_addr *dest;
1721         struct neighbour *old_neigh;
1722         struct inet6_dev *in6_dev;
1723         struct neighbour *neigh;
1724         struct icmp6hdr *icmph;
1725         int optlen, on_link;
1726         u8 *lladdr;
1727
1728         optlen = skb->tail - skb->transport_header;
1729         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1730
1731         if (optlen < 0) {
1732                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1733                 return;
1734         }
1735
1736         icmph = icmp6_hdr(skb);
1737         target = (const struct in6_addr *) (icmph + 1);
1738         dest = target + 1;
1739
1740         if (ipv6_addr_is_multicast(dest)) {
1741                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1742                 return;
1743         }
1744
1745         on_link = 0;
1746         if (ipv6_addr_equal(dest, target)) {
1747                 on_link = 1;
1748         } else if (ipv6_addr_type(target) !=
1749                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1750                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1751                 return;
1752         }
1753
1754         in6_dev = __in6_dev_get(skb->dev);
1755         if (!in6_dev)
1756                 return;
1757         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1758                 return;
1759
1760         /* RFC2461 8.1:
1761          *      The IP source address of the Redirect MUST be the same as the current
1762          *      first-hop router for the specified ICMP Destination Address.
1763          */
1764
1765         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1766                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1767                 return;
1768         }
1769
1770         lladdr = NULL;
1771         if (ndopts.nd_opts_tgt_lladdr) {
1772                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1773                                              skb->dev);
1774                 if (!lladdr) {
1775                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1776                         return;
1777                 }
1778         }
1779
1780         rt = (struct rt6_info *) dst;
1781         if (rt == net->ipv6.ip6_null_entry) {
1782                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1783                 return;
1784         }
1785
1786         /* Redirect received -> path was valid.
1787          * Look, redirects are sent only in response to data packets,
1788          * so that this nexthop apparently is reachable. --ANK
1789          */
1790         dst_confirm(&rt->dst);
1791
1792         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1793         if (!neigh)
1794                 return;
1795
1796         /* Duplicate redirect: silently ignore. */
1797         old_neigh = rt->n;
1798         if (neigh == old_neigh)
1799                 goto out;
1800
1801         /*
1802          *      We have finally decided to accept it.
1803          */
1804
1805         neigh_update(neigh, lladdr, NUD_STALE,
1806                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1807                      NEIGH_UPDATE_F_OVERRIDE|
1808                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1809                                      NEIGH_UPDATE_F_ISROUTER))
1810                      );
1811
1812         nrt = ip6_rt_copy(rt, dest);
1813         if (!nrt)
1814                 goto out;
1815
1816         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1817         if (on_link)
1818                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1819
1820         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1821         nrt->n = neigh_clone(neigh);
1822
1823         if (ip6_ins_rt(nrt))
1824                 goto out;
1825
1826         netevent.old = &rt->dst;
1827         netevent.old_neigh = old_neigh;
1828         netevent.new = &nrt->dst;
1829         netevent.new_neigh = neigh;
1830         netevent.daddr = dest;
1831         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1832
1833         if (rt->rt6i_flags & RTF_CACHE) {
1834                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1835                 ip6_del_rt(rt);
1836         }
1837
1838 out:
1839         neigh_release(neigh);
1840 }
1841
1842 /*
1843  *      Misc support functions
1844  */
1845
1846 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1847                                     const struct in6_addr *dest)
1848 {
1849         struct net *net = dev_net(ort->dst.dev);
1850         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1851                                             ort->rt6i_table);
1852
1853         if (rt) {
1854                 rt->dst.input = ort->dst.input;
1855                 rt->dst.output = ort->dst.output;
1856                 rt->dst.flags |= DST_HOST;
1857
1858                 rt->rt6i_dst.addr = *dest;
1859                 rt->rt6i_dst.plen = 128;
1860                 dst_copy_metrics(&rt->dst, &ort->dst);
1861                 rt->dst.error = ort->dst.error;
1862                 rt->rt6i_idev = ort->rt6i_idev;
1863                 if (rt->rt6i_idev)
1864                         in6_dev_hold(rt->rt6i_idev);
1865                 rt->dst.lastuse = jiffies;
1866
1867                 rt->rt6i_gateway = ort->rt6i_gateway;
1868                 rt->rt6i_flags = ort->rt6i_flags;
1869                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1870                     (RTF_DEFAULT | RTF_ADDRCONF))
1871                         rt6_set_from(rt, ort);
1872                 else
1873                         rt6_clean_expires(rt);
1874                 rt->rt6i_metric = 0;
1875
1876 #ifdef CONFIG_IPV6_SUBTREES
1877                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1878 #endif
1879                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1880                 rt->rt6i_table = ort->rt6i_table;
1881         }
1882         return rt;
1883 }
1884
1885 #ifdef CONFIG_IPV6_ROUTE_INFO
1886 static struct rt6_info *rt6_get_route_info(struct net *net,
1887                                            const struct in6_addr *prefix, int prefixlen,
1888                                            const struct in6_addr *gwaddr, int ifindex)
1889 {
1890         struct fib6_node *fn;
1891         struct rt6_info *rt = NULL;
1892         struct fib6_table *table;
1893
1894         table = fib6_get_table(net, RT6_TABLE_INFO);
1895         if (!table)
1896                 return NULL;
1897
1898         read_lock_bh(&table->tb6_lock);
1899         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1900         if (!fn)
1901                 goto out;
1902
1903         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1904                 if (rt->dst.dev->ifindex != ifindex)
1905                         continue;
1906                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1907                         continue;
1908                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1909                         continue;
1910                 dst_hold(&rt->dst);
1911                 break;
1912         }
1913 out:
1914         read_unlock_bh(&table->tb6_lock);
1915         return rt;
1916 }
1917
1918 static struct rt6_info *rt6_add_route_info(struct net *net,
1919                                            const struct in6_addr *prefix, int prefixlen,
1920                                            const struct in6_addr *gwaddr, int ifindex,
1921                                            unsigned int pref)
1922 {
1923         struct fib6_config cfg = {
1924                 .fc_table       = RT6_TABLE_INFO,
1925                 .fc_metric      = IP6_RT_PRIO_USER,
1926                 .fc_ifindex     = ifindex,
1927                 .fc_dst_len     = prefixlen,
1928                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1929                                   RTF_UP | RTF_PREF(pref),
1930                 .fc_nlinfo.portid = 0,
1931                 .fc_nlinfo.nlh = NULL,
1932                 .fc_nlinfo.nl_net = net,
1933         };
1934
1935         cfg.fc_dst = *prefix;
1936         cfg.fc_gateway = *gwaddr;
1937
1938         /* We should treat it as a default route if prefix length is 0. */
1939         if (!prefixlen)
1940                 cfg.fc_flags |= RTF_DEFAULT;
1941
1942         ip6_route_add(&cfg);
1943
1944         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1945 }
1946 #endif
1947
1948 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1949 {
1950         struct rt6_info *rt;
1951         struct fib6_table *table;
1952
1953         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1954         if (!table)
1955                 return NULL;
1956
1957         read_lock_bh(&table->tb6_lock);
1958         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1959                 if (dev == rt->dst.dev &&
1960                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1961                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1962                         break;
1963         }
1964         if (rt)
1965                 dst_hold(&rt->dst);
1966         read_unlock_bh(&table->tb6_lock);
1967         return rt;
1968 }
1969
1970 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1971                                      struct net_device *dev,
1972                                      unsigned int pref)
1973 {
1974         struct fib6_config cfg = {
1975                 .fc_table       = RT6_TABLE_DFLT,
1976                 .fc_metric      = IP6_RT_PRIO_USER,
1977                 .fc_ifindex     = dev->ifindex,
1978                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1979                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1980                 .fc_nlinfo.portid = 0,
1981                 .fc_nlinfo.nlh = NULL,
1982                 .fc_nlinfo.nl_net = dev_net(dev),
1983         };
1984
1985         cfg.fc_gateway = *gwaddr;
1986
1987         ip6_route_add(&cfg);
1988
1989         return rt6_get_dflt_router(gwaddr, dev);
1990 }
1991
1992 void rt6_purge_dflt_routers(struct net *net)
1993 {
1994         struct rt6_info *rt;
1995         struct fib6_table *table;
1996
1997         /* NOTE: Keep consistent with rt6_get_dflt_router */
1998         table = fib6_get_table(net, RT6_TABLE_DFLT);
1999         if (!table)
2000                 return;
2001
2002 restart:
2003         read_lock_bh(&table->tb6_lock);
2004         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2005                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
2006                         dst_hold(&rt->dst);
2007                         read_unlock_bh(&table->tb6_lock);
2008                         ip6_del_rt(rt);
2009                         goto restart;
2010                 }
2011         }
2012         read_unlock_bh(&table->tb6_lock);
2013 }
2014
2015 static void rtmsg_to_fib6_config(struct net *net,
2016                                  struct in6_rtmsg *rtmsg,
2017                                  struct fib6_config *cfg)
2018 {
2019         memset(cfg, 0, sizeof(*cfg));
2020
2021         cfg->fc_table = RT6_TABLE_MAIN;
2022         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2023         cfg->fc_metric = rtmsg->rtmsg_metric;
2024         cfg->fc_expires = rtmsg->rtmsg_info;
2025         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2026         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2027         cfg->fc_flags = rtmsg->rtmsg_flags;
2028
2029         cfg->fc_nlinfo.nl_net = net;
2030
2031         cfg->fc_dst = rtmsg->rtmsg_dst;
2032         cfg->fc_src = rtmsg->rtmsg_src;
2033         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2034 }
2035
2036 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2037 {
2038         struct fib6_config cfg;
2039         struct in6_rtmsg rtmsg;
2040         int err;
2041
2042         switch(cmd) {
2043         case SIOCADDRT:         /* Add a route */
2044         case SIOCDELRT:         /* Delete a route */
2045                 if (!capable(CAP_NET_ADMIN))
2046                         return -EPERM;
2047                 err = copy_from_user(&rtmsg, arg,
2048                                      sizeof(struct in6_rtmsg));
2049                 if (err)
2050                         return -EFAULT;
2051
2052                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2053
2054                 rtnl_lock();
2055                 switch (cmd) {
2056                 case SIOCADDRT:
2057                         err = ip6_route_add(&cfg);
2058                         break;
2059                 case SIOCDELRT:
2060                         err = ip6_route_del(&cfg);
2061                         break;
2062                 default:
2063                         err = -EINVAL;
2064                 }
2065                 rtnl_unlock();
2066
2067                 return err;
2068         }
2069
2070         return -EINVAL;
2071 }
2072
2073 /*
2074  *      Drop the packet on the floor
2075  */
2076
2077 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2078 {
2079         int type;
2080         struct dst_entry *dst = skb_dst(skb);
2081         switch (ipstats_mib_noroutes) {
2082         case IPSTATS_MIB_INNOROUTES:
2083                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2084                 if (type == IPV6_ADDR_ANY) {
2085                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2086                                       IPSTATS_MIB_INADDRERRORS);
2087                         break;
2088                 }
2089                 /* FALLTHROUGH */
2090         case IPSTATS_MIB_OUTNOROUTES:
2091                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2092                               ipstats_mib_noroutes);
2093                 break;
2094         }
2095         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2096         kfree_skb(skb);
2097         return 0;
2098 }
2099
2100 static int ip6_pkt_discard(struct sk_buff *skb)
2101 {
2102         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2103 }
2104
2105 static int ip6_pkt_discard_out(struct sk_buff *skb)
2106 {
2107         skb->dev = skb_dst(skb)->dev;
2108         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2109 }
2110
2111 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2112
2113 static int ip6_pkt_prohibit(struct sk_buff *skb)
2114 {
2115         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2116 }
2117
2118 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2119 {
2120         skb->dev = skb_dst(skb)->dev;
2121         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2122 }
2123
2124 #endif
2125
2126 /*
2127  *      Allocate a dst for local (unicast / anycast) address.
2128  */
2129
2130 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2131                                     const struct in6_addr *addr,
2132                                     bool anycast)
2133 {
2134         struct net *net = dev_net(idev->dev);
2135         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2136         int err;
2137
2138         if (!rt) {
2139                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2140                 return ERR_PTR(-ENOMEM);
2141         }
2142
2143         in6_dev_hold(idev);
2144
2145         rt->dst.flags |= DST_HOST;
2146         rt->dst.input = ip6_input;
2147         rt->dst.output = ip6_output;
2148         rt->rt6i_idev = idev;
2149
2150         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2151         if (anycast)
2152                 rt->rt6i_flags |= RTF_ANYCAST;
2153         else
2154                 rt->rt6i_flags |= RTF_LOCAL;
2155         err = rt6_bind_neighbour(rt, rt->dst.dev);
2156         if (err) {
2157                 dst_free(&rt->dst);
2158                 return ERR_PTR(err);
2159         }
2160
2161         rt->rt6i_dst.addr = *addr;
2162         rt->rt6i_dst.plen = 128;
2163         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2164
2165         atomic_set(&rt->dst.__refcnt, 1);
2166
2167         return rt;
2168 }
2169
2170 int ip6_route_get_saddr(struct net *net,
2171                         struct rt6_info *rt,
2172                         const struct in6_addr *daddr,
2173                         unsigned int prefs,
2174                         struct in6_addr *saddr)
2175 {
2176         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2177         int err = 0;
2178         if (rt->rt6i_prefsrc.plen)
2179                 *saddr = rt->rt6i_prefsrc.addr;
2180         else
2181                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2182                                          daddr, prefs, saddr);
2183         return err;
2184 }
2185
/*
 * remove deleted ip from prefsrc entries
 *
 * Walker argument for fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2192
2193 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2194 {
2195         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2196         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2197         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2198
2199         if (((void *)rt->dst.dev == dev || !dev) &&
2200             rt != net->ipv6.ip6_null_entry &&
2201             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2202                 /* remove prefsrc entry */
2203                 rt->rt6i_prefsrc.plen = 0;
2204         }
2205         return 0;
2206 }
2207
2208 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2209 {
2210         struct net *net = dev_net(ifp->idev->dev);
2211         struct arg_dev_net_ip adni = {
2212                 .dev = ifp->idev->dev,
2213                 .net = net,
2214                 .addr = &ifp->addr,
2215         };
2216         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2217 }
2218
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2223
2224 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2225 {
2226         const struct arg_dev_net *adn = arg;
2227         const struct net_device *dev = adn->dev;
2228
2229         if ((rt->dst.dev == dev || !dev) &&
2230             rt != adn->net->ipv6.ip6_null_entry)
2231                 return -1;
2232
2233         return 0;
2234 }
2235
2236 void rt6_ifdown(struct net *net, struct net_device *dev)
2237 {
2238         struct arg_dev_net adn = {
2239                 .dev = dev,
2240                 .net = net,
2241         };
2242
2243         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2244         icmp6_clean_all(fib6_ifdown, &adn);
2245 }
2246
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2251
2252 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2253 {
2254         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2255         struct inet6_dev *idev;
2256
2257         /* In IPv6 pmtu discovery is not optional,
2258            so that RTAX_MTU lock cannot disable it.
2259            We still use this lock to block changes
2260            caused by addrconf/ndisc.
2261         */
2262
2263         idev = __in6_dev_get(arg->dev);
2264         if (!idev)
2265                 return 0;
2266
2267         /* For administrative MTU increase, there is no way to discover
2268            IPv6 PMTU increase, so PMTU increase should be updated here.
2269            Since RFC 1981 doesn't include administrative MTU increase
2270            update PMTU increase is a MUST. (i.e. jumbo frame)
2271          */
2272         /*
2273            If new MTU is less than route PMTU, this new MTU will be the
2274            lowest MTU in the path, update the route PMTU to reflect PMTU
2275            decreases; if new MTU is greater than route PMTU, and the
2276            old MTU is the lowest MTU in the path, update the route PMTU
2277            to reflect the increase. In this case if the other nodes' MTU
2278            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2279            PMTU discouvery.
2280          */
2281         if (rt->dst.dev == arg->dev &&
2282             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2283             (dst_mtu(&rt->dst) >= arg->mtu ||
2284              (dst_mtu(&rt->dst) < arg->mtu &&
2285               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2286                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2287         }
2288         return 0;
2289 }
2290
2291 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2292 {
2293         struct rt6_mtu_change_arg arg = {
2294                 .dev = dev,
2295                 .mtu = mtu,
2296         };
2297
2298         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2299 }
2300
2301 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2302         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2303         [RTA_OIF]               = { .type = NLA_U32 },
2304         [RTA_IIF]               = { .type = NLA_U32 },
2305         [RTA_PRIORITY]          = { .type = NLA_U32 },
2306         [RTA_METRICS]           = { .type = NLA_NESTED },
2307         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2308 };
2309
2310 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2311                               struct fib6_config *cfg)
2312 {
2313         struct rtmsg *rtm;
2314         struct nlattr *tb[RTA_MAX+1];
2315         int err;
2316
2317         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2318         if (err < 0)
2319                 goto errout;
2320
2321         err = -EINVAL;
2322         rtm = nlmsg_data(nlh);
2323         memset(cfg, 0, sizeof(*cfg));
2324
2325         cfg->fc_table = rtm->rtm_table;
2326         cfg->fc_dst_len = rtm->rtm_dst_len;
2327         cfg->fc_src_len = rtm->rtm_src_len;
2328         cfg->fc_flags = RTF_UP;
2329         cfg->fc_protocol = rtm->rtm_protocol;
2330         cfg->fc_type = rtm->rtm_type;
2331
2332         if (rtm->rtm_type == RTN_UNREACHABLE ||
2333             rtm->rtm_type == RTN_BLACKHOLE ||
2334             rtm->rtm_type == RTN_PROHIBIT ||
2335             rtm->rtm_type == RTN_THROW)
2336                 cfg->fc_flags |= RTF_REJECT;
2337
2338         if (rtm->rtm_type == RTN_LOCAL)
2339                 cfg->fc_flags |= RTF_LOCAL;
2340
2341         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2342         cfg->fc_nlinfo.nlh = nlh;
2343         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2344
2345         if (tb[RTA_GATEWAY]) {
2346                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2347                 cfg->fc_flags |= RTF_GATEWAY;
2348         }
2349
2350         if (tb[RTA_DST]) {
2351                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2352
2353                 if (nla_len(tb[RTA_DST]) < plen)
2354                         goto errout;
2355
2356                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2357         }
2358
2359         if (tb[RTA_SRC]) {
2360                 int plen = (rtm->rtm_src_len + 7) >> 3;
2361
2362                 if (nla_len(tb[RTA_SRC]) < plen)
2363                         goto errout;
2364
2365                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2366         }
2367
2368         if (tb[RTA_PREFSRC])
2369                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2370
2371         if (tb[RTA_OIF])
2372                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2373
2374         if (tb[RTA_PRIORITY])
2375                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2376
2377         if (tb[RTA_METRICS]) {
2378                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2379                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2380         }
2381
2382         if (tb[RTA_TABLE])
2383                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2384
2385         if (tb[RTA_MULTIPATH]) {
2386                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2387                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2388         }
2389
2390         err = 0;
2391 errout:
2392         return err;
2393 }
2394
2395 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2396 {
2397         struct fib6_config r_cfg;
2398         struct rtnexthop *rtnh;
2399         int remaining;
2400         int attrlen;
2401         int err = 0, last_err = 0;
2402
2403 beginning:
2404         rtnh = (struct rtnexthop *)cfg->fc_mp;
2405         remaining = cfg->fc_mp_len;
2406
2407         /* Parse a Multipath Entry */
2408         while (rtnh_ok(rtnh, remaining)) {
2409                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2410                 if (rtnh->rtnh_ifindex)
2411                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2412
2413                 attrlen = rtnh_attrlen(rtnh);
2414                 if (attrlen > 0) {
2415                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2416
2417                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2418                         if (nla) {
2419                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2420                                 r_cfg.fc_flags |= RTF_GATEWAY;
2421                         }
2422                 }
2423                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2424                 if (err) {
2425                         last_err = err;
2426                         /* If we are trying to remove a route, do not stop the
2427                          * loop when ip6_route_del() fails (because next hop is
2428                          * already gone), we should try to remove all next hops.
2429                          */
2430                         if (add) {
2431                                 /* If add fails, we should try to delete all
2432                                  * next hops that have been already added.
2433                                  */
2434                                 add = 0;
2435                                 goto beginning;
2436                         }
2437                 }
2438                 /* Because each route is added like a single route we remove
2439                  * this flag after the first nexthop (if there is a collision,
2440                  * we have already fail to add the first nexthop:
2441                  * fib6_add_rt2node() has reject it).
2442                  */
2443                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2444                 rtnh = rtnh_next(rtnh, &remaining);
2445         }
2446
2447         return last_err;
2448 }
2449
2450 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2451 {
2452         struct fib6_config cfg;
2453         int err;
2454
2455         err = rtm_to_fib6_config(skb, nlh, &cfg);
2456         if (err < 0)
2457                 return err;
2458
2459         if (cfg.fc_mp)
2460                 return ip6_route_multipath(&cfg, 0);
2461         else
2462                 return ip6_route_del(&cfg);
2463 }
2464
2465 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2466 {
2467         struct fib6_config cfg;
2468         int err;
2469
2470         err = rtm_to_fib6_config(skb, nlh, &cfg);
2471         if (err < 0)
2472                 return err;
2473
2474         if (cfg.fc_mp)
2475                 return ip6_route_multipath(&cfg, 1);
2476         else
2477                 return ip6_route_add(&cfg);
2478 }
2479
2480 static inline size_t rt6_nlmsg_size(void)
2481 {
2482         return NLMSG_ALIGN(sizeof(struct rtmsg))
2483                + nla_total_size(16) /* RTA_SRC */
2484                + nla_total_size(16) /* RTA_DST */
2485                + nla_total_size(16) /* RTA_GATEWAY */
2486                + nla_total_size(16) /* RTA_PREFSRC */
2487                + nla_total_size(4) /* RTA_TABLE */
2488                + nla_total_size(4) /* RTA_IIF */
2489                + nla_total_size(4) /* RTA_OIF */
2490                + nla_total_size(4) /* RTA_PRIORITY */
2491                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2492                + nla_total_size(sizeof(struct rta_cacheinfo));
2493 }
2494
2495 static int rt6_fill_node(struct net *net,
2496                          struct sk_buff *skb, struct rt6_info *rt,
2497                          struct in6_addr *dst, struct in6_addr *src,
2498                          int iif, int type, u32 portid, u32 seq,
2499                          int prefix, int nowait, unsigned int flags)
2500 {
2501         struct rtmsg *rtm;
2502         struct nlmsghdr *nlh;
2503         long expires;
2504         u32 table;
2505         struct neighbour *n;
2506
2507         if (prefix) {   /* user wants prefix routes only */
2508                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2509                         /* success since this is not a prefix route */
2510                         return 1;
2511                 }
2512         }
2513
2514         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2515         if (!nlh)
2516                 return -EMSGSIZE;
2517
2518         rtm = nlmsg_data(nlh);
2519         rtm->rtm_family = AF_INET6;
2520         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2521         rtm->rtm_src_len = rt->rt6i_src.plen;
2522         rtm->rtm_tos = 0;
2523         if (rt->rt6i_table)
2524                 table = rt->rt6i_table->tb6_id;
2525         else
2526                 table = RT6_TABLE_UNSPEC;
2527         rtm->rtm_table = table;
2528         if (nla_put_u32(skb, RTA_TABLE, table))
2529                 goto nla_put_failure;
2530         if (rt->rt6i_flags & RTF_REJECT) {
2531                 switch (rt->dst.error) {
2532                 case -EINVAL:
2533                         rtm->rtm_type = RTN_BLACKHOLE;
2534                         break;
2535                 case -EACCES:
2536                         rtm->rtm_type = RTN_PROHIBIT;
2537                         break;
2538                 case -EAGAIN:
2539                         rtm->rtm_type = RTN_THROW;
2540                         break;
2541                 default:
2542                         rtm->rtm_type = RTN_UNREACHABLE;
2543                         break;
2544                 }
2545         }
2546         else if (rt->rt6i_flags & RTF_LOCAL)
2547                 rtm->rtm_type = RTN_LOCAL;
2548         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2549                 rtm->rtm_type = RTN_LOCAL;
2550         else
2551                 rtm->rtm_type = RTN_UNICAST;
2552         rtm->rtm_flags = 0;
2553         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2554         rtm->rtm_protocol = rt->rt6i_protocol;
2555         if (rt->rt6i_flags & RTF_DYNAMIC)
2556                 rtm->rtm_protocol = RTPROT_REDIRECT;
2557         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2558                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2559                         rtm->rtm_protocol = RTPROT_RA;
2560                 else
2561                         rtm->rtm_protocol = RTPROT_KERNEL;
2562         }
2563
2564         if (rt->rt6i_flags & RTF_CACHE)
2565                 rtm->rtm_flags |= RTM_F_CLONED;
2566
2567         if (dst) {
2568                 if (nla_put(skb, RTA_DST, 16, dst))
2569                         goto nla_put_failure;
2570                 rtm->rtm_dst_len = 128;
2571         } else if (rtm->rtm_dst_len)
2572                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2573                         goto nla_put_failure;
2574 #ifdef CONFIG_IPV6_SUBTREES
2575         if (src) {
2576                 if (nla_put(skb, RTA_SRC, 16, src))
2577                         goto nla_put_failure;
2578                 rtm->rtm_src_len = 128;
2579         } else if (rtm->rtm_src_len &&
2580                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2581                 goto nla_put_failure;
2582 #endif
2583         if (iif) {
2584 #ifdef CONFIG_IPV6_MROUTE
2585                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2586                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2587                         if (err <= 0) {
2588                                 if (!nowait) {
2589                                         if (err == 0)
2590                                                 return 0;
2591                                         goto nla_put_failure;
2592                                 } else {
2593                                         if (err == -EMSGSIZE)
2594                                                 goto nla_put_failure;
2595                                 }
2596                         }
2597                 } else
2598 #endif
2599                         if (nla_put_u32(skb, RTA_IIF, iif))
2600                                 goto nla_put_failure;
2601         } else if (dst) {
2602                 struct in6_addr saddr_buf;
2603                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2604                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2605                         goto nla_put_failure;
2606         }
2607
2608         if (rt->rt6i_prefsrc.plen) {
2609                 struct in6_addr saddr_buf;
2610                 saddr_buf = rt->rt6i_prefsrc.addr;
2611                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2612                         goto nla_put_failure;
2613         }
2614
2615         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2616                 goto nla_put_failure;
2617
2618         n = rt->n;
2619         if (n) {
2620                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2621                         goto nla_put_failure;
2622         }
2623
2624         if (rt->dst.dev &&
2625             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2626                 goto nla_put_failure;
2627         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2628                 goto nla_put_failure;
2629
2630         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2631
2632         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2633                 goto nla_put_failure;
2634
2635         return nlmsg_end(skb, nlh);
2636
2637 nla_put_failure:
2638         nlmsg_cancel(skb, nlh);
2639         return -EMSGSIZE;
2640 }
2641
2642 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2643 {
2644         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2645         int prefix;
2646
2647         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2648                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2649                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2650         } else
2651                 prefix = 0;
2652
2653         return rt6_fill_node(arg->net,
2654                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2655                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2656                      prefix, 0, NLM_F_MULTI);
2657 }
2658
2659 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2660 {
2661         struct net *net = sock_net(in_skb->sk);
2662         struct nlattr *tb[RTA_MAX+1];
2663         struct rt6_info *rt;
2664         struct sk_buff *skb;
2665         struct rtmsg *rtm;
2666         struct flowi6 fl6;
2667         int err, iif = 0, oif = 0;
2668
2669         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2670         if (err < 0)
2671                 goto errout;
2672
2673         err = -EINVAL;
2674         memset(&fl6, 0, sizeof(fl6));
2675
2676         if (tb[RTA_SRC]) {
2677                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2678                         goto errout;
2679
2680                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2681         }
2682
2683         if (tb[RTA_DST]) {
2684                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2685                         goto errout;
2686
2687                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2688         }
2689
2690         if (tb[RTA_IIF])
2691                 iif = nla_get_u32(tb[RTA_IIF]);
2692
2693         if (tb[RTA_OIF])
2694                 oif = nla_get_u32(tb[RTA_OIF]);
2695
2696         if (iif) {
2697                 struct net_device *dev;
2698                 int flags = 0;
2699
2700                 dev = __dev_get_by_index(net, iif);
2701                 if (!dev) {
2702                         err = -ENODEV;
2703                         goto errout;
2704                 }
2705
2706                 fl6.flowi6_iif = iif;
2707
2708                 if (!ipv6_addr_any(&fl6.saddr))
2709                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2710
2711                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2712                                                                flags);
2713         } else {
2714                 fl6.flowi6_oif = oif;
2715
2716                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2717         }
2718
2719         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2720         if (!skb) {
2721                 ip6_rt_put(rt);
2722                 err = -ENOBUFS;
2723                 goto errout;
2724         }
2725
2726         /* Reserve room for dummy headers, this skb can pass
2727            through good chunk of routing engine.
2728          */
2729         skb_reset_mac_header(skb);
2730         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2731
2732         skb_dst_set(skb, &rt->dst);
2733
2734         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2735                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2736                             nlh->nlmsg_seq, 0, 0, 0);
2737         if (err < 0) {
2738                 kfree_skb(skb);
2739                 goto errout;
2740         }
2741
2742         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2743 errout:
2744         return err;
2745 }
2746
2747 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2748 {
2749         struct sk_buff *skb;
2750         struct net *net = info->nl_net;
2751         u32 seq;
2752         int err;
2753
2754         err = -ENOBUFS;
2755         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2756
2757         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2758         if (!skb)
2759                 goto errout;
2760
2761         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2762                                 event, info->portid, seq, 0, 0, 0);
2763         if (err < 0) {
2764                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2765                 WARN_ON(err == -EMSGSIZE);
2766                 kfree_skb(skb);
2767                 goto errout;
2768         }
2769         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2770                     info->nlh, gfp_any());
2771         return;
2772 errout:
2773         if (err < 0)
2774                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2775 }
2776
2777 static int ip6_route_dev_notify(struct notifier_block *this,
2778                                 unsigned long event, void *data)
2779 {
2780         struct net_device *dev = (struct net_device *)data;
2781         struct net *net = dev_net(dev);
2782
2783         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2784                 net->ipv6.ip6_null_entry->dst.dev = dev;
2785                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2786 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2787                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2788                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2789                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2790                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2791 #endif
2792         }
2793
2794         return NOTIFY_OK;
2795 }
2796
2797 /*
2798  *      /proc
2799  */
2800
2801 #ifdef CONFIG_PROC_FS
2802
/*
 * Buffer/position bookkeeping for a /proc route dump.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2811
2812 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2813 {
2814         struct seq_file *m = p_arg;
2815         struct neighbour *n;
2816
2817         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2818
2819 #ifdef CONFIG_IPV6_SUBTREES
2820         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2821 #else
2822         seq_puts(m, "00000000000000000000000000000000 00 ");
2823 #endif
2824         n = rt->n;
2825         if (n) {
2826                 seq_printf(m, "%pi6", n->primary_key);
2827         } else {
2828                 seq_puts(m, "00000000000000000000000000000000");
2829         }
2830         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2831                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2832                    rt->dst.__use, rt->rt6i_flags,
2833                    rt->dst.dev ? rt->dst.dev->name : "");
2834         return 0;
2835 }
2836
2837 static int ipv6_route_show(struct seq_file *m, void *v)
2838 {
2839         struct net *net = (struct net *)m->private;
2840         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2841         return 0;
2842 }
2843
2844 static int ipv6_route_open(struct inode *inode, struct file *file)
2845 {
2846         return single_open_net(inode, file, ipv6_route_show);
2847 }
2848
2849 static const struct file_operations ipv6_route_proc_fops = {
2850         .owner          = THIS_MODULE,
2851         .open           = ipv6_route_open,
2852         .read           = seq_read,
2853         .llseek         = seq_lseek,
2854         .release        = single_release_net,
2855 };
2856
2857 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2858 {
2859         struct net *net = (struct net *)seq->private;
2860         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2861                    net->ipv6.rt6_stats->fib_nodes,
2862                    net->ipv6.rt6_stats->fib_route_nodes,
2863                    net->ipv6.rt6_stats->fib_rt_alloc,
2864                    net->ipv6.rt6_stats->fib_rt_entries,
2865                    net->ipv6.rt6_stats->fib_rt_cache,
2866                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2867                    net->ipv6.rt6_stats->fib_discarded_routes);
2868
2869         return 0;
2870 }
2871
2872 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2873 {
2874         return single_open_net(inode, file, rt6_stats_seq_show);
2875 }
2876
2877 static const struct file_operations rt6_stats_seq_fops = {
2878         .owner   = THIS_MODULE,
2879         .open    = rt6_stats_seq_open,
2880         .read    = seq_read,
2881         .llseek  = seq_lseek,
2882         .release = single_release_net,
2883 };
2884 #endif  /* CONFIG_PROC_FS */
2885
2886 #ifdef CONFIG_SYSCTL
2887
2888 static
2889 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2890                               void __user *buffer, size_t *lenp, loff_t *ppos)
2891 {
2892         struct net *net;
2893         int delay;
2894         if (!write)
2895                 return -EINVAL;
2896
2897         net = (struct net *)ctl->extra1;
2898         delay = net->ipv6.sysctl.flush_delay;
2899         proc_dointvec(ctl, write, buffer, lenp, ppos);
2900         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2901         return 0;
2902 }
2903
2904 ctl_table ipv6_route_table_template[] = {
2905         {
2906                 .procname       =       "flush",
2907                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2908                 .maxlen         =       sizeof(int),
2909                 .mode           =       0200,
2910                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2911         },
2912         {
2913                 .procname       =       "gc_thresh",
2914                 .data           =       &ip6_dst_ops_template.gc_thresh,
2915                 .maxlen         =       sizeof(int),
2916                 .mode           =       0644,
2917                 .proc_handler   =       proc_dointvec,
2918         },
2919         {
2920                 .procname       =       "max_size",
2921                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2922                 .maxlen         =       sizeof(int),
2923                 .mode           =       0644,
2924                 .proc_handler   =       proc_dointvec,
2925         },
2926         {
2927                 .procname       =       "gc_min_interval",
2928                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2929                 .maxlen         =       sizeof(int),
2930                 .mode           =       0644,
2931                 .proc_handler   =       proc_dointvec_jiffies,
2932         },
2933         {
2934                 .procname       =       "gc_timeout",
2935                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2936                 .maxlen         =       sizeof(int),
2937                 .mode           =       0644,
2938                 .proc_handler   =       proc_dointvec_jiffies,
2939         },
2940         {
2941                 .procname       =       "gc_interval",
2942                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2943                 .maxlen         =       sizeof(int),
2944                 .mode           =       0644,
2945                 .proc_handler   =       proc_dointvec_jiffies,
2946         },
2947         {
2948                 .procname       =       "gc_elasticity",
2949                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2950                 .maxlen         =       sizeof(int),
2951                 .mode           =       0644,
2952                 .proc_handler   =       proc_dointvec,
2953         },
2954         {
2955                 .procname       =       "mtu_expires",
2956                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2957                 .maxlen         =       sizeof(int),
2958                 .mode           =       0644,
2959                 .proc_handler   =       proc_dointvec_jiffies,
2960         },
2961         {
2962                 .procname       =       "min_adv_mss",
2963                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2964                 .maxlen         =       sizeof(int),
2965                 .mode           =       0644,
2966                 .proc_handler   =       proc_dointvec,
2967         },
2968         {
2969                 .procname       =       "gc_min_interval_ms",
2970                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2971                 .maxlen         =       sizeof(int),
2972                 .mode           =       0644,
2973                 .proc_handler   =       proc_dointvec_ms_jiffies,
2974         },
2975         { }
2976 };
2977
2978 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2979 {
2980         struct ctl_table *table;
2981
2982         table = kmemdup(ipv6_route_table_template,
2983                         sizeof(ipv6_route_table_template),
2984                         GFP_KERNEL);
2985
2986         if (table) {
2987                 table[0].data = &net->ipv6.sysctl.flush_delay;
2988                 table[0].extra1 = net;
2989                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2990                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2991                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2992                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2993                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2994                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2995                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2996                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2997                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2998         }
2999
3000         return table;
3001 }
3002 #endif
3003
3004 static int __net_init ip6_route_net_init(struct net *net)
3005 {
3006         int ret = -ENOMEM;
3007
3008         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3009                sizeof(net->ipv6.ip6_dst_ops));
3010
3011         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3012                 goto out_ip6_dst_ops;
3013
3014         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3015                                            sizeof(*net->ipv6.ip6_null_entry),
3016                                            GFP_KERNEL);
3017         if (!net->ipv6.ip6_null_entry)
3018                 goto out_ip6_dst_entries;
3019         net->ipv6.ip6_null_entry->dst.path =
3020                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3021         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3022         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3023                          ip6_template_metrics, true);
3024
3025 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3026         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3027                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3028                                                GFP_KERNEL);
3029         if (!net->ipv6.ip6_prohibit_entry)
3030                 goto out_ip6_null_entry;
3031         net->ipv6.ip6_prohibit_entry->dst.path =
3032                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3033         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3034         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3035                          ip6_template_metrics, true);
3036
3037         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3038                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3039                                                GFP_KERNEL);
3040         if (!net->ipv6.ip6_blk_hole_entry)
3041                 goto out_ip6_prohibit_entry;
3042         net->ipv6.ip6_blk_hole_entry->dst.path =
3043                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3044         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3045         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3046                          ip6_template_metrics, true);
3047 #endif
3048
3049         net->ipv6.sysctl.flush_delay = 0;
3050         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3051         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3052         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3053         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3054         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3055         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3056         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3057
3058         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3059
3060         ret = 0;
3061 out:
3062         return ret;
3063
3064 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3065 out_ip6_prohibit_entry:
3066         kfree(net->ipv6.ip6_prohibit_entry);
3067 out_ip6_null_entry:
3068         kfree(net->ipv6.ip6_null_entry);
3069 #endif
3070 out_ip6_dst_entries:
3071         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3072 out_ip6_dst_ops:
3073         goto out;
3074 }
3075
3076 static void __net_exit ip6_route_net_exit(struct net *net)
3077 {
3078         kfree(net->ipv6.ip6_null_entry);
3079 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3080         kfree(net->ipv6.ip6_prohibit_entry);
3081         kfree(net->ipv6.ip6_blk_hole_entry);
3082 #endif
3083         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3084 }
3085
3086 static int __net_init ip6_route_net_init_late(struct net *net)
3087 {
3088 #ifdef CONFIG_PROC_FS
3089         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3090         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3091 #endif
3092         return 0;
3093 }
3094
3095 static void __net_exit ip6_route_net_exit_late(struct net *net)
3096 {
3097 #ifdef CONFIG_PROC_FS
3098         proc_net_remove(net, "ipv6_route");
3099         proc_net_remove(net, "rt6_stats");
3100 #endif
3101 }
3102
3103 static struct pernet_operations ip6_route_net_ops = {
3104         .init = ip6_route_net_init,
3105         .exit = ip6_route_net_exit,
3106 };
3107
3108 static int __net_init ipv6_inetpeer_init(struct net *net)
3109 {
3110         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3111
3112         if (!bp)
3113                 return -ENOMEM;
3114         inet_peer_base_init(bp);
3115         net->ipv6.peers = bp;
3116         return 0;
3117 }
3118
3119 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3120 {
3121         struct inet_peer_base *bp = net->ipv6.peers;
3122
3123         net->ipv6.peers = NULL;
3124         inetpeer_invalidate_tree(bp);
3125         kfree(bp);
3126 }
3127
3128 static struct pernet_operations ipv6_inetpeer_ops = {
3129         .init   =       ipv6_inetpeer_init,
3130         .exit   =       ipv6_inetpeer_exit,
3131 };
3132
3133 static struct pernet_operations ip6_route_net_late_ops = {
3134         .init = ip6_route_net_init_late,
3135         .exit = ip6_route_net_exit_late,
3136 };
3137
3138 static struct notifier_block ip6_route_dev_notifier = {
3139         .notifier_call = ip6_route_dev_notify,
3140         .priority = 0,
3141 };
3142
3143 int __init ip6_route_init(void)
3144 {
3145         int ret;
3146
3147         ret = -ENOMEM;
3148         ip6_dst_ops_template.kmem_cachep =
3149                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3150                                   SLAB_HWCACHE_ALIGN, NULL);
3151         if (!ip6_dst_ops_template.kmem_cachep)
3152                 goto out;
3153
3154         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3155         if (ret)
3156                 goto out_kmem_cache;
3157
3158         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3159         if (ret)
3160                 goto out_dst_entries;
3161
3162         ret = register_pernet_subsys(&ip6_route_net_ops);
3163         if (ret)
3164                 goto out_register_inetpeer;
3165
3166         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3167
3168         /* Registering of the loopback is done before this portion of code,
3169          * the loopback reference in rt6_info will not be taken, do it
3170          * manually for init_net */
3171         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3172         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3173   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3174         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3175         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3176         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3177         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3178   #endif
3179         ret = fib6_init();
3180         if (ret)
3181                 goto out_register_subsys;
3182
3183         ret = xfrm6_init();
3184         if (ret)
3185                 goto out_fib6_init;
3186
3187         ret = fib6_rules_init();
3188         if (ret)
3189                 goto xfrm6_init;
3190
3191         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3192         if (ret)
3193                 goto fib6_rules_init;
3194
3195         ret = -ENOBUFS;
3196         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3197             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3198             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3199                 goto out_register_late_subsys;
3200
3201         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3202         if (ret)
3203                 goto out_register_late_subsys;
3204
3205 out:
3206         return ret;
3207
3208 out_register_late_subsys:
3209         unregister_pernet_subsys(&ip6_route_net_late_ops);
3210 fib6_rules_init:
3211         fib6_rules_cleanup();
3212 xfrm6_init:
3213         xfrm6_fini();
3214 out_fib6_init:
3215         fib6_gc_cleanup();
3216 out_register_subsys:
3217         unregister_pernet_subsys(&ip6_route_net_ops);
3218 out_register_inetpeer:
3219         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3220 out_dst_entries:
3221         dst_entries_destroy(&ip6_dst_blackhole_ops);
3222 out_kmem_cache:
3223         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3224         goto out;
3225 }
3226
3227 void ip6_route_cleanup(void)
3228 {
3229         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3230         unregister_pernet_subsys(&ip6_route_net_late_ops);
3231         fib6_rules_cleanup();
3232         xfrm6_fini();
3233         fib6_gc_cleanup();
3234         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3235         unregister_pernet_subsys(&ip6_route_net_ops);
3236         dst_entries_destroy(&ip6_dst_blackhole_ops);
3237         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3238 }