2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
62 #include <asm/uaccess.h>
65 #include <linux/sysctl.h>
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void ip6_dst_destroy(struct dst_entry *);
75 static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
77 static int ip6_dst_gc(struct dst_ops *ops);
79 static int ip6_pkt_discard(struct sk_buff *skb);
80 static int ip6_pkt_discard_out(struct sk_buff *skb);
81 static void ip6_link_failure(struct sk_buff *skb);
82 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex,
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex);
/*
 * ipv6_cow_metrics - copy-on-write handler for a dst's metrics array.
 * NOTE(review): this listing is elided (embedded line numbers jump); the
 * allocation of 'p' and several returns are not visible here — verify
 * against the full source before relying on details.
 */
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
99 struct rt6_info *rt = (struct rt6_info *) dst;
100 struct inet_peer *peer;
/* Only host routes (DST_HOST) get per-peer writable metrics. */
103 if (!(rt->dst.flags & DST_HOST))
106 peer = rt6_get_peer_create(rt);
108 u32 *old_p = __DST_METRICS_PTR(old);
109 unsigned long prev, new;
/* First writer for this peer copies the old metrics into the new block. */
112 if (inet_metrics_new(peer))
113 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
/* Atomically install the writable block; if we lost the race, use the
 * winner's pointer instead (prev). */
115 new = (unsigned long) p;
116 prev = cmpxchg(&dst->_metrics, old, new);
119 p = __DST_METRICS_PTR(prev);
120 if (prev & DST_METRICS_READ_ONLY)
/*
 * Neighbour-resolution helpers.
 * NOTE(review): bodies are elided in this listing; parameter lists and some
 * intermediate statements are missing from view.
 */
/* choose_neigh_daddr - pick the address to resolve: the gateway if the
 * route has one, otherwise the packet's destination address. */
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
131 struct in6_addr *p = &rt->rt6i_gateway;
133 if (!ipv6_addr_any(p))
134 return (const void *) p;
136 return &ipv6_hdr(skb)->daddr;
/* ip6_neigh_lookup - dst_ops hook: look up (or create) the ND neighbour
 * entry for this dst on dst->dev. */
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
144 struct rt6_info *rt = (struct rt6_info *) dst;
147 daddr = choose_neigh_daddr(rt, skb, daddr);
148 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
151 return neigh_create(&nd_tbl, daddr, dst->dev);
/* rt6_bind_neighbour - attach a neighbour entry for rt's gateway to the
 * route; creates one if the lookup misses. */
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
156 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
158 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
/*
 * ip6_dst_ops_template - dst_ops vtable for normal IPv6 routes.
 * Per-netns copies are presumably made from this template — TODO confirm;
 * some initializer lines (e.g. .family, .gc) are elided from this listing.
 */
167 static struct dst_ops ip6_dst_ops_template = {
169 .protocol = cpu_to_be16(ETH_P_IPV6),
172 .check = ip6_dst_check,
173 .default_advmss = ip6_default_advmss,
175 .cow_metrics = ipv6_cow_metrics,
176 .destroy = ip6_dst_destroy,
177 .ifdown = ip6_dst_ifdown,
178 .negative_advice = ip6_negative_advice,
179 .link_failure = ip6_link_failure,
180 .update_pmtu = ip6_rt_update_pmtu,
181 .redirect = rt6_do_redirect,
182 .local_out = __ip6_local_out,
183 .neigh_lookup = ip6_neigh_lookup,
/*
 * Blackhole dst: used for routes that must swallow traffic without
 * reacting to external events. The update_pmtu/redirect/cow_metrics
 * hooks are deliberate no-op stubs (bodies elided in this listing).
 */
/* ip6_blackhole_mtu - raw MTU metric, falling back to the device MTU. */
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
190 return mtu ? : dst->dev->mtu;
/* No-op: a blackhole route ignores PMTU updates. */
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194 struct sk_buff *skb, u32 mtu)
/* No-op: a blackhole route ignores redirects. */
198 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
/* No-op: blackhole metrics are never made writable. */
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
209 static struct dst_ops ip6_dst_blackhole_ops = {
211 .protocol = cpu_to_be16(ETH_P_IPV6),
212 .destroy = ip6_dst_destroy,
213 .check = ip6_dst_check,
214 .mtu = ip6_blackhole_mtu,
215 .default_advmss = ip6_default_advmss,
216 .update_pmtu = ip6_rt_blackhole_update_pmtu,
217 .redirect = ip6_rt_blackhole_redirect,
218 .cow_metrics = ip6_rt_blackhole_cow_metrics,
219 .neigh_lookup = ip6_neigh_lookup,
/* Default metrics for the template routes below: hop limit 255 only. */
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223 [RTAX_HOPLIMIT - 1] = 255,
/*
 * ip6_null_entry_template - the "no route" entry: rejects packets with
 * -ENETUNREACH. Infinite metric, refcounted once, never freed.
 * (Some initializer lines are elided from this listing.)
 */
226 static const struct rt6_info ip6_null_entry_template = {
228 .__refcnt = ATOMIC_INIT(1),
230 .obsolete = DST_OBSOLETE_FORCE_CHK,
231 .error = -ENETUNREACH,
232 .input = ip6_pkt_discard,
233 .output = ip6_pkt_discard_out,
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
236 .rt6i_protocol = RTPROT_KERNEL,
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
/* Policy-routing builds get two more terminal entries: prohibit
 * (administratively denied) and blackhole (silent drop). */
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
246 static const struct rt6_info ip6_prohibit_entry_template = {
248 .__refcnt = ATOMIC_INIT(1),
250 .obsolete = DST_OBSOLETE_FORCE_CHK,
252 .input = ip6_pkt_prohibit,
253 .output = ip6_pkt_prohibit_out,
255 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
256 .rt6i_protocol = RTPROT_KERNEL,
257 .rt6i_metric = ~(u32) 0,
258 .rt6i_ref = ATOMIC_INIT(1),
/* Blackhole: discard silently — input/output both dst_discard. */
261 static const struct rt6_info ip6_blk_hole_entry_template = {
263 .__refcnt = ATOMIC_INIT(1),
265 .obsolete = DST_OBSOLETE_FORCE_CHK,
267 .input = dst_discard,
268 .output = dst_discard,
270 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
271 .rt6i_protocol = RTPROT_KERNEL,
272 .rt6i_metric = ~(u32) 0,
273 .rt6i_ref = ATOMIC_INIT(1),
278 /* allocate dst with ip6_dst_ops */
/*
 * ip6_dst_alloc - allocate and zero-initialize an rt6_info.
 * The rt6_info tail past the embedded dst_entry is memset to 0, the peer
 * base is taken from the table when one is given (else the netns default),
 * and the generation id / sibling list are initialized.
 * NOTE(review): the 'flags' parameter line is elided from this listing.
 */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280 struct net_device *dev,
282 struct fib6_table *table)
284 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285 0, DST_OBSOLETE_FORCE_CHK, flags);
288 struct dst_entry *dst = &rt->dst;
/* Zero everything after the dst_entry header. */
290 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292 rt->rt6i_genid = rt_genid(net);
293 INIT_LIST_HEAD(&rt->rt6i_siblings);
294 rt->rt6i_nsiblings = 0;
/*
 * ip6_dst_destroy - dst_ops destroy hook: drop the neighbour, non-host
 * metrics, the inet6_dev, any 'from' reference, and the inet peer.
 * NOTE(review): several release calls are elided from this listing.
 */
299 static void ip6_dst_destroy(struct dst_entry *dst)
301 struct rt6_info *rt = (struct rt6_info *)dst;
302 struct inet6_dev *idev = rt->rt6i_idev;
305 neigh_release(rt->n);
/* Host routes share peer metrics; only non-host routes own theirs. */
307 if (!(rt->dst.flags & DST_HOST))
308 dst_destroy_metrics_generic(dst);
311 rt->rt6i_idev = NULL;
/* A route that inherits expiry holds a ref on its parent via dst->from. */
315 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316 dst_release(dst->from);
318 if (rt6_has_peer(rt)) {
319 struct inet_peer *peer = rt6_peer_ptr(rt);
/*
 * rt6_bind_peer - lazily attach the inet_peer for rt's destination.
 * Loses the race gracefully: if another CPU set the peer first,
 * rt6_set_peer() fails (the put of our peer is elided from view).
 */
324 void rt6_bind_peer(struct rt6_info *rt, int create)
326 struct inet_peer_base *base;
327 struct inet_peer *peer;
329 base = inetpeer_base_ptr(rt->_rt6i_peer);
333 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
335 if (!rt6_set_peer(rt, peer))
/*
 * ip6_dst_ifdown - dst_ops ifdown hook: when 'dev' goes away, re-home the
 * route's inet6_dev and neighbour onto the loopback device so the dst
 * stays usable until it is released.
 */
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
343 struct rt6_info *rt = (struct rt6_info *)dst;
344 struct inet6_dev *idev = rt->rt6i_idev;
345 struct net_device *loopback_dev =
346 dev_net(dev)->loopback_dev;
348 if (dev != loopback_dev) {
349 if (idev && idev->dev == dev) {
350 struct inet6_dev *loopback_idev =
351 in6_dev_get(loopback_dev);
353 rt->rt6i_idev = loopback_idev;
/* Re-point the neighbour's device as well, taking a loopback ref. */
357 if (rt->n && rt->n->dev == dev) {
358 rt->n->dev = loopback_dev;
359 dev_hold(loopback_dev);
/*
 * rt6_check_expired - true if the route has passed its expiry time.
 * A route without RTF_EXPIRES but with dst.from inherits expiry from the
 * route it was copied from (checked recursively).
 */
365 static bool rt6_check_expired(const struct rt6_info *rt)
367 if (rt->rt6i_flags & RTF_EXPIRES) {
368 if (time_after(jiffies, rt->dst.expires))
370 } else if (rt->dst.from) {
371 return rt6_check_expired((struct rt6_info *) rt->dst.from);
/*
 * rt6_need_strict - destinations whose scope is interface-bound
 * (multicast, link-local, loopback) require a strict oif match.
 */
376 static bool rt6_need_strict(const struct in6_addr *daddr)
378 return ipv6_addr_type(daddr) &
379 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
382 /* Multipath route selection:
383 * Hash based function using packet header and flowlabel.
384 * Adapted from fib_info_hashfn()
/*
 * rt6_info_hash_nhsfn - hash the flow (addresses, ports/ICMP fields,
 * flow label) down to an index in [0, candidate_count).
 * NOTE(review): the switch's case labels (presumably TCP/UDP vs ICMPv6)
 * are elided from this listing — confirm against the full source.
 */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387 const struct flowi6 *fl6)
389 unsigned int val = fl6->flowi6_proto;
391 val ^= (__force u32)fl6->daddr.s6_addr32[0];
392 val ^= (__force u32)fl6->daddr.s6_addr32[1];
393 val ^= (__force u32)fl6->daddr.s6_addr32[2];
394 val ^= (__force u32)fl6->daddr.s6_addr32[3];
396 val ^= (__force u32)fl6->saddr.s6_addr32[0];
397 val ^= (__force u32)fl6->saddr.s6_addr32[1];
398 val ^= (__force u32)fl6->saddr.s6_addr32[2];
399 val ^= (__force u32)fl6->saddr.s6_addr32[3];
401 /* Work only if this not encapsulated */
402 switch (fl6->flowi6_proto) {
406 val ^= (__force u16)fl6->fl6_sport;
407 val ^= (__force u16)fl6->fl6_dport;
411 val ^= (__force u16)fl6->fl6_icmp_type;
412 val ^= (__force u16)fl6->fl6_icmp_code;
415 /* RFC6438 recommands to use flowlabel */
416 val ^= (__force u32)fl6->flowlabel;
418 /* Perhaps, we need to tune, this function? */
419 val = val ^ (val >> 7) ^ (val >> 12);
420 return val % candidate_count;
/*
 * rt6_multipath_select - among 'match' and its ECMP siblings, pick the
 * one selected by the flow hash (index 0 keeps 'match' itself).
 */
423 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
426 struct rt6_info *sibling, *next_sibling;
429 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430 /* Don't change the route, if route_choosen == 0
431 * (siblings does not include ourself)
434 list_for_each_entry_safe(sibling, next_sibling,
435 &match->rt6i_siblings, rt6i_siblings) {
437 if (route_choosen == 0) {
446 * Route lookup. Any table->tb6_lock is implied.
/*
 * rt6_device_match - walk the rt chain and pick the entry matching the
 * requested outgoing interface (and/or source address). Falls back to a
 * local/loopback candidate; with RT6_LOOKUP_F_IFACE set and no match,
 * returns the null entry. Several branches are elided in this listing.
 */
449 static inline struct rt6_info *rt6_device_match(struct net *net,
451 const struct in6_addr *saddr,
455 struct rt6_info *local = NULL;
456 struct rt6_info *sprt;
/* Nothing to constrain on: no oif and no saddr. */
458 if (!oif && ipv6_addr_any(saddr))
461 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462 struct net_device *dev = sprt->dst.dev;
465 if (dev->ifindex == oif)
467 if (dev->flags & IFF_LOOPBACK) {
468 if (!sprt->rt6i_idev ||
469 sprt->rt6i_idev->dev->ifindex != oif) {
470 if (flags & RT6_LOOKUP_F_IFACE && oif)
472 if (local && (!oif ||
473 local->rt6i_idev->dev->ifindex == oif))
/* saddr constraint: the address must live on this device. */
479 if (ipv6_chk_addr(net, saddr, dev,
480 flags & RT6_LOOKUP_F_IFACE))
489 if (flags & RT6_LOOKUP_F_IFACE)
490 return net->ipv6.ip6_null_entry;
496 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - send a rate-limited unicast Neighbor Solicitation to a
 * router whose reachability is unknown (NUD not VALID), per RFC 4191
 * router reachability probing. No-op stub when ROUTER_PREF is off.
 */
497 static void rt6_probe(struct rt6_info *rt)
499 struct neighbour *neigh;
501 * Okay, this does not seem to be appropriate
502 * for now, however, we need to check if it
503 * is really so; aka Router Reachability Probing.
505 * Router Reachability Probe MUST be rate-limited
506 * to no more than one per minute.
508 neigh = rt ? rt->n : NULL;
509 if (!neigh || (neigh->nud_state & NUD_VALID))
511 read_lock_bh(&neigh->lock);
/* Probe only if still not VALID and the per-idev probe interval passed. */
512 if (!(neigh->nud_state & NUD_VALID) &&
513 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
514 struct in6_addr mcaddr;
515 struct in6_addr *target;
517 neigh->updated = jiffies;
518 read_unlock_bh(&neigh->lock);
520 target = (struct in6_addr *)&neigh->primary_key;
521 addrconf_addr_solict_mult(target, &mcaddr);
522 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
524 read_unlock_bh(&neigh->lock);
/* !CONFIG_IPV6_ROUTER_PREF: probing compiled out. */
528 static inline void rt6_probe(struct rt6_info *rt)
534 * Default Router Selection (RFC 2461 6.3.6)
/* rt6_check_dev - device match score: accepts a matching ifindex, or a
 * loopback route whose idev points at the requested interface. */
536 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
538 struct net_device *dev = rt->dst.dev;
539 if (!oif || dev->ifindex == oif)
541 if ((dev->flags & IFF_LOOPBACK) &&
542 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * rt6_check_neigh - reachability score for the route's next hop.
 * Non-gateway / NONEXTHOP routes need no neighbour check; otherwise the
 * neighbour's NUD state decides. Return values are elided in this listing.
 */
547 static inline int rt6_check_neigh(struct rt6_info *rt)
549 struct neighbour *neigh;
553 if (rt->rt6i_flags & RTF_NONEXTHOP ||
554 !(rt->rt6i_flags & RTF_GATEWAY))
557 read_lock_bh(&neigh->lock);
558 if (neigh->nud_state & NUD_VALID)
560 #ifdef CONFIG_IPV6_ROUTER_PREF
561 else if (neigh->nud_state & NUD_FAILED)
566 read_unlock_bh(&neigh->lock);
/*
 * rt6_score_route - combine device match, router preference (if built)
 * and neighbour reachability into one comparable score.
 */
572 static int rt6_score_route(struct rt6_info *rt, int oif,
577 m = rt6_check_dev(rt, oif);
578 if (!m && (strict & RT6_LOOKUP_F_IFACE))
580 #ifdef CONFIG_IPV6_ROUTER_PREF
581 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
583 n = rt6_check_neigh(rt);
584 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/*
 * find_match - running-maximum helper for rt6_select(): keep the
 * highest-scoring non-expired route seen so far (score kept in *mpri).
 */
589 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
590 int *mpri, struct rt6_info *match)
594 if (rt6_check_expired(rt))
597 m = rt6_score_route(rt, oif, strict);
602 if (strict & RT6_LOOKUP_F_REACHABLE)
606 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/*
 * find_rr_leaf - scan the equal-metric run of routes starting at the
 * round-robin head, then wrap around from the leaf, keeping the best
 * match (see find_match). Used by rt6_select().
 */
614 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
615 struct rt6_info *rr_head,
616 u32 metric, int oif, int strict)
618 struct rt6_info *rt, *match;
622 for (rt = rr_head; rt && rt->rt6i_metric == metric;
623 rt = rt->dst.rt6_next)
624 match = find_match(rt, oif, strict, &mpri, match);
625 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
626 rt = rt->dst.rt6_next)
627 match = find_match(rt, oif, strict, &mpri, match);
/*
 * rt6_select - default-router selection per RFC 4861 6.3.6: prefer a
 * (probably) reachable router; otherwise round-robin fn->rr_ptr through
 * the equal-metric candidates. Falls back to the null entry.
 */
632 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
634 struct rt6_info *match, *rt0;
639 fn->rr_ptr = rt0 = fn->leaf;
641 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
644 (strict & RT6_LOOKUP_F_REACHABLE)) {
645 struct rt6_info *next = rt0->dst.rt6_next;
647 /* no entries matched; do round-robin */
648 if (!next || next->rt6i_metric != rt0->rt6i_metric)
655 net = dev_net(rt0->dst.dev);
656 return match ? match : net->ipv6.ip6_null_entry;
659 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a Route Information Option from a Router
 * Advertisement (RFC 4191): validate length/prefix-len, decode the
 * preference and lifetime, then add/update or remove the corresponding
 * RTF_ROUTEINFO route. Error returns are elided in this listing.
 */
660 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
661 const struct in6_addr *gwaddr)
663 struct net *net = dev_net(dev);
664 struct route_info *rinfo = (struct route_info *) opt;
665 struct in6_addr prefix_buf, *prefix;
667 unsigned long lifetime;
670 if (len < sizeof(struct route_info)) {
674 /* Sanity check for prefix_len and length */
675 if (rinfo->length > 3) {
677 } else if (rinfo->prefix_len > 128) {
679 } else if (rinfo->prefix_len > 64) {
680 if (rinfo->length < 2) {
683 } else if (rinfo->prefix_len > 0) {
684 if (rinfo->length < 1) {
689 pref = rinfo->route_pref;
690 if (pref == ICMPV6_ROUTER_PREF_INVALID)
693 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
/* length == 3 means a full 16-byte prefix is present in the option. */
695 if (rinfo->length == 3)
696 prefix = (struct in6_addr *)rinfo->prefix;
698 /* this function is safe */
699 ipv6_addr_prefix(&prefix_buf,
700 (struct in6_addr *)rinfo->prefix,
702 prefix = &prefix_buf;
705 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
/* Zero lifetime withdraws the route. */
708 if (rt && !lifetime) {
714 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
717 rt->rt6i_flags = RTF_ROUTEINFO |
718 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
721 if (!addrconf_finite_timeout(lifetime))
722 rt6_clean_expires(rt);
724 rt6_set_expires(rt, jiffies + HZ * lifetime);
/*
 * BACKTRACK - lookup-retry macro: when the match is the null entry, walk
 * back up the fib6 tree (re-descending into source subtrees when present)
 * until a node with route info is found or the root is reached.
 * NOTE(review): the 'restart' label/goto lines are elided in this listing.
 */
732 #define BACKTRACK(__net, saddr) \
734 if (rt == __net->ipv6.ip6_null_entry) { \
735 struct fib6_node *pn; \
737 if (fn->fn_flags & RTN_TL_ROOT) \
740 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
741 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
744 if (fn->fn_flags & RTN_RTINFO) \
/*
 * ip6_pol_route_lookup - policy-rule callback: simple (no-clone) route
 * lookup in one table under tb6_lock, with device/multipath selection
 * and BACKTRACK on a null match. Takes a dst use reference.
 */
750 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
751 struct fib6_table *table,
752 struct flowi6 *fl6, int flags)
754 struct fib6_node *fn;
757 read_lock_bh(&table->tb6_lock);
758 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
761 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
/* ECMP: only when the caller did not pin an outgoing interface. */
762 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
763 rt = rt6_multipath_select(rt, fl6);
764 BACKTRACK(net, &fl6->saddr);
766 dst_use(&rt->dst, jiffies);
767 read_unlock_bh(&table->tb6_lock);
/* ip6_route_lookup - public wrapper dispatching through fib rules. */
772 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
775 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
777 EXPORT_SYMBOL_GPL(ip6_route_lookup);
/*
 * rt6_lookup - convenience lookup by daddr/saddr/oif; 'strict' maps to
 * RT6_LOOKUP_F_IFACE. Returns an rt6_info (error handling elided here).
 */
779 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
780 const struct in6_addr *saddr, int oif, int strict)
782 struct flowi6 fl6 = {
786 struct dst_entry *dst;
787 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
790 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
791 flags |= RT6_LOOKUP_F_HAS_SADDR;
794 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
796 return (struct rt6_info *) dst;
803 EXPORT_SYMBOL(rt6_lookup);
805 /* ip6_ins_rt is called with FREE table->tb6_lock.
806 It takes new route entry, the addition fails by any reason the
807 route is freed. In any case, if caller does not hold it, it may
/* __ip6_ins_rt - insert rt into its table under the table write lock. */
811 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
814 struct fib6_table *table;
816 table = rt->rt6i_table;
817 write_lock_bh(&table->tb6_lock);
818 err = fib6_add(&table->tb6_root, rt, info);
819 write_unlock_bh(&table->tb6_lock);
/* ip6_ins_rt - insert with a default nl_info carrying the route's netns. */
824 int ip6_ins_rt(struct rt6_info *rt)
826 struct nl_info info = {
827 .nl_net = dev_net(rt->dst.dev),
829 return __ip6_ins_rt(rt, &info);
/*
 * rt6_alloc_cow - clone 'ort' into a host (/128) RTF_CACHE route for
 * daddr/saddr and bind a neighbour entry to it. On neighbour-table
 * overflow it temporarily relaxes the GC sysctls, forces a GC pass and
 * retries (retry loop partially elided in this listing).
 */
832 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
833 const struct in6_addr *daddr,
834 const struct in6_addr *saddr)
842 rt = ip6_rt_copy(ort, daddr);
/* Avoid the retry dance in softirq context. */
845 int attempts = !in_softirq();
847 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
/* Non-gateway clone of a non-host route to its own prefix address
 * is effectively an anycast destination. */
848 if (ort->rt6i_dst.plen != 128 &&
849 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
850 rt->rt6i_flags |= RTF_ANYCAST;
851 rt->rt6i_gateway = *daddr;
854 rt->rt6i_flags |= RTF_CACHE;
856 #ifdef CONFIG_IPV6_SUBTREES
857 if (rt->rt6i_src.plen && saddr) {
858 rt->rt6i_src.addr = *saddr;
859 rt->rt6i_src.plen = 128;
864 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
865 struct net *net = dev_net(rt->dst.dev);
866 int saved_rt_min_interval =
867 net->ipv6.sysctl.ip6_rt_gc_min_interval;
868 int saved_rt_elasticity =
869 net->ipv6.sysctl.ip6_rt_gc_elasticity;
871 if (attempts-- > 0) {
/* Make GC maximally aggressive for one forced pass... */
872 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
873 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
875 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
/* ...then restore the saved sysctl values. */
877 net->ipv6.sysctl.ip6_rt_gc_elasticity =
879 net->ipv6.sysctl.ip6_rt_gc_min_interval =
880 saved_rt_min_interval;
884 net_warn_ratelimited("Neighbour table overflow\n");
/*
 * rt6_alloc_clone - cheaper clone: copy 'ort', mark it RTF_CACHE and
 * share (clone) the parent's neighbour entry instead of re-resolving.
 */
893 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
894 const struct in6_addr *daddr)
896 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
899 rt->rt6i_flags |= RTF_CACHE;
900 rt->n = neigh_clone(ort->n);
/*
 * ip6_pol_route - main lookup used for input/output paths: select the
 * best route (reachability-strict unless forwarding), then, if it is not
 * already a cached host route, create a COW/clone cache entry and insert
 * it, re-looking-up on insert races. Goto labels and the relookup loop
 * are elided in this listing.
 */
905 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
906 struct flowi6 *fl6, int flags)
908 struct fib6_node *fn;
909 struct rt6_info *rt, *nrt;
/* Routers don't require next-hop reachability for selection. */
913 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
915 strict |= flags & RT6_LOOKUP_F_IFACE;
918 read_lock_bh(&table->tb6_lock);
921 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
924 rt = rt6_select(fn, oif, strict | reachable);
925 if (rt->rt6i_nsiblings && oif == 0)
926 rt = rt6_multipath_select(rt, fl6);
927 BACKTRACK(net, &fl6->saddr);
928 if (rt == net->ipv6.ip6_null_entry ||
929 rt->rt6i_flags & RTF_CACHE)
933 read_unlock_bh(&table->tb6_lock);
/* Need a per-destination cache entry: COW if a neighbour must be
 * bound, plain clone if the route is not yet host-specific. */
935 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
936 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
937 else if (!(rt->dst.flags & DST_HOST))
938 nrt = rt6_alloc_clone(rt, &fl6->daddr);
943 rt = nrt ? : net->ipv6.ip6_null_entry;
947 err = ip6_ins_rt(nrt);
956 * Race condition! In the gap, when table->tb6_lock was
957 * released someone could insert this route. Relookup.
968 read_unlock_bh(&table->tb6_lock);
970 rt->dst.lastuse = jiffies;
/* Input-path wrapper: oif taken from the incoming interface. */
976 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
977 struct flowi6 *fl6, int flags)
979 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
/* ip6_route_input_lookup - rule dispatch for the input path; strict
 * iface match for scoped destinations (except PIM register devices). */
982 static struct dst_entry *ip6_route_input_lookup(struct net *net,
983 struct net_device *dev,
984 struct flowi6 *fl6, int flags)
986 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
987 flags |= RT6_LOOKUP_F_IFACE;
989 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
/*
 * ip6_route_input - build a flowi6 from the received packet's header and
 * attach the looked-up dst to the skb.
 */
992 void ip6_route_input(struct sk_buff *skb)
994 const struct ipv6hdr *iph = ipv6_hdr(skb);
995 struct net *net = dev_net(skb->dev);
996 int flags = RT6_LOOKUP_F_HAS_SADDR;
997 struct flowi6 fl6 = {
998 .flowi6_iif = skb->dev->ifindex,
1000 .saddr = iph->saddr,
/* Flow label taken from the first 4 bytes of the IPv6 header. */
1001 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1002 .flowi6_mark = skb->mark,
1003 .flowi6_proto = iph->nexthdr,
1006 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
/* Output-path wrapper: oif taken from the flow's outgoing interface. */
1009 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1010 struct flowi6 *fl6, int flags)
1012 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
/*
 * ip6_route_output - public output-route lookup; honors the socket's
 * bound device and source-address preferences when a socket is given.
 */
1015 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1020 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1022 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1023 flags |= RT6_LOOKUP_F_IFACE;
1025 if (!ipv6_addr_any(&fl6->saddr))
1026 flags |= RT6_LOOKUP_F_HAS_SADDR;
1028 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1030 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1033 EXPORT_SYMBOL(ip6_route_output);
/*
 * ip6_blackhole_route - clone dst_orig into a blackhole dst (IPsec
 * larval-route use case): copies identity fields and metrics but
 * discards all traffic. Always releases dst_orig; returns the clone or
 * ERR_PTR(-ENOMEM).
 */
1035 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1037 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1038 struct dst_entry *new = NULL;
1040 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1044 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1045 rt6_init_peer(rt, net->ipv6.peers);
1048 new->input = dst_discard;
1049 new->output = dst_discard;
/* Share read-only metrics directly; deep-copy writable ones. */
1051 if (dst_metrics_read_only(&ort->dst))
1052 new->_metrics = ort->dst._metrics;
1054 dst_copy_metrics(new, &ort->dst);
1055 rt->rt6i_idev = ort->rt6i_idev;
1057 in6_dev_hold(rt->rt6i_idev);
1059 rt->rt6i_gateway = ort->rt6i_gateway;
1060 rt->rt6i_flags = ort->rt6i_flags;
1061 rt6_clean_expires(rt);
1062 rt->rt6i_metric = 0;
1064 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1065 #ifdef CONFIG_IPV6_SUBTREES
1066 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1072 dst_release(dst_orig);
1073 return new ? new : ERR_PTR(-ENOMEM);
1077 * Destination cache support functions
/*
 * ip6_dst_check - dst_ops validity check: the dst is stale if the netns
 * route generation changed or the fib node's serial number no longer
 * matches the caller's cookie.
 */
1080 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1082 struct rt6_info *rt;
1084 rt = (struct rt6_info *) dst;
1086 /* All IPV6 dsts are created with ->obsolete set to the value
1087 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1088 * into this function always.
1090 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1093 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * ip6_negative_advice - drop an expired cached route when the stack
 * reports persistent trouble with it (body partially elided).
 */
1099 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1101 struct rt6_info *rt = (struct rt6_info *) dst;
1104 if (rt->rt6i_flags & RTF_CACHE) {
1105 if (rt6_check_expired(rt)) {
/*
 * ip6_link_failure - send ADDR_UNREACH and expire the cached route (or
 * invalidate the default route's fib node serial number).
 */
1117 static void ip6_link_failure(struct sk_buff *skb)
1119 struct rt6_info *rt;
1121 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1123 rt = (struct rt6_info *) skb_dst(skb);
1125 if (rt->rt6i_flags & RTF_CACHE)
1126 rt6_update_expires(rt, 0);
1127 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1128 rt->rt6i_node->fn_sernum = -1;
/*
 * ip6_rt_update_pmtu - dst_ops hook: lower the route MTU on a host
 * (/128) route; below IPV6_MIN_MTU keep the minimum and turn on
 * ALLFRAG. The update expires after ip6_rt_mtu_expires.
 */
1132 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1133 struct sk_buff *skb, u32 mtu)
1135 struct rt6_info *rt6 = (struct rt6_info*)dst;
1138 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1139 struct net *net = dev_net(dst->dev);
1141 rt6->rt6i_flags |= RTF_MODIFIED;
1142 if (mtu < IPV6_MIN_MTU) {
1143 u32 features = dst_metric(dst, RTAX_FEATURES);
1145 features |= RTAX_FEATURE_ALLFRAG;
1146 dst_metric_set(dst, RTAX_FEATURES, features);
1148 dst_metric_set(dst, RTAX_MTU, mtu);
1149 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
/*
 * ip6_update_pmtu - apply a PMTU update for the flow described by the
 * packet's header (used e.g. when handling Packet Too Big).
 */
1153 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1156 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1157 struct dst_entry *dst;
1160 memset(&fl6, 0, sizeof(fl6));
1161 fl6.flowi6_oif = oif;
1162 fl6.flowi6_mark = mark;
1163 fl6.flowi6_flags = 0;
1164 fl6.daddr = iph->daddr;
1165 fl6.saddr = iph->saddr;
1166 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1168 dst = ip6_route_output(net, NULL, &fl6);
1170 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1173 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
/* Socket convenience wrapper: netns/oif/mark taken from the socket. */
1175 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1177 ip6_update_pmtu(skb, sock_net(sk),  mtu,
1178 sk->sk_bound_dev_if, sk->sk_mark);
1180 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
/*
 * ip6_redirect - apply an ICMPv6 redirect for the flow described by the
 * packet's header: look up the output route and run the redirect hook.
 */
1182 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1184 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1185 struct dst_entry *dst;
1188 memset(&fl6, 0, sizeof(fl6));
1189 fl6.flowi6_oif = oif;
1190 fl6.flowi6_mark = mark;
1191 fl6.flowi6_flags = 0;
1192 fl6.daddr = iph->daddr;
1193 fl6.saddr = iph->saddr;
1194 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1196 dst = ip6_route_output(net, NULL, &fl6);
1198 rt6_do_redirect(dst, NULL, skb);
1201 EXPORT_SYMBOL_GPL(ip6_redirect);
/* Socket convenience wrapper: netns/oif/mark taken from the socket. */
1203 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1205 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1207 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
/*
 * ip6_default_advmss - advertised MSS: path MTU minus IPv6+TCP headers,
 * clamped below by the ip6_rt_min_advmss sysctl and above per the
 * IPV6_MAXPLEN comment below.
 */
1209 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1211 struct net_device *dev = dst->dev;
1212 unsigned int mtu = dst_mtu(dst);
1213 struct net *net = dev_net(dev);
1215 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1217 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1218 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1221 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1222 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1223 * IPV6_MAXPLEN is also valid and means: "any MSS,
1224 * rely only on pmtu discovery"
1226 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * ip6_mtu - dst_ops mtu hook: the raw MTU metric if set, otherwise the
 * device's configured IPv6 MTU (fallback branches elided here).
 */
1231 static unsigned int ip6_mtu(const struct dst_entry *dst)
1233 struct inet6_dev *idev;
1234 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1242 idev = __in6_dev_get(dst->dev);
1244 mtu = idev->cnf.mtu6;
/* Uncached ICMPv6 dsts are kept on a private list, reaped by
 * icmp6_dst_gc() under icmp6_dst_lock. */
1250 static struct dst_entry *icmp6_dst_gc_list;
1251 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * icmp6_dst_alloc - build a standalone host dst for sending an ICMPv6
 * packet (not inserted in the FIB); chained on icmp6_dst_gc_list and
 * passed through xfrm_lookup before return.
 */
1253 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1254 struct neighbour *neigh,
1257 struct dst_entry *dst;
1258 struct rt6_info *rt;
1259 struct inet6_dev *idev = in6_dev_get(dev);
1260 struct net *net = dev_net(dev);
1262 if (unlikely(!idev))
1263 return ERR_PTR(-ENODEV);
1265 rt = ip6_dst_alloc(net, dev, 0, NULL);
1266 if (unlikely(!rt)) {
1268 dst = ERR_PTR(-ENOMEM);
/* Resolve the neighbour for the destination ourselves if none given. */
1275 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1276 if (IS_ERR(neigh)) {
1279 return ERR_CAST(neigh);
1283 rt->dst.flags |= DST_HOST;
1284 rt->dst.output  = ip6_output;
1286 atomic_set(&rt->dst.__refcnt, 1);
1287 rt->rt6i_dst.addr = fl6->daddr;
1288 rt->rt6i_dst.plen = 128;
1289 rt->rt6i_idev = idev;
1290 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1292 spin_lock_bh(&icmp6_dst_lock);
1293 rt->dst.next = icmp6_dst_gc_list;
1294 icmp6_dst_gc_list = &rt->dst;
1295 spin_unlock_bh(&icmp6_dst_lock);
1297 fib6_force_start_gc(net);
1299 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
/* icmp6_dst_gc - free list entries whose refcount dropped to zero. */
1305 int icmp6_dst_gc(void)
1307 struct dst_entry *dst, **pprev;
1310 spin_lock_bh(&icmp6_dst_lock);
1311 pprev = &icmp6_dst_gc_list;
1313 while ((dst = *pprev) != NULL) {
1314 if (!atomic_read(&dst->__refcnt)) {
1323 spin_unlock_bh(&icmp6_dst_lock);
/* icmp6_clean_all - unlink entries for which 'func' returns true. */
1328 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1331 struct dst_entry *dst, **pprev;
1333 spin_lock_bh(&icmp6_dst_lock);
1334 pprev = &icmp6_dst_gc_list;
1335 while ((dst = *pprev) != NULL) {
1336 struct rt6_info *rt = (struct rt6_info *) dst;
1337 if (func(rt, arg)) {
1344 spin_unlock_bh(&icmp6_dst_lock);
/*
 * ip6_dst_gc - dst cache garbage collector, tuned by the ip6_rt_gc_*
 * sysctls: skip if called again too soon and under the size cap,
 * otherwise run fib6 GC with an adaptive expire interval.
 * Returns nonzero while the cache is still over rt_max_size.
 */
1347 static int ip6_dst_gc(struct dst_ops *ops)
1349 unsigned long now = jiffies;
1350 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1351 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1352 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1353 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1354 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1355 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1358 entries = dst_entries_get_fast(ops);
1359 if (time_after(rt_last_gc + rt_min_interval, now) &&
1360 entries <= rt_max_size)
/* Each consecutive GC pass grows the expire horizon... */
1363 net->ipv6.ip6_rt_gc_expire++;
1364 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1365 net->ipv6.ip6_rt_last_gc = now;
1366 entries = dst_entries_get_slow(ops);
1367 if (entries < ops->gc_thresh)
1368 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
/* ...and decays it again by 1/2^elasticity when pressure eases. */
1370 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1371 return entries > rt_max_size;
1374 /* Clean host part of a prefix. Not necessary in radix tree,
1375 but results in cleaner routing tables.
1377 Remove it only when all the things will work!
/*
 * ip6_dst_hoplimit - effective hop limit for a dst: the HOPLIMIT metric
 * if set, else the device's per-idev hop_limit, else the netns default.
 */
1380 int ip6_dst_hoplimit(struct dst_entry *dst)
1382 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1383 if (hoplimit == 0) {
1384 struct net_device *dev = dst->dev;
1385 struct inet6_dev *idev;
1388 idev = __in6_dev_get(dev);
1390 hoplimit = idev->cnf.hop_limit;
1392 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1397 EXPORT_SYMBOL(ip6_dst_hoplimit);
/*
 * ip6_route_add - create and insert a route described by a fib6_config
 * (netlink/ioctl path): validates prefix lengths, resolves the table and
 * device, classifies input/output handlers, checks the gateway is a
 * usable (link-local unicast, or via a non-gateway route) next hop,
 * binds a neighbour when needed, applies metrics, and inserts.
 * NOTE(review): many error paths / goto labels are elided in this
 * listing — verify details against the full source.
 */
1403 int ip6_route_add(struct fib6_config *cfg)
1406 struct net *net = cfg->fc_nlinfo.nl_net;
1407 struct rt6_info *rt = NULL;
1408 struct net_device *dev = NULL;
1409 struct inet6_dev *idev = NULL;
1410 struct fib6_table *table;
1413 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1415 #ifndef CONFIG_IPV6_SUBTREES
/* Source-routed prefixes need CONFIG_IPV6_SUBTREES. */
1416 if (cfg->fc_src_len)
1419 if (cfg->fc_ifindex) {
1421 dev = dev_get_by_index(net, cfg->fc_ifindex);
1424 idev = in6_dev_get(dev);
1429 if (cfg->fc_metric == 0)
1430 cfg->fc_metric = IP6_RT_PRIO_USER;
/* Without NLM_F_CREATE only an existing table may be used (warn but
 * fall through to creation for compatibility). */
1433 if (cfg->fc_nlinfo.nlh &&
1434 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1435 table = fib6_get_table(net, cfg->fc_table);
1437 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1438 table = fib6_new_table(net, cfg->fc_table);
1441 table = fib6_new_table(net, cfg->fc_table);
1447 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1454 if (cfg->fc_flags & RTF_EXPIRES)
1455 rt6_set_expires(rt, jiffies +
1456 clock_t_to_jiffies(cfg->fc_expires));
1458 rt6_clean_expires(rt);
1460 if (cfg->fc_protocol == RTPROT_UNSPEC)
1461 cfg->fc_protocol = RTPROT_BOOT;
1462 rt->rt6i_protocol = cfg->fc_protocol;
/* Choose the input handler from the destination's address type. */
1464 addr_type = ipv6_addr_type(&cfg->fc_dst);
1466 if (addr_type & IPV6_ADDR_MULTICAST)
1467 rt->dst.input = ip6_mc_input;
1468 else if (cfg->fc_flags & RTF_LOCAL)
1469 rt->dst.input = ip6_input;
1471 rt->dst.input = ip6_forward;
1473 rt->dst.output = ip6_output;
1475 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1476 rt->rt6i_dst.plen = cfg->fc_dst_len;
1477 if (rt->rt6i_dst.plen == 128)
1478 rt->dst.flags |= DST_HOST;
/* Non-host routes with explicit metrics get their own metrics array. */
1480 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1481 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1486 dst_init_metrics(&rt->dst, metrics, 0);
1488 #ifdef CONFIG_IPV6_SUBTREES
1489 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1490 rt->rt6i_src.plen = cfg->fc_src_len;
1493 rt->rt6i_metric = cfg->fc_metric;
1495 /* We cannot add true routes via loopback here,
1496 they would result in kernel looping; promote them to reject routes
1498 if ((cfg->fc_flags & RTF_REJECT) ||
1499 (dev && (dev->flags & IFF_LOOPBACK) &&
1500 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1501 !(cfg->fc_flags & RTF_LOCAL))) {
1502 /* hold loopback dev/idev if we haven't done so. */
1503 if (dev != net->loopback_dev) {
1508 dev = net->loopback_dev;
1510 idev = in6_dev_get(dev);
1516 rt->dst.output = ip6_pkt_discard_out;
1517 rt->dst.input = ip6_pkt_discard;
1518 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Map the route type to the dst error reported to senders. */
1519 switch (cfg->fc_type) {
1521 rt->dst.error = -EINVAL;
1524 rt->dst.error = -EACCES;
1527 rt->dst.error = -EAGAIN;
1530 rt->dst.error = -ENETUNREACH;
1536 if (cfg->fc_flags & RTF_GATEWAY) {
1537 const struct in6_addr *gw_addr;
1540 gw_addr = &cfg->fc_gateway;
1541 rt->rt6i_gateway = *gw_addr;
1542 gwa_type = ipv6_addr_type(gw_addr);
1544 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1545 struct rt6_info *grt;
1547 /* IPv6 strictly inhibits using not link-local
1548 addresses as nexthop address.
1549 Otherwise, router will not able to send redirects.
1550 It is very good, but in some (rare!) circumstances
1551 (SIT, PtP, NBMA NOARP links) it is handy to allow
1552 some exceptions. --ANK
1555 if (!(gwa_type & IPV6_ADDR_UNICAST))
/* The gateway itself must be reachable via a non-gateway route. */
1558 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1560 err = -EHOSTUNREACH;
1564 if (dev != grt->dst.dev) {
1570 idev = grt->rt6i_idev;
1572 in6_dev_hold(grt->rt6i_idev);
1574 if (!(grt->rt6i_flags & RTF_GATEWAY))
1582 if (!dev || (dev->flags & IFF_LOOPBACK))
/* Preferred source address, if given, must be assigned on 'dev'. */
1590 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1591 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1595 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1596 rt->rt6i_prefsrc.plen = 128;
1598 rt->rt6i_prefsrc.plen = 0;
1600 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1601 err = rt6_bind_neighbour(rt, dev);
1606 rt->rt6i_flags = cfg->fc_flags;
/* Apply user-supplied RTAX_* metrics from the netlink attribute. */
1613 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1614 int type = nla_type(nla);
1617 if (type > RTAX_MAX) {
1622 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1628 rt->rt6i_idev = idev;
1629 rt->rt6i_table = table;
1631 cfg->fc_nlinfo.nl_net = dev_net(dev);
1633 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
/*
 * __ip6_del_rt - unlink @rt from its FIB table and notify via @info.
 * NOTE(review): several original lines are not visible in this chunk
 * (local 'err', the null-entry error path, and the epilogue that
 * releases the dst and returns err); only comments were added here.
 */
1645 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1648 struct fib6_table *table;
1649 struct net *net = dev_net(rt->dst.dev);
/* the per-netns null route must never be removed from the tree */
1651 if (rt == net->ipv6.ip6_null_entry) {
1656 table = rt->rt6i_table;
/* fib6_del() requires the table write lock, BHs disabled */
1657 write_lock_bh(&table->tb6_lock);
1658 err = fib6_del(rt, info);
1659 write_unlock_bh(&table->tb6_lock);
/*
 * ip6_del_rt - public helper: delete @rt with a default nl_info
 * carrying only the route's network namespace (no netlink header,
 * portid 0, so the notification is not attributed to a requester).
 */
1666 int ip6_del_rt(struct rt6_info *rt)
1668 struct nl_info info = {
1669 .nl_net = dev_net(rt->dst.dev),
1671 return __ip6_del_rt(rt, &info);
/*
 * ip6_route_del - find and delete the route matching @cfg.
 * Looks up the exact dst/src prefix node, then scans its leaf chain
 * for a route matching the optional ifindex / gateway / metric
 * selectors.  On a match the table read lock is dropped (the route is
 * held by the lookup reference - the dst_hold is on a line not visible
 * here) before __ip6_del_rt() takes the write lock.
 * NOTE(review): error paths (-ESRCH etc.) are on lines missing from
 * this view; only comments were added.
 */
1674 static int ip6_route_del(struct fib6_config *cfg)
1676 struct fib6_table *table;
1677 struct fib6_node *fn;
1678 struct rt6_info *rt;
1681 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1685 read_lock_bh(&table->tb6_lock);
1687 fn = fib6_locate(&table->tb6_root,
1688 &cfg->fc_dst, cfg->fc_dst_len,
1689 &cfg->fc_src, cfg->fc_src_len);
/* walk all routes sharing this prefix node */
1692 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
/* selector: output interface, if the caller specified one */
1693 if (cfg->fc_ifindex &&
1695 rt->dst.dev->ifindex != cfg->fc_ifindex))
/* selector: gateway address, if RTF_GATEWAY requested */
1697 if (cfg->fc_flags & RTF_GATEWAY &&
1698 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
/* selector: metric (0 means "any") */
1700 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
/* matched: drop the read lock before deleting under the write lock */
1703 read_unlock_bh(&table->tb6_lock);
1705 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1708 read_unlock_bh(&table->tb6_lock);
/*
 * rt6_do_redirect - process a received ICMPv6 Redirect (RFC 4861 8.x)
 * for the route @dst that carried the triggering packet.
 * Validates the message (length, non-multicast destination,
 * link-local-unicast target unless on-link), checks that this host is
 * a host (not forwarding) and accepts redirects, parses ND options for
 * a target link-layer address, updates the neighbour cache, then
 * clones the route into a per-destination RTF_CACHE entry pointing at
 * the new next hop and fires a NETEVENT_REDIRECT notification.
 * NOTE(review): many original lines are missing from this view
 * (lladdr/on_link initialisation, several 'goto out' error paths, the
 * 'out:' label); only comments were added here.
 */
1713 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1715 struct net *net = dev_net(skb->dev);
1716 struct netevent_redirect netevent;
1717 struct rt6_info *rt, *nrt = NULL;
1718 const struct in6_addr *target;
1719 struct ndisc_options ndopts;
1720 const struct in6_addr *dest;
1721 struct neighbour *old_neigh;
1722 struct inet6_dev *in6_dev;
1723 struct neighbour *neigh;
1724 struct icmp6hdr *icmph;
1725 int optlen, on_link;
/* option length = ICMPv6 payload minus header and the two addresses */
1728 optlen = skb->tail - skb->transport_header;
1729 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1732 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
/* target and destination addresses immediately follow the header */
1736 icmph = icmp6_hdr(skb);
1737 target = (const struct in6_addr *) (icmph + 1);
1740 if (ipv6_addr_is_multicast(dest)) {
1741 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
/* target == dest means the destination is on-link (RFC 4861 8.1) */
1746 if (ipv6_addr_equal(dest, target)) {
1748 } else if (ipv6_addr_type(target) !=
1749 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1750 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1754 in6_dev = __in6_dev_get(skb->dev);
/* routers and redirect-disabled hosts ignore redirects */
1757 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1761 * The IP source address of the Redirect MUST be the same as the current
1762 * first-hop router for the specified ICMP Destination Address.
/* ND options start right after the destination address */
1765 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1766 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1771 if (ndopts.nd_opts_tgt_lladdr) {
1772 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1775 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1780 rt = (struct rt6_info *) dst;
1781 if (rt == net->ipv6.ip6_null_entry) {
1782 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1786 /* Redirect received -> path was valid.
1787 * Look, redirects are sent only in response to data packets,
1788 * so that this nexthop apparently is reachable. --ANK
1790 dst_confirm(&rt->dst);
/* create (or find) the neighbour entry for the new next hop */
1792 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1796 /* Duplicate redirect: silently ignore. */
1798 if (neigh == old_neigh)
1802 * We have finally decided to accept it.
/* mark the new neighbour as a router unless the target is on-link */
1805 neigh_update(neigh, lladdr, NUD_STALE,
1806 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1807 NEIGH_UPDATE_F_OVERRIDE|
1808 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1809 NEIGH_UPDATE_F_ISROUTER))
/* clone a host route for @dest via the redirected next hop */
1812 nrt = ip6_rt_copy(rt, dest);
1816 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1818 nrt->rt6i_flags &= ~RTF_GATEWAY;
1820 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1821 nrt->n = neigh_clone(neigh);
1823 if (ip6_ins_rt(nrt))
/* tell interested parties (e.g. offload drivers) about the switch */
1826 netevent.old = &rt->dst;
1827 netevent.old_neigh = old_neigh;
1828 netevent.new = &nrt->dst;
1829 netevent.new_neigh = neigh;
1830 netevent.daddr = dest;
1831 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* the superseded cache entry can be deleted right away */
1833 if (rt->rt6i_flags & RTF_CACHE) {
1834 rt = (struct rt6_info *) dst_clone(&rt->dst);
1839 neigh_release(neigh);
1843 * Misc support functions
/*
 * ip6_rt_copy - allocate a new rt6_info cloned from @ort but keyed to
 * the single host @dest (plen 128, DST_HOST).  Copies dst ops,
 * metrics, idev, gateway and flags; metric is reset to 0 for cache
 * entries.  Expiry is inherited via rt6_set_from() only for
 * RA-learned default routes, otherwise cleared.
 * NOTE(review): the NULL-check on the allocation and the final return
 * are on lines not visible in this chunk.
 */
1846 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1847 const struct in6_addr *dest)
1849 struct net *net = dev_net(ort->dst.dev);
1850 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1854 rt->dst.input = ort->dst.input;
1855 rt->dst.output = ort->dst.output;
/* the copy always describes a single destination host */
1856 rt->dst.flags |= DST_HOST;
1858 rt->rt6i_dst.addr = *dest;
1859 rt->rt6i_dst.plen = 128;
1860 dst_copy_metrics(&rt->dst, &ort->dst);
1861 rt->dst.error = ort->dst.error;
1862 rt->rt6i_idev = ort->rt6i_idev;
1864 in6_dev_hold(rt->rt6i_idev);
1865 rt->dst.lastuse = jiffies;
1867 rt->rt6i_gateway = ort->rt6i_gateway;
1868 rt->rt6i_flags = ort->rt6i_flags;
/* only autoconf default routes keep a link to their parent's expiry */
1869 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1870 (RTF_DEFAULT | RTF_ADDRCONF))
1871 rt6_set_from(rt, ort);
1873 rt6_clean_expires(rt);
/* cache clones do not compete on metric */
1874 rt->rt6i_metric = 0;
1876 #ifdef CONFIG_IPV6_SUBTREES
1877 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1879 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1880 rt->rt6i_table = ort->rt6i_table;
1885 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_get_route_info - look up an RA Route Information (RFC 4191)
 * route for @prefix/@prefixlen learned via @gwaddr on @ifindex in the
 * RT6_TABLE_INFO table.  Returns the matching route or NULL.
 * NOTE(review): the dst_hold() taken on a match is on a line not
 * visible here; only comments were added.
 */
1886 static struct rt6_info *rt6_get_route_info(struct net *net,
1887 const struct in6_addr *prefix, int prefixlen,
1888 const struct in6_addr *gwaddr, int ifindex)
1890 struct fib6_node *fn;
1891 struct rt6_info *rt = NULL;
1892 struct fib6_table *table;
1894 table = fib6_get_table(net, RT6_TABLE_INFO);
1898 read_lock_bh(&table->tb6_lock);
1899 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1903 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
/* must match the interface the RA arrived on */
1904 if (rt->dst.dev->ifindex != ifindex)
/* only routes installed from Route Information options qualify */
1906 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1908 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1914 read_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_route_info - install a route learned from an RA Route
 * Information option (RFC 4191) into RT6_TABLE_INFO and return it.
 * The add may fail (e.g. duplicate); the subsequent lookup is the
 * authoritative result either way.
 */
1918 static struct rt6_info *rt6_add_route_info(struct net *net,
1919 const struct in6_addr *prefix, int prefixlen,
1920 const struct in6_addr *gwaddr, int ifindex,
1923 struct fib6_config cfg = {
1924 .fc_table = RT6_TABLE_INFO,
1925 .fc_metric = IP6_RT_PRIO_USER,
1926 .fc_ifindex = ifindex,
1927 .fc_dst_len = prefixlen,
/* RTF_PREF() encodes the router-preference bits from the RA */
1928 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1929 RTF_UP | RTF_PREF(pref),
/* kernel-originated: no requesting socket or netlink header */
1930 .fc_nlinfo.portid = 0,
1931 .fc_nlinfo.nlh = NULL,
1932 .fc_nlinfo.nl_net = net,
1935 cfg.fc_dst = *prefix;
1936 cfg.fc_gateway = *gwaddr;
1938 /* We should treat it as a default route if prefix length is 0. */
1940 cfg.fc_flags |= RTF_DEFAULT;
/* return value deliberately ignored: re-lookup below decides */
1942 ip6_route_add(&cfg);
1944 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * rt6_get_dflt_router - find the RA-learned default route via gateway
 * @addr on @dev in RT6_TABLE_DFLT; returns it (or NULL).
 * NOTE(review): the dst_hold() on a match is on a line not visible in
 * this chunk.
 */
1948 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1950 struct rt6_info *rt;
1951 struct fib6_table *table;
1953 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1957 read_lock_bh(&table->tb6_lock);
/* default routes all hang off the table root's leaf chain */
1958 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1959 if (dev == rt->dst.dev &&
1960 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1961 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1966 read_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_dflt_router - install an RA-learned default route via
 * @gwaddr on @dev (with router preference @pref) and return it.
 * Like rt6_add_route_info(), the add result is ignored and the
 * route is re-looked-up to obtain the installed entry.
 */
1970 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1971 struct net_device *dev,
1974 struct fib6_config cfg = {
1975 .fc_table = RT6_TABLE_DFLT,
1976 .fc_metric = IP6_RT_PRIO_USER,
1977 .fc_ifindex = dev->ifindex,
/* RTF_EXPIRES: default routers age out per the RA router lifetime */
1978 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1979 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1980 .fc_nlinfo.portid = 0,
1981 .fc_nlinfo.nlh = NULL,
1982 .fc_nlinfo.nl_net = dev_net(dev),
1985 cfg.fc_gateway = *gwaddr;
1987 ip6_route_add(&cfg);
1989 return rt6_get_dflt_router(gwaddr, dev);
/*
 * rt6_purge_dflt_routers - delete every autoconf default route in
 * RT6_TABLE_DFLT.  The read lock is dropped before each deletion
 * (ip6_del_rt takes the write lock); the scan restarts afterwards on
 * lines not visible in this chunk.
 */
1992 void rt6_purge_dflt_routers(struct net *net)
1994 struct rt6_info *rt;
1995 struct fib6_table *table;
1997 /* NOTE: Keep consistent with rt6_get_dflt_router */
1998 table = fib6_get_table(net, RT6_TABLE_DFLT);
2003 read_lock_bh(&table->tb6_lock);
2004 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2005 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
/* unlock before deleting; deletion needs the write lock */
2007 read_unlock_bh(&table->tb6_lock);
2012 read_unlock_bh(&table->tb6_lock);
/*
 * rtmsg_to_fib6_config - translate a legacy ioctl struct in6_rtmsg
 * into the internal struct fib6_config.  Always targets the main
 * table; rtmsg_info carries the expiry for RTF_EXPIRES routes.
 */
2015 static void rtmsg_to_fib6_config(struct net *net,
2016 struct in6_rtmsg *rtmsg,
2017 struct fib6_config *cfg)
/* start from a clean config; only the fields below are meaningful */
2019 memset(cfg, 0, sizeof(*cfg));
2021 cfg->fc_table = RT6_TABLE_MAIN;
2022 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2023 cfg->fc_metric = rtmsg->rtmsg_metric;
2024 cfg->fc_expires = rtmsg->rtmsg_info;
2025 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2026 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2027 cfg->fc_flags = rtmsg->rtmsg_flags;
2029 cfg->fc_nlinfo.nl_net = net;
2031 cfg->fc_dst = rtmsg->rtmsg_dst;
2032 cfg->fc_src = rtmsg->rtmsg_src;
2033 cfg->fc_gateway = rtmsg->rtmsg_gateway;
/*
 * ipv6_route_ioctl - legacy SIOCADDRT/SIOCDELRT entry point.
 * Requires CAP_NET_ADMIN; copies the user's in6_rtmsg, converts it to
 * a fib6_config and dispatches to add/del.  The rtnl locking and
 * default-return path are on lines not visible in this chunk.
 */
2036 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2038 struct fib6_config cfg;
2039 struct in6_rtmsg rtmsg;
2043 case SIOCADDRT: /* Add a route */
2044 case SIOCDELRT: /* Delete a route */
2045 if (!capable(CAP_NET_ADMIN))
2047 err = copy_from_user(&rtmsg, arg,
2048 sizeof(struct in6_rtmsg));
2052 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2057 err = ip6_route_add(&cfg);
2060 err = ip6_route_del(&cfg);
2074 * Drop the packet on the floor
/*
 * ip6_pkt_drop - common "no route" handler: bump the appropriate
 * SNMP counter and send an ICMPv6 Destination Unreachable with the
 * given @code, then free the skb (kfree_skb on a line not visible
 * here).  Unspecified-destination input packets count as address
 * errors and get no ICMP reply.
 */
2077 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2080 struct dst_entry *dst = skb_dst(skb);
2081 switch (ipstats_mib_noroutes) {
2082 case IPSTATS_MIB_INNOROUTES:
2083 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
/* packets to :: are an address error, not a routing failure */
2084 if (type == IPV6_ADDR_ANY) {
2085 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2086 IPSTATS_MIB_INADDRERRORS);
2090 case IPSTATS_MIB_OUTNOROUTES:
2091 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2092 ipstats_mib_noroutes);
2095 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
/* dst input handler for reject routes: drop with "no route" code */
2100 static int ip6_pkt_discard(struct sk_buff *skb)
2102 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* dst output handler for reject routes; skb->dev must be set for ICMP */
2105 static int ip6_pkt_discard_out(struct sk_buff *skb)
2107 skb->dev = skb_dst(skb)->dev;
2108 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2111 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* input handler for prohibit routes: drop with "administratively
 * prohibited" (only built with multiple routing tables) */
2113 static int ip6_pkt_prohibit(struct sk_buff *skb)
2115 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* output counterpart of ip6_pkt_prohibit */
2118 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2120 skb->dev = skb_dst(skb)->dev;
2121 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2127 * Allocate a dst for local (unicast / anycast) address.
/*
 * addrconf_dst_alloc - build the host route (plen 128) that backs a
 * local unicast or anycast address on the loopback device.  Binds a
 * neighbour entry and places the route in RT6_TABLE_LOCAL; caller
 * inserts it.  Returns ERR_PTR on allocation or neighbour failure.
 * NOTE(review): the anycast/unicast selector parameter and some error
 * handling lines are not visible in this chunk.
 */
2130 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2131 const struct in6_addr *addr,
2134 struct net *net = dev_net(idev->dev);
2135 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2139 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2140 return ERR_PTR(-ENOMEM);
2145 rt->dst.flags |= DST_HOST;
2146 rt->dst.input = ip6_input;
2147 rt->dst.output = ip6_output;
2148 rt->rt6i_idev = idev;
2150 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
/* flag chosen by the (not visible) anycast parameter */
2152 rt->rt6i_flags |= RTF_ANYCAST;
2154 rt->rt6i_flags |= RTF_LOCAL;
2155 err = rt6_bind_neighbour(rt, rt->dst.dev);
2158 return ERR_PTR(err);
2161 rt->rt6i_dst.addr = *addr;
2162 rt->rt6i_dst.plen = 128;
2163 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
/* hand the caller a referenced dst */
2165 atomic_set(&rt->dst.__refcnt, 1);
/*
 * ip6_route_get_saddr - pick the source address for traffic using
 * @rt towards @daddr.  A configured preferred source on the route
 * wins; otherwise fall back to normal source address selection on the
 * route's device.  (The 'prefs' parameter and the err declaration are
 * on lines not visible here.)
 */
2170 int ip6_route_get_saddr(struct net *net,
2171 struct rt6_info *rt,
2172 const struct in6_addr *daddr,
2174 struct in6_addr *saddr)
2176 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
/* plen != 0 means an explicit RTA_PREFSRC was configured */
2178 if (rt->rt6i_prefsrc.plen)
2179 *saddr = rt->rt6i_prefsrc.addr;
2181 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2182 daddr, prefs, saddr);
2186 /* remove deleted ip from prefsrc entries */
/* argument bundle for fib6_remove_prefsrc(): which device/netns the
 * deleted address @addr belonged to */
2187 struct arg_dev_net_ip {
2188 struct net_device *dev;
2190 struct in6_addr *addr;
/*
 * fib6_remove_prefsrc - fib6_clean_all callback: clear the preferred
 * source of any route that referenced the just-removed address.
 * A NULL dev in the argument matches every device.
 */
2193 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2195 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2196 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2197 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2199 if (((void *)rt->dst.dev == dev || !dev) &&
2200 rt != net->ipv6.ip6_null_entry &&
2201 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2202 /* remove prefsrc entry */
/* plen 0 marks "no preferred source set" */
2203 rt->rt6i_prefsrc.plen = 0;
/*
 * rt6_remove_prefsrc - called when address @ifp is deleted: walk all
 * FIB entries and drop any prefsrc reference to it.
 */
2208 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2210 struct net *net = dev_net(ifp->idev->dev);
2211 struct arg_dev_net_ip adni = {
2212 .dev = ifp->idev->dev,
2216 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
/* argument bundle for fib6_ifdown(): device going down + its netns */
2219 struct arg_dev_net {
2220 struct net_device *dev;
/*
 * fib6_ifdown - fib6_clean_all callback: select for deletion every
 * route on the downed device (NULL dev matches all devices), sparing
 * the per-netns null entry.  The nonzero "delete me" return is on a
 * line not visible in this chunk.
 */
2224 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2226 const struct arg_dev_net *adn = arg;
2227 const struct net_device *dev = adn->dev;
2229 if ((rt->dst.dev == dev || !dev) &&
2230 rt != adn->net->ipv6.ip6_null_entry)
/*
 * rt6_ifdown - purge all routes (FIB and ICMP-created clones) that use
 * @dev when the device goes down.
 */
2238 void rt6_ifdown(struct net *net, struct net_device *dev)
2240 struct arg_dev_net adn = {
2243 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2244 icmp6_clean_all(fib6_ifdown, &adn);
/* argument bundle for rt6_mtu_change_route(): device and its new MTU */
2247 struct rt6_mtu_change_arg {
2248 struct net_device *dev;
/*
 * rt6_mtu_change_route - fib6_clean_all callback: propagate a device
 * MTU change into route PMTU metrics, per the policy documented in
 * the long comment below.  Locked RTAX_MTU metrics are left alone.
 */
2252 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2254 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2255 struct inet6_dev *idev;
2257 /* In IPv6 pmtu discovery is not optional,
2258 so that RTAX_MTU lock cannot disable it.
2259 We still use this lock to block changes
2260 caused by addrconf/ndisc.
2263 idev = __in6_dev_get(arg->dev);
2267 /* For administrative MTU increase, there is no way to discover
2268 IPv6 PMTU increase, so PMTU increase should be updated here.
2269 Since RFC 1981 doesn't include administrative MTU increase
2270 update PMTU increase is a MUST. (i.e. jumbo frame)
2273 If new MTU is less than route PMTU, this new MTU will be the
2274 lowest MTU in the path, update the route PMTU to reflect PMTU
2275 decreases; if new MTU is greater than route PMTU, and the
2276 old MTU is the lowest MTU in the path, update the route PMTU
2277 to reflect the increase. In this case if the other nodes' MTU
2278 also have the lowest MTU, TOO BIG MESSAGE will be lead to
/* update only routes on this device whose MTU metric is unlocked and
 * either decreases or was pinned to the old device MTU */
2281 if (rt->dst.dev == arg->dev &&
2282 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2283 (dst_mtu(&rt->dst) >= arg->mtu ||
2284 (dst_mtu(&rt->dst) < arg->mtu &&
2285 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2286 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
/*
 * rt6_mtu_change - apply a new device MTU to every affected route.
 * (The struct initializers for .dev/.mtu are on lines not visible.)
 */
2293 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2295 struct rt6_mtu_change_arg arg = {
2298 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
/* netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE */
2301 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2302 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2303 [RTA_OIF] = { .type = NLA_U32 },
2304 [RTA_IIF] = { .type = NLA_U32 },
2305 [RTA_PRIORITY] = { .type = NLA_U32 },
2306 [RTA_METRICS] = { .type = NLA_NESTED },
2307 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
/*
 * rtm_to_fib6_config - parse an RTM_NEWROUTE/RTM_DELROUTE netlink
 * message into struct fib6_config.  Reject-type routes (unreachable,
 * blackhole, prohibit, throw) become RTF_REJECT; RTN_LOCAL becomes
 * RTF_LOCAL.  cfg->fc_mx/fc_mp point into the received skb, so the
 * config must be consumed before the skb is freed.
 */
2310 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2311 struct fib6_config *cfg)
2314 struct nlattr *tb[RTA_MAX+1];
2317 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2322 rtm = nlmsg_data(nlh);
2323 memset(cfg, 0, sizeof(*cfg));
2325 cfg->fc_table = rtm->rtm_table;
2326 cfg->fc_dst_len = rtm->rtm_dst_len;
2327 cfg->fc_src_len = rtm->rtm_src_len;
2328 cfg->fc_flags = RTF_UP;
2329 cfg->fc_protocol = rtm->rtm_protocol;
2330 cfg->fc_type = rtm->rtm_type;
/* all "negative" route types are stored as reject routes; fc_type
 * preserves which one for error-code selection at add time */
2332 if (rtm->rtm_type == RTN_UNREACHABLE ||
2333 rtm->rtm_type == RTN_BLACKHOLE ||
2334 rtm->rtm_type == RTN_PROHIBIT ||
2335 rtm->rtm_type == RTN_THROW)
2336 cfg->fc_flags |= RTF_REJECT;
2338 if (rtm->rtm_type == RTN_LOCAL)
2339 cfg->fc_flags |= RTF_LOCAL;
2341 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2342 cfg->fc_nlinfo.nlh = nlh;
2343 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2345 if (tb[RTA_GATEWAY]) {
2346 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2347 cfg->fc_flags |= RTF_GATEWAY;
/* addresses may be sent truncated to the prefix length */
2351 int plen = (rtm->rtm_dst_len + 7) >> 3;
2353 if (nla_len(tb[RTA_DST]) < plen)
2356 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2360 int plen = (rtm->rtm_src_len + 7) >> 3;
2362 if (nla_len(tb[RTA_SRC]) < plen)
2365 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2368 if (tb[RTA_PREFSRC])
2369 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2372 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2374 if (tb[RTA_PRIORITY])
2375 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
/* nested metrics / multipath blobs are borrowed from the skb */
2377 if (tb[RTA_METRICS]) {
2378 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2379 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2383 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2385 if (tb[RTA_MULTIPATH]) {
2386 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2387 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
/*
 * ip6_route_multipath - add (@add != 0) or delete each next hop of an
 * RTA_MULTIPATH request, treating every rtnexthop as an independent
 * single-path route.  On delete, failures are tolerated so all next
 * hops get a removal attempt; on add, an error triggers rollback of
 * the hops already added (rollback code is on lines not visible).
 */
2395 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2397 struct fib6_config r_cfg;
2398 struct rtnexthop *rtnh;
2401 int err = 0, last_err = 0;
2404 rtnh = (struct rtnexthop *)cfg->fc_mp;
2405 remaining = cfg->fc_mp_len;
2407 /* Parse a Multipath Entry */
2408 while (rtnh_ok(rtnh, remaining)) {
/* each hop starts from the shared config, then gets its own
 * ifindex/gateway overrides */
2409 memcpy(&r_cfg, cfg, sizeof(*cfg));
2410 if (rtnh->rtnh_ifindex)
2411 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2413 attrlen = rtnh_attrlen(rtnh);
2415 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2417 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2419 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2420 r_cfg.fc_flags |= RTF_GATEWAY;
2423 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2426 /* If we are trying to remove a route, do not stop the
2427 * loop when ip6_route_del() fails (because next hop is
2428 * already gone), we should try to remove all next hops.
2431 /* If add fails, we should try to delete all
2432 * next hops that have been already added.
2438 /* Because each route is added like a single route we remove
2439 * this flag after the first nexthop (if there is a collision,
2440 * we have already fail to add the first nexthop:
2441 * fib6_add_rt2node() has reject it).
2443 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2444 rtnh = rtnh_next(rtnh, &remaining);
/*
 * inet6_rtm_delroute - RTM_DELROUTE handler: parse the request and
 * delete either a multipath set or a single route.
 */
2450 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2452 struct fib6_config cfg;
2455 err = rtm_to_fib6_config(skb, nlh, &cfg);
2460 return ip6_route_multipath(&cfg, 0);
2462 return ip6_route_del(&cfg);
/*
 * inet6_rtm_newroute - RTM_NEWROUTE handler: parse the request and
 * add either a multipath set or a single route.
 */
2465 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2467 struct fib6_config cfg;
2470 err = rtm_to_fib6_config(skb, nlh, &cfg);
2475 return ip6_route_multipath(&cfg, 1);
2477 return ip6_route_add(&cfg);
/*
 * rt6_nlmsg_size - worst-case buffer size for one RTM_NEWROUTE
 * notification built by rt6_fill_node(); each term mirrors one
 * attribute that function may emit.
 */
2480 static inline size_t rt6_nlmsg_size(void)
2482 return NLMSG_ALIGN(sizeof(struct rtmsg))
2483 + nla_total_size(16) /* RTA_SRC */
2484 + nla_total_size(16) /* RTA_DST */
2485 + nla_total_size(16) /* RTA_GATEWAY */
2486 + nla_total_size(16) /* RTA_PREFSRC */
2487 + nla_total_size(4) /* RTA_TABLE */
2488 + nla_total_size(4) /* RTA_IIF */
2489 + nla_total_size(4) /* RTA_OIF */
2490 + nla_total_size(4) /* RTA_PRIORITY */
2491 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2492 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * rt6_fill_node - serialise one rt6_info into a netlink RTM message.
 * @dst/@src non-NULL means this answers an RTM_GETROUTE for that exact
 * flow (full 128-bit addresses are reported); @prefix restricts dumps
 * to RTF_PREFIX_RT routes; @nowait is passed to ip6mr resolution.
 * Returns the nlmsg_end() result or -EMSGSIZE via nla_put_failure.
 * NOTE(review): several lines (locals, some braces/else arms) are not
 * visible in this chunk; only comments were added.
 */
2495 static int rt6_fill_node(struct net *net,
2496 struct sk_buff *skb, struct rt6_info *rt,
2497 struct in6_addr *dst, struct in6_addr *src,
2498 int iif, int type, u32 portid, u32 seq,
2499 int prefix, int nowait, unsigned int flags)
2502 struct nlmsghdr *nlh;
2505 struct neighbour *n;
2507 if (prefix) { /* user wants prefix routes only */
2508 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2509 /* success since this is not a prefix route */
2514 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2518 rtm = nlmsg_data(nlh);
2519 rtm->rtm_family = AF_INET6;
2520 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2521 rtm->rtm_src_len = rt->rt6i_src.plen;
2524 table = rt->rt6i_table->tb6_id;
2526 table = RT6_TABLE_UNSPEC;
2527 rtm->rtm_table = table;
2528 if (nla_put_u32(skb, RTA_TABLE, table))
2529 goto nla_put_failure;
/* map the stored dst.error back to the user-visible route type */
2530 if (rt->rt6i_flags & RTF_REJECT) {
2531 switch (rt->dst.error) {
2533 rtm->rtm_type = RTN_BLACKHOLE;
2536 rtm->rtm_type = RTN_PROHIBIT;
2539 rtm->rtm_type = RTN_THROW;
2542 rtm->rtm_type = RTN_UNREACHABLE;
2546 else if (rt->rt6i_flags & RTF_LOCAL)
2547 rtm->rtm_type = RTN_LOCAL;
2548 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2549 rtm->rtm_type = RTN_LOCAL;
2551 rtm->rtm_type = RTN_UNICAST;
2553 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2554 rtm->rtm_protocol = rt->rt6i_protocol;
/* redirect- and RA-learned routes report their true origin */
2555 if (rt->rt6i_flags & RTF_DYNAMIC)
2556 rtm->rtm_protocol = RTPROT_REDIRECT;
2557 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2558 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2559 rtm->rtm_protocol = RTPROT_RA;
2561 rtm->rtm_protocol = RTPROT_KERNEL;
2564 if (rt->rt6i_flags & RTF_CACHE)
2565 rtm->rtm_flags |= RTM_F_CLONED;
/* getroute replies carry the queried address, full length */
2568 if (nla_put(skb, RTA_DST, 16, dst))
2569 goto nla_put_failure;
2570 rtm->rtm_dst_len = 128;
2571 } else if (rtm->rtm_dst_len)
2572 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2573 goto nla_put_failure;
2574 #ifdef CONFIG_IPV6_SUBTREES
2576 if (nla_put(skb, RTA_SRC, 16, src))
2577 goto nla_put_failure;
2578 rtm->rtm_src_len = 128;
2579 } else if (rtm->rtm_src_len &&
2580 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2581 goto nla_put_failure;
2584 #ifdef CONFIG_IPV6_MROUTE
/* multicast destinations are resolved through the mroute engine */
2585 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2586 int err = ip6mr_get_route(net, skb, rtm, nowait);
2591 goto nla_put_failure;
2593 if (err == -EMSGSIZE)
2594 goto nla_put_failure;
2599 if (nla_put_u32(skb, RTA_IIF, iif))
2600 goto nla_put_failure;
2602 struct in6_addr saddr_buf;
2603 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2604 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2605 goto nla_put_failure;
2608 if (rt->rt6i_prefsrc.plen) {
2609 struct in6_addr saddr_buf;
2610 saddr_buf = rt->rt6i_prefsrc.addr;
2611 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2612 goto nla_put_failure;
2615 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2616 goto nla_put_failure;
2620 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2621 goto nla_put_failure;
2625 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2626 goto nla_put_failure;
2627 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2628 goto nla_put_failure;
/* 0 expiry means "does not expire" in the cacheinfo */
2630 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2632 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2633 goto nla_put_failure;
2635 return nlmsg_end(skb, nlh);
2638 nlmsg_cancel(skb, nlh);
/*
 * rt6_dump_route - per-route callback for RTM_GETROUTE dumps; honours
 * the RTM_F_PREFIX filter when the request carried a full rtmsg.
 */
2642 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2644 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2647 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2648 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2649 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2653 return rt6_fill_node(arg->net,
2654 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2655 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2656 prefix, 0, NLM_F_MULTI);
/*
 * inet6_rtm_getroute - RTM_GETROUTE handler: build a flow from the
 * request's src/dst/iif/oif, resolve it through the input or output
 * lookup path, and unicast the serialised result back to the caller.
 * NOTE(review): some error-path lines (-ENODEV, kfree_skb on fill
 * failure) are not visible in this chunk.
 */
2659 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2661 struct net *net = sock_net(in_skb->sk);
2662 struct nlattr *tb[RTA_MAX+1];
2663 struct rt6_info *rt;
2664 struct sk_buff *skb;
2667 int err, iif = 0, oif = 0;
2669 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2674 memset(&fl6, 0, sizeof(fl6));
2677 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2680 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2684 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2687 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2691 iif = nla_get_u32(tb[RTA_IIF]);
2694 oif = nla_get_u32(tb[RTA_OIF]);
/* iif set: simulate reception on that device (input path) */
2697 struct net_device *dev;
2700 dev = __dev_get_by_index(net, iif);
2706 fl6.flowi6_iif = iif;
2708 if (!ipv6_addr_any(&fl6.saddr))
2709 flags |= RT6_LOOKUP_F_HAS_SADDR;
2711 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
/* otherwise: locally-originated lookup (output path) */
2714 fl6.flowi6_oif = oif;
2716 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2719 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2726 /* Reserve room for dummy headers, this skb can pass
2727 through good chunk of routing engine.
2729 skb_reset_mac_header(skb);
2730 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* the reply skb takes ownership of the route reference */
2732 skb_dst_set(skb, &rt->dst);
2734 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2735 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2736 nlh->nlmsg_seq, 0, 0, 0);
2742 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * inet6_rt_notify - broadcast an RTM event for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group; reports the error via
 * rtnl_set_sk_err on failure.
 */
2747 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2749 struct sk_buff *skb;
2750 struct net *net = info->nl_net;
2755 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
/* rt6_nlmsg_size() guarantees the fill below cannot overflow */
2757 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2761 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2762 event, info->portid, seq, 0, 0, 0);
2764 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2765 WARN_ON(err == -EMSGSIZE);
2769 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2770 info->nlh, gfp_any());
2774 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/*
 * ip6_route_dev_notify - netdevice notifier: when the per-netns
 * loopback device registers, point the special template routes
 * (null, and with multiple tables also prohibit/blackhole) at it.
 */
2777 static int ip6_route_dev_notify(struct notifier_block *this,
2778 unsigned long event, void *data)
2780 struct net_device *dev = (struct net_device *)data;
2781 struct net *net = dev_net(dev);
2783 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2784 net->ipv6.ip6_null_entry->dst.dev = dev;
2785 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2786 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2787 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2788 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2789 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2790 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2801 #ifdef CONFIG_PROC_FS
/*
 * rt6_info_route - print one route as a /proc/net/ipv6_route line:
 * dst/plen, src/plen (zeros without subtrees), gateway (or zeros when
 * there is no neighbour), metric, refcnt, use count, flags, devname.
 */
2812 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2814 struct seq_file *m = p_arg;
2815 struct neighbour *n;
2817 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2819 #ifdef CONFIG_IPV6_SUBTREES
2820 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen)
2822 seq_puts(m, "00000000000000000000000000000000 00 ");
2826 seq_printf(m, "%pi6", n->primary_key);
2828 seq_puts(m, "00000000000000000000000000000000");
2830 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2831 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2832 rt->dst.__use, rt->rt6i_flags,
2833 rt->dst.dev ? rt->dst.dev->name : "");
/* seq_file show: dump every FIB route via the read-only tree walker */
2839 static int ipv6_route_show(struct seq_file *m, void *v)
2840 struct net *net = (struct net *)m->private;
2841 fib6_clean_all_ro(net, rt6_info_route, 0, m);
/* open handler for /proc/net/ipv6_route (netns-aware single_open) */
2846 static int ipv6_route_open(struct inode *inode, struct file *file)
2848 return single_open_net(inode, file, ipv6_route_show);
/* file_operations for /proc/net/ipv6_route (.read line not visible) */
2849 static const struct file_operations ipv6_route_proc_fops = {
2850 .owner = THIS_MODULE,
2851 .open = ipv6_route_open,
2853 .llseek = seq_lseek,
2854 .release = single_release_net,
/*
 * rt6_stats_seq_show - /proc/net/rt6_stats: seven hex counters
 * (nodes, route nodes, allocs, entries, cache, dst entries, discards).
 */
2857 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2859 struct net *net = (struct net *)seq->private;
2860 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2861 net->ipv6.rt6_stats->fib_nodes,
2862 net->ipv6.rt6_stats->fib_route_nodes,
2863 net->ipv6.rt6_stats->fib_rt_alloc,
2864 net->ipv6.rt6_stats->fib_rt_entries,
2865 net->ipv6.rt6_stats->fib_rt_cache,
2866 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2867 net->ipv6.rt6_stats->fib_discarded_routes);
/* open handler for /proc/net/rt6_stats */
2874 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2876 return single_open_net(inode, file, rt6_stats_seq_show);
/* file_operations for /proc/net/rt6_stats (.read line not visible) */
2877 static const struct file_operations rt6_stats_seq_fops = {
2878 .owner = THIS_MODULE,
2879 .open = rt6_stats_seq_open,
2881 .llseek = seq_lseek,
2882 .release = single_release_net,
2884 #endif /* CONFIG_PROC_FS */
2886 #ifdef CONFIG_SYSCTL
/*
 * ipv6_sysctl_rtcache_flush - handler for net.ipv6.route.flush:
 * writing a delay triggers an immediate FIB garbage-collection run
 * for the netns stashed in ctl->extra1 (non-positive delay => now).
 * The write-only guard and return are on lines not visible here.
 */
2889 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2890 void __user *buffer, size_t *lenp, loff_t *ppos)
2897 net = (struct net *)ctl->extra1;
2898 delay = net->ipv6.sysctl.flush_delay;
2899 proc_dointvec(ctl, write, buffer, lenp, ppos);
2900 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template for the per-netns net.ipv6.route.* sysctl table; the .data
 * pointers are rewired per namespace in ipv6_route_sysctl_init()
 * (indices there must stay in step with the order here).  .mode
 * fields and the terminating entry are on lines not visible.
 */
2904 ctl_table ipv6_route_table_template[] = {
2906 .procname = "flush",
2907 .data = &init_net.ipv6.sysctl.flush_delay,
2908 .maxlen = sizeof(int),
2910 .proc_handler = ipv6_sysctl_rtcache_flush
2913 .procname = "gc_thresh",
2914 .data = &ip6_dst_ops_template.gc_thresh,
2915 .maxlen = sizeof(int),
2917 .proc_handler = proc_dointvec,
2920 .procname = "max_size",
2921 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2922 .maxlen = sizeof(int),
2924 .proc_handler = proc_dointvec,
2927 .procname = "gc_min_interval",
2928 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2929 .maxlen = sizeof(int),
2931 .proc_handler = proc_dointvec_jiffies,
2934 .procname = "gc_timeout",
2935 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2936 .maxlen = sizeof(int),
2938 .proc_handler = proc_dointvec_jiffies,
2941 .procname = "gc_interval",
2942 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2943 .maxlen = sizeof(int),
2945 .proc_handler = proc_dointvec_jiffies,
2948 .procname = "gc_elasticity",
2949 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2950 .maxlen = sizeof(int),
2952 .proc_handler = proc_dointvec,
2955 .procname = "mtu_expires",
2956 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2957 .maxlen = sizeof(int),
2959 .proc_handler = proc_dointvec_jiffies,
2962 .procname = "min_adv_mss",
2963 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2964 .maxlen = sizeof(int),
2966 .proc_handler = proc_dointvec,
/* same variable as gc_min_interval, but exposed in milliseconds */
2969 .procname = "gc_min_interval_ms",
2970 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2971 .maxlen = sizeof(int),
2973 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * ipv6_route_sysctl_init - duplicate the sysctl template for @net and
 * retarget each entry's .data at the namespace's own variables.  The
 * indices must match the template order above.  Entries [3] and [9]
 * share one variable (seconds vs. milliseconds view).  The NULL-check
 * on kmemdup and the return are on lines not visible here.
 */
2978 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2980 struct ctl_table *table;
2982 table = kmemdup(ipv6_route_table_template,
2983 sizeof(ipv6_route_table_template),
2987 table[0].data = &net->ipv6.sysctl.flush_delay;
/* extra1 carries the netns for ipv6_sysctl_rtcache_flush() */
2988 table[0].extra1 = net;
2989 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2990 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2991 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2992 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2993 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2994 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2995 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2996 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2997 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3004 static int __net_init ip6_route_net_init(struct net *net)
3008 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3009 sizeof(net->ipv6.ip6_dst_ops));
3011 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3012 goto out_ip6_dst_ops;
3014 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3015 sizeof(*net->ipv6.ip6_null_entry),
3017 if (!net->ipv6.ip6_null_entry)
3018 goto out_ip6_dst_entries;
3019 net->ipv6.ip6_null_entry->dst.path =
3020 (struct dst_entry *)net->ipv6.ip6_null_entry;
3021 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3022 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3023 ip6_template_metrics, true);
3025 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3026 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3027 sizeof(*net->ipv6.ip6_prohibit_entry),
3029 if (!net->ipv6.ip6_prohibit_entry)
3030 goto out_ip6_null_entry;
3031 net->ipv6.ip6_prohibit_entry->dst.path =
3032 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3033 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3034 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3035 ip6_template_metrics, true);
3037 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3038 sizeof(*net->ipv6.ip6_blk_hole_entry),
3040 if (!net->ipv6.ip6_blk_hole_entry)
3041 goto out_ip6_prohibit_entry;
3042 net->ipv6.ip6_blk_hole_entry->dst.path =
3043 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3044 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3045 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3046 ip6_template_metrics, true);
3049 net->ipv6.sysctl.flush_delay = 0;
3050 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3051 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3052 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3053 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3054 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3055 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3056 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3058 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3064 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3065 out_ip6_prohibit_entry:
3066 kfree(net->ipv6.ip6_prohibit_entry);
3068 kfree(net->ipv6.ip6_null_entry);
3070 out_ip6_dst_entries:
3071 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3076 static void __net_exit ip6_route_net_exit(struct net *net)
3078 kfree(net->ipv6.ip6_null_entry);
3079 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3080 kfree(net->ipv6.ip6_prohibit_entry);
3081 kfree(net->ipv6.ip6_blk_hole_entry);
3083 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3086 static int __net_init ip6_route_net_init_late(struct net *net)
3088 #ifdef CONFIG_PROC_FS
3089 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3090 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3095 static void __net_exit ip6_route_net_exit_late(struct net *net)
3097 #ifdef CONFIG_PROC_FS
3098 proc_net_remove(net, "ipv6_route");
3099 proc_net_remove(net, "rt6_stats");
3103 static struct pernet_operations ip6_route_net_ops = {
3104 .init = ip6_route_net_init,
3105 .exit = ip6_route_net_exit,
3108 static int __net_init ipv6_inetpeer_init(struct net *net)
3110 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3114 inet_peer_base_init(bp);
3115 net->ipv6.peers = bp;
3119 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3121 struct inet_peer_base *bp = net->ipv6.peers;
3123 net->ipv6.peers = NULL;
3124 inetpeer_invalidate_tree(bp);
3128 static struct pernet_operations ipv6_inetpeer_ops = {
3129 .init = ipv6_inetpeer_init,
3130 .exit = ipv6_inetpeer_exit,
3133 static struct pernet_operations ip6_route_net_late_ops = {
3134 .init = ip6_route_net_init_late,
3135 .exit = ip6_route_net_exit_late,
3138 static struct notifier_block ip6_route_dev_notifier = {
3139 .notifier_call = ip6_route_dev_notify,
3143 int __init ip6_route_init(void)
3148 ip6_dst_ops_template.kmem_cachep =
3149 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3150 SLAB_HWCACHE_ALIGN, NULL);
3151 if (!ip6_dst_ops_template.kmem_cachep)
3154 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3156 goto out_kmem_cache;
3158 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3160 goto out_dst_entries;
3162 ret = register_pernet_subsys(&ip6_route_net_ops);
3164 goto out_register_inetpeer;
3166 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3168 /* Registering of the loopback is done before this portion of code,
3169 * the loopback reference in rt6_info will not be taken, do it
3170 * manually for init_net */
3171 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3172 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3173 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3174 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3175 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3176 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3177 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3181 goto out_register_subsys;
3187 ret = fib6_rules_init();
3191 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3193 goto fib6_rules_init;
3196 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3197 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3198 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3199 goto out_register_late_subsys;
3201 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3203 goto out_register_late_subsys;
3208 out_register_late_subsys:
3209 unregister_pernet_subsys(&ip6_route_net_late_ops);
3211 fib6_rules_cleanup();
3216 out_register_subsys:
3217 unregister_pernet_subsys(&ip6_route_net_ops);
3218 out_register_inetpeer:
3219 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3221 dst_entries_destroy(&ip6_dst_blackhole_ops);
3223 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3227 void ip6_route_cleanup(void)
3229 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3230 unregister_pernet_subsys(&ip6_route_net_late_ops);
3231 fib6_rules_cleanup();
3234 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3235 unregister_pernet_subsys(&ip6_route_net_ops);
3236 dst_entries_destroy(&ip6_dst_blackhole_ops);
3237 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);