2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
62 #include <asm/uaccess.h>
65 #include <linux/sysctl.h>
69 RT6_NUD_FAIL_HARD = -3,
70 RT6_NUD_FAIL_PROBE = -2,
71 RT6_NUD_FAIL_DO_RR = -1,
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void ip6_dst_destroy(struct dst_entry *);
82 static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84 static int ip6_dst_gc(struct dst_ops *ops);
86 static int ip6_pkt_discard(struct sk_buff *skb);
87 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int ip6_pkt_prohibit(struct sk_buff *skb);
89 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
95 static void rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
98 #ifdef CONFIG_IPV6_ROUTE_INFO
99 static struct rt6_info *rt6_add_route_info(struct net *net,
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex,
103 static struct rt6_info *rt6_get_route_info(struct net *net,
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex);
/* Bind an inet_peer entry (long-lived per-destination state) to this route.
 * NOTE(review): several interior lines are elided in this excerpt,
 * including the rt6_set_peer() failure handling.
 */
108 static void rt6_bind_peer(struct rt6_info *rt, int create)
110 struct inet_peer_base *base;
111 struct inet_peer *peer;
113 base = inetpeer_base_ptr(rt->_rt6i_peer);
117 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
119 if (!rt6_set_peer(rt, peer))
/* Return the route's peer, binding one first if none is attached yet.
 * May return NULL when no peer could be bound (e.g. create == 0).
 */
124 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
126 if (rt6_has_peer(rt))
127 return rt6_peer_ptr(rt);
129 rt6_bind_peer(rt, create);
130 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
/* Convenience wrapper: look up the peer, creating it if necessary. */
133 static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
135 return __rt6_get_peer(rt, 1);
/* dst_ops.cow_metrics hook.  RTF_CACHE clones get special treatment
 * (that branch body is elided in this excerpt); all other routes fall
 * back to the generic copy-on-write metrics helper.
 */
138 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
140 struct rt6_info *rt = (struct rt6_info *)dst;
142 if (rt->rt6i_flags & RTF_CACHE)
145 return dst_cow_metrics_generic(dst, old);
/* Choose the address to key the neighbour lookup on: the route's
 * gateway when one is set, otherwise the packet's destination.
 */
148 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
152 struct in6_addr *p = &rt->rt6i_gateway;
154 if (!ipv6_addr_any(p))
155 return (const void *) p;
157 return &ipv6_hdr(skb)->daddr;
/* dst_ops.neigh_lookup hook: find (or create) the ND neighbour entry
 * for this dst, keyed on the gateway or final destination.
 */
161 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
165 struct rt6_info *rt = (struct rt6_info *) dst;
168 daddr = choose_neigh_daddr(rt, skb, daddr);
169 n = __ipv6_neigh_lookup(dst->dev, daddr);
/* No cached entry: fall through and create one in the ND table. */
172 return neigh_create(&nd_tbl, daddr, dst->dev);
/* dst_ops vtable for regular IPv6 dst entries; cloned per-netns. */
175 static struct dst_ops ip6_dst_ops_template = {
179 .check = ip6_dst_check,
180 .default_advmss = ip6_default_advmss,
182 .cow_metrics = ipv6_cow_metrics,
183 .destroy = ip6_dst_destroy,
184 .ifdown = ip6_dst_ifdown,
185 .negative_advice = ip6_negative_advice,
186 .link_failure = ip6_link_failure,
187 .update_pmtu = ip6_rt_update_pmtu,
188 .redirect = rt6_do_redirect,
189 .local_out = __ip6_local_out,
190 .neigh_lookup = ip6_neigh_lookup,
/* Blackhole dst MTU: the raw metric if set, else the device MTU. */
193 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
195 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
197 return mtu ? : dst->dev->mtu;
/* Blackhole routes must never react to PMTU updates: no-op stub. */
200 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
201 struct sk_buff *skb, u32 mtu)
/* Likewise, redirects against a blackhole dst are ignored. */
205 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
/* Blackhole metrics are never made writable (body elided here). */
210 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
/* dst_ops vtable for blackhole dsts (see ip6_blackhole_route()):
 * shares check/destroy with regular dsts but stubs out every
 * operation that would mutate routing state.
 */
216 static struct dst_ops ip6_dst_blackhole_ops = {
218 .destroy = ip6_dst_destroy,
219 .check = ip6_dst_check,
220 .mtu = ip6_blackhole_mtu,
221 .default_advmss = ip6_default_advmss,
222 .update_pmtu = ip6_rt_blackhole_update_pmtu,
223 .redirect = ip6_rt_blackhole_redirect,
224 .cow_metrics = ip6_rt_blackhole_cow_metrics,
225 .neigh_lookup = ip6_neigh_lookup,
/* Metrics template shared by the special route templates below. */
228 static const u32 ip6_template_metrics[RTAX_MAX] = {
229 [RTAX_HOPLIMIT - 1] = 0,
/* "Null" route: matched traffic is discarded with -ENETUNREACH.
 * rt6i_metric ~0 keeps these templates at the lowest priority.
 */
232 static const struct rt6_info ip6_null_entry_template = {
234 .__refcnt = ATOMIC_INIT(1),
236 .obsolete = DST_OBSOLETE_FORCE_CHK,
237 .error = -ENETUNREACH,
238 .input = ip6_pkt_discard,
239 .output = ip6_pkt_discard_out,
241 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
242 .rt6i_protocol = RTPROT_KERNEL,
243 .rt6i_metric = ~(u32) 0,
244 .rt6i_ref = ATOMIC_INIT(1),
247 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* "Prohibit" route: like null, but replies with an admin-prohibited
 * error via the ip6_pkt_prohibit* handlers.
 */
249 static const struct rt6_info ip6_prohibit_entry_template = {
251 .__refcnt = ATOMIC_INIT(1),
253 .obsolete = DST_OBSOLETE_FORCE_CHK,
255 .input = ip6_pkt_prohibit,
256 .output = ip6_pkt_prohibit_out,
258 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
259 .rt6i_protocol = RTPROT_KERNEL,
260 .rt6i_metric = ~(u32) 0,
261 .rt6i_ref = ATOMIC_INIT(1),
/* "Blackhole" route: silently drop, no ICMP error generated. */
264 static const struct rt6_info ip6_blk_hole_entry_template = {
266 .__refcnt = ATOMIC_INIT(1),
268 .obsolete = DST_OBSOLETE_FORCE_CHK,
270 .input = dst_discard,
271 .output = dst_discard_sk,
273 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
274 .rt6i_protocol = RTPROT_KERNEL,
275 .rt6i_metric = ~(u32) 0,
276 .rt6i_ref = ATOMIC_INIT(1),
281 /* allocate dst with ip6_dst_ops */
282 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
283 struct net_device *dev,
285 struct fib6_table *table)
287 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
288 0, DST_OBSOLETE_FORCE_CHK, flags);
291 struct dst_entry *dst = &rt->dst;
/* Zero only the rt6_info tail beyond the embedded dst_entry. */
293 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
/* Peer base is per-table when a table is given, else per-netns. */
294 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
295 INIT_LIST_HEAD(&rt->rt6i_siblings);
/* dst_ops.destroy: drop metrics and the inet6_dev reference.
 * NOTE(review): the idev/from release lines are elided in this excerpt.
 */
300 static void ip6_dst_destroy(struct dst_entry *dst)
302 struct rt6_info *rt = (struct rt6_info *)dst;
303 struct inet6_dev *idev = rt->rt6i_idev;
304 struct dst_entry *from = dst->from;
306 dst_destroy_metrics_generic(dst);
309 rt->rt6i_idev = NULL;
/* dst_ops.ifdown: when a device goes away, re-home the route's
 * inet6_dev reference onto the netns loopback device.
 */
317 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
320 struct rt6_info *rt = (struct rt6_info *)dst;
321 struct inet6_dev *idev = rt->rt6i_idev;
322 struct net_device *loopback_dev =
323 dev_net(dev)->loopback_dev;
325 if (dev != loopback_dev) {
326 if (idev && idev->dev == dev) {
327 struct inet6_dev *loopback_idev =
328 in6_dev_get(loopback_dev);
330 rt->rt6i_idev = loopback_idev;
/* A route is expired when its own RTF_EXPIRES timer has passed, or,
 * for a clone, when the route it was copied from (dst.from) expired.
 */
337 static bool rt6_check_expired(const struct rt6_info *rt)
339 if (rt->rt6i_flags & RTF_EXPIRES) {
340 if (time_after(jiffies, rt->dst.expires))
342 } else if (rt->dst.from) {
343 return rt6_check_expired((struct rt6_info *) rt->dst.from);
348 /* Multipath route selection:
349 * Hash based function using packet header and flowlabel.
350 * Adapted from fib_info_hashfn()
352 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
353 const struct flowi6 *fl6)
355 unsigned int val = fl6->flowi6_proto;
357 val ^= ipv6_addr_hash(&fl6->daddr);
358 val ^= ipv6_addr_hash(&fl6->saddr);
360 /* Works only if this is not encapsulated */
361 switch (fl6->flowi6_proto) {
365 val ^= (__force u16)fl6->fl6_sport;
366 val ^= (__force u16)fl6->fl6_dport;
370 val ^= (__force u16)fl6->fl6_icmp_type;
371 val ^= (__force u16)fl6->fl6_icmp_code;
374 /* RFC6438 recommends using the flow label */
375 val ^= (__force u32)fl6->flowlabel;
377 /* Perhaps, we need to tune, this function? */
378 val = val ^ (val >> 7) ^ (val >> 12);
379 return val % candidate_count;
/* Pick one route out of an ECMP sibling set by flow hash, skipping
 * siblings whose score is negative (unreachable/filtered).
 */
382 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
383 struct flowi6 *fl6, int oif,
386 struct rt6_info *sibling, *next_sibling;
389 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
390 /* Don't change the route, if route_choosen == 0
391 * (the siblings list does not include ourselves)
394 list_for_each_entry_safe(sibling, next_sibling,
395 &match->rt6i_siblings, rt6i_siblings) {
397 if (route_choosen == 0) {
398 if (rt6_score_route(sibling, oif, strict) < 0)
408 * Route lookup. Any table->tb6_lock is implied.
/* Walk the leaf chain and pick the route matching the requested
 * outgoing interface (oif) and/or source address.  Loopback routes
 * are remembered as a fallback in 'local'.  Returns the null entry
 * when strict interface matching (RT6_LOOKUP_F_IFACE) finds nothing.
 */
411 static inline struct rt6_info *rt6_device_match(struct net *net,
413 const struct in6_addr *saddr,
417 struct rt6_info *local = NULL;
418 struct rt6_info *sprt;
/* No constraints at all: the head route is as good as any. */
420 if (!oif && ipv6_addr_any(saddr))
423 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
424 struct net_device *dev = sprt->dst.dev;
427 if (dev->ifindex == oif)
429 if (dev->flags & IFF_LOOPBACK) {
430 if (!sprt->rt6i_idev ||
431 sprt->rt6i_idev->dev->ifindex != oif) {
432 if (flags & RT6_LOOKUP_F_IFACE && oif)
434 if (local && (!oif ||
435 local->rt6i_idev->dev->ifindex == oif))
/* Source-address constraint: route's device must own saddr. */
441 if (ipv6_chk_addr(net, saddr, dev,
442 flags & RT6_LOOKUP_F_IFACE))
451 if (flags & RT6_LOOKUP_F_IFACE)
452 return net->ipv6.ip6_null_entry;
458 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred-work context for sending a single router probe NS. */
459 struct __rt6_probe_work {
460 struct work_struct work;
461 struct in6_addr target;
462 struct net_device *dev;
/* Workqueue handler: send a Neighbor Solicitation to the target's
 * solicited-node multicast address, then release the device
 * reference taken in rt6_probe() (release lines elided here).
 */
465 static void rt6_probe_deferred(struct work_struct *w)
467 struct in6_addr mcaddr;
468 struct __rt6_probe_work *work =
469 container_of(w, struct __rt6_probe_work, work);
471 addrconf_addr_solict_mult(&work->target, &mcaddr);
472 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
477 static void rt6_probe(struct rt6_info *rt)
479 struct neighbour *neigh;
481 * Okay, this does not seem to be appropriate
482 * for now, however, we need to check if it
483 * is really so; aka Router Reachability Probing.
485 * Router Reachability Probe MUST be rate-limited
486 * to no more than one per minute.
/* Only gateway routes have a router worth probing. */
488 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
491 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway)
493 write_lock(&neigh->lock);
494 if (neigh->nud_state & NUD_VALID)
/* Rate limit: probe only after rtr_probe_interval has elapsed. */
499 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
500 struct __rt6_probe_work *work;
502 work = kmalloc(sizeof(*work), GFP_ATOMIC);
505 __neigh_set_probe_once(neigh);
508 write_unlock(&neigh->lock);
/* Actual NS transmit is deferred to process context. */
511 INIT_WORK(&work->work, rt6_probe_deferred);
512 work->target = rt->rt6i_gateway;
513 dev_hold(rt->dst.dev);
514 work->dev = rt->dst.dev;
515 schedule_work(&work->work);
519 write_unlock(&neigh->lock);
521 rcu_read_unlock_bh();
/* !CONFIG_IPV6_ROUTER_PREF build: probing compiles away to a no-op. */
524 static inline void rt6_probe(struct rt6_info *rt)
530 * Default Router Selection (RFC 2461 6.3.6)
/* Interface match score: body elided here, but the visible checks
 * distinguish exact oif match, loopback-idev match, and no match.
 */
532 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
534 struct net_device *dev = rt->dst.dev;
535 if (!oif || dev->ifindex == oif)
537 if ((dev->flags & IFF_LOOPBACK) &&
538 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/* Classify the next hop's neighbour state for route selection.
 * Non-gateway routes always succeed; otherwise the NUD state decides
 * between SUCCEED, FAIL_PROBE and the round-robin fallback.
 */
543 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
545 struct neighbour *neigh;
546 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
548 if (rt->rt6i_flags & RTF_NONEXTHOP ||
549 !(rt->rt6i_flags & RTF_GATEWAY))
550 return RT6_NUD_SUCCEED;
553 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
555 read_lock(&neigh->lock);
556 if (neigh->nud_state & NUD_VALID)
557 ret = RT6_NUD_SUCCEED;
558 #ifdef CONFIG_IPV6_ROUTER_PREF
559 else if (!(neigh->nud_state & NUD_FAILED))
560 ret = RT6_NUD_SUCCEED;
562 ret = RT6_NUD_FAIL_PROBE;
564 read_unlock(&neigh->lock);
/* No neighbour entry at all: with router preferences enabled we
 * optimistically accept; otherwise ask for round-robin.
 */
566 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
567 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
569 rcu_read_unlock_bh();
/* Combine device match, configured preference and (under
 * RT6_LOOKUP_F_REACHABLE) neighbour reachability into one score;
 * negative values are the RT6_NUD_FAIL_* codes.
 */
574 static int rt6_score_route(struct rt6_info *rt, int oif,
579 m = rt6_check_dev(rt, oif);
580 if (!m && (strict & RT6_LOOKUP_F_IFACE))
581 return RT6_NUD_FAIL_HARD;
582 #ifdef CONFIG_IPV6_ROUTER_PREF
583 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
585 if (strict & RT6_LOOKUP_F_REACHABLE) {
586 int n = rt6_check_neigh(rt);
/* Compare one candidate against the current best match; keep the
 * higher score in *mpri and report round-robin requests via *do_rr.
 */
593 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
594 int *mpri, struct rt6_info *match,
598 bool match_do_rr = false;
600 if (rt6_check_expired(rt))
603 m = rt6_score_route(rt, oif, strict);
604 if (m == RT6_NUD_FAIL_DO_RR) {
606 m = 0; /* lowest valid score */
607 } else if (m == RT6_NUD_FAIL_HARD) {
/* Under REACHABLE, skip this one but keep probing candidates. */
611 if (strict & RT6_LOOKUP_F_REACHABLE)
614 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
616 *do_rr = match_do_rr;
/* Scan the fib6 leaf chain for the best route at a given metric,
 * starting at rr_head and wrapping around to the start of the leaf
 * list, so round-robin state (fn->rr_ptr) is respected.
 */
624 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
625 struct rt6_info *rr_head,
626 u32 metric, int oif, int strict,
629 struct rt6_info *rt, *match, *cont;
634 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
635 if (rt->rt6i_metric != metric) {
640 match = find_match(rt, oif, strict, &mpri, match, do_rr);
643 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
644 if (rt->rt6i_metric != metric) {
649 match = find_match(rt, oif, strict, &mpri, match, do_rr);
/* Continuation routes (different metric) are scanned last. */
655 for (rt = cont; rt; rt = rt->dst.rt6_next)
656 match = find_match(rt, oif, strict, &mpri, match, do_rr);
/* Default-router selection entry point: pick the best route in this
 * fib6 node, advancing the round-robin pointer when no candidate was
 * individually preferable.  Falls back to the netns null entry.
 */
661 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
663 struct rt6_info *match, *rt0;
669 fn->rr_ptr = rt0 = fn->leaf;
671 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
675 struct rt6_info *next = rt0->dst.rt6_next;
677 /* no entries matched; do round-robin */
678 if (!next || next->rt6i_metric != rt0->rt6i_metric)
685 net = dev_net(rt0->dst.dev);
686 return match ? match : net->ipv6.ip6_null_entry;
689 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option received in a Router
 * Advertisement (RFC 4191): validate the option, then add, refresh
 * or expire the corresponding RTF_ROUTEINFO route for gwaddr.
 */
690 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
691 const struct in6_addr *gwaddr)
693 struct net *net = dev_net(dev);
694 struct route_info *rinfo = (struct route_info *) opt;
695 struct in6_addr prefix_buf, *prefix;
697 unsigned long lifetime;
700 if (len < sizeof(struct route_info)) {
704 /* Sanity check for prefix_len and length */
705 if (rinfo->length > 3) {
707 } else if (rinfo->prefix_len > 128) {
709 } else if (rinfo->prefix_len > 64) {
710 if (rinfo->length < 2) {
713 } else if (rinfo->prefix_len > 0) {
714 if (rinfo->length < 1) {
719 pref = rinfo->route_pref;
720 if (pref == ICMPV6_ROUTER_PREF_INVALID)
723 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
/* length == 3 carries the full 128-bit prefix in place; shorter
 * options need the prefix re-assembled into a local buffer.
 */
725 if (rinfo->length == 3)
726 prefix = (struct in6_addr *)rinfo->prefix;
728 /* this function is safe */
729 ipv6_addr_prefix(&prefix_buf,
730 (struct in6_addr *)rinfo->prefix,
732 prefix = &prefix_buf;
/* A zero-length prefix means the default route via this router. */
735 if (rinfo->prefix_len == 0)
736 rt = rt6_get_dflt_router(gwaddr, dev);
738 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
739 gwaddr, dev->ifindex);
/* Zero lifetime withdraws an existing route. */
741 if (rt && !lifetime) {
747 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
750 rt->rt6i_flags = RTF_ROUTEINFO |
751 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
754 if (!addrconf_finite_timeout(lifetime))
755 rt6_clean_expires(rt);
757 rt6_set_expires(rt, jiffies + HZ * lifetime);
/* Walk back up the fib6 tree after a failed match, descending into
 * source-routed subtrees where present; stops at the tree root.
 */
765 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
766 struct in6_addr *saddr)
768 struct fib6_node *pn;
770 if (fn->fn_flags & RTN_TL_ROOT)
773 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
774 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
777 if (fn->fn_flags & RTN_RTINFO)
/* Simple (non-cloning) table lookup used by fib6_rule_lookup():
 * device match plus optional ECMP selection, backtracking on miss.
 */
782 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
783 struct fib6_table *table,
784 struct flowi6 *fl6, int flags)
786 struct fib6_node *fn;
789 read_lock_bh(&table->tb6_lock);
790 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
793 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
794 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
795 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
796 if (rt == net->ipv6.ip6_null_entry) {
797 fn = fib6_backtrack(fn, &fl6->saddr);
/* Take a reference and timestamp the dst before dropping the lock. */
801 dst_use(&rt->dst, jiffies);
802 read_unlock_bh(&table->tb6_lock);
/* Public policy-routing wrapper around ip6_pol_route_lookup(). */
807 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
810 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
812 EXPORT_SYMBOL_GPL(ip6_route_lookup);
/* Convenience lookup by daddr/saddr/oif; 'strict' forces interface
 * matching.  Returns a referenced rt6_info (never NULL on success).
 */
814 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
815 const struct in6_addr *saddr, int oif, int strict)
817 struct flowi6 fl6 = {
821 struct dst_entry *dst;
822 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
825 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
826 flags |= RT6_LOOKUP_F_HAS_SADDR;
829 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
831 return (struct rt6_info *) dst;
837 EXPORT_SYMBOL(rt6_lookup);
839 /* ip6_ins_rt is called with FREE table->tb6_lock.
840 It takes new route entry, the addition fails by any reason the
841 route is freed. In any case, if caller does not hold it, it may
/* Insert a route into its fib6 table under the table write lock. */
845 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
846 struct mx6_config *mxc)
849 struct fib6_table *table;
851 table = rt->rt6i_table;
852 write_lock_bh(&table->tb6_lock);
853 err = fib6_add(&table->tb6_root, rt, info, mxc);
854 write_unlock_bh(&table->tb6_lock);
/* Insert with default (empty) netlink info and metrics. */
859 int ip6_ins_rt(struct rt6_info *rt)
861 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
862 struct mx6_config mxc = { .mx = NULL, };
864 return __ip6_ins_rt(rt, &info, &mxc);
/* Clone a gateway-less route into a host route (RTF_CACHE) for
 * daddr, marking anycast destinations and recording saddr under
 * CONFIG_IPV6_SUBTREES.
 */
867 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
868 const struct in6_addr *daddr,
869 const struct in6_addr *saddr)
877 rt = ip6_rt_copy(ort, daddr);
880 if (ort->rt6i_dst.plen != 128 &&
881 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
882 rt->rt6i_flags |= RTF_ANYCAST;
884 rt->rt6i_flags |= RTF_CACHE;
886 #ifdef CONFIG_IPV6_SUBTREES
887 if (rt->rt6i_src.plen && saddr) {
888 rt->rt6i_src.addr = *saddr;
889 rt->rt6i_src.plen = 128;
/* Lighter-weight clone for routes that already have a next hop. */
897 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
898 const struct in6_addr *daddr)
900 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
903 rt->rt6i_flags |= RTF_CACHE;
/* Core cloning route lookup: select the best route, fall back from
 * reachable-only to any route, clone non-cached results into
 * RTF_CACHE entries, and retry the whole lookup if insertion raced.
 */
907 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
908 struct flowi6 *fl6, int flags)
910 struct fib6_node *fn, *saved_fn;
911 struct rt6_info *rt, *nrt;
916 strict |= flags & RT6_LOOKUP_F_IFACE;
/* Hosts (forwarding off) prefer reachable routers, per RFC 4861. */
917 if (net->ipv6.devconf_all->forwarding == 0)
918 strict |= RT6_LOOKUP_F_REACHABLE;
920 redo_fib6_lookup_lock:
921 read_lock_bh(&table->tb6_lock);
923 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
927 rt = rt6_select(fn, oif, strict);
928 if (rt->rt6i_nsiblings)
929 rt = rt6_multipath_select(rt, fl6, oif, strict);
930 if (rt == net->ipv6.ip6_null_entry) {
931 fn = fib6_backtrack(fn, &fl6->saddr);
933 goto redo_rt6_select;
934 else if (strict & RT6_LOOKUP_F_REACHABLE) {
935 /* also consider unreachable route */
936 strict &= ~RT6_LOOKUP_F_REACHABLE;
938 goto redo_rt6_select;
941 read_unlock_bh(&table->tb6_lock);
947 read_unlock_bh(&table->tb6_lock);
/* Already a per-destination cache entry: use it as-is. */
949 if (rt->rt6i_flags & RTF_CACHE)
952 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
953 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
954 else if (!(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL))
955 nrt = rt6_alloc_clone(rt, &fl6->daddr);
960 rt = nrt ? : net->ipv6.ip6_null_entry;
964 err = ip6_ins_rt(nrt);
973 * Race condition! In the gap, when table->tb6_lock was
974 * released someone could insert this route. Relookup.
977 goto redo_fib6_lookup_lock;
980 rt6_dst_from_metrics_check(rt);
981 rt->dst.lastuse = jiffies;
/* Input-path policy lookup: key on the incoming interface. */
987 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
988 struct flowi6 *fl6, int flags)
990 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
/* Input lookup wrapper; link-local/multicast destinations force
 * strict interface matching (except for PIM register devices).
 */
993 static struct dst_entry *ip6_route_input_lookup(struct net *net,
994 struct net_device *dev,
995 struct flowi6 *fl6, int flags)
997 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
998 flags |= RT6_LOOKUP_F_IFACE;
1000 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
/* Attach a routing decision to an incoming skb, built from its
 * IPv6 header fields and the receiving device.
 */
1003 void ip6_route_input(struct sk_buff *skb)
1005 const struct ipv6hdr *iph = ipv6_hdr(skb);
1006 struct net *net = dev_net(skb->dev);
1007 int flags = RT6_LOOKUP_F_HAS_SADDR;
1008 struct flowi6 fl6 = {
1009 .flowi6_iif = skb->dev->ifindex,
1010 .daddr = iph->daddr,
1011 .saddr = iph->saddr,
1012 .flowlabel = ip6_flowinfo(iph),
1013 .flowi6_mark = skb->mark,
1014 .flowi6_proto = iph->nexthdr,
1017 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
/* Output-path policy lookup: key on the outgoing interface. */
1020 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1021 struct flowi6 *fl6, int flags)
1023 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
/* Public output route lookup; honours the socket's bound device and
 * source-address preferences when a socket is supplied.
 */
1026 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1031 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1033 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1034 flags |= RT6_LOOKUP_F_IFACE;
1036 if (!ipv6_addr_any(&fl6->saddr))
1037 flags |= RT6_LOOKUP_F_HAS_SADDR;
1039 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1041 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1043 EXPORT_SYMBOL(ip6_route_output);
/* Duplicate dst_orig as a "blackhole" dst (ip6_dst_blackhole_ops):
 * same addressing/metrics, but all traffic is discarded and routing
 * state is never mutated.  Consumes the reference on dst_orig.
 */
1045 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1047 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1048 struct dst_entry *new = NULL;
1050 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1054 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1055 rt6_init_peer(rt, net->ipv6.peers);
1058 new->input = dst_discard;
1059 new->output = dst_discard_sk;
/* Share read-only metrics; copy writable ones. */
1061 if (dst_metrics_read_only(&ort->dst))
1062 new->_metrics = ort->dst._metrics;
1064 dst_copy_metrics(new, &ort->dst);
1065 rt->rt6i_idev = ort->rt6i_idev;
1067 in6_dev_hold(rt->rt6i_idev);
1069 rt->rt6i_gateway = ort->rt6i_gateway;
1070 rt->rt6i_flags = ort->rt6i_flags;
1071 rt->rt6i_metric = 0;
1073 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1074 #ifdef CONFIG_IPV6_SUBTREES
1075 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1081 dst_release(dst_orig);
1082 return new ? new : ERR_PTR(-ENOMEM);
1086 * Destination cache support functions
/* Re-sync a clone's metrics pointer with its parent (dst.from). */
1089 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1092 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1093 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1096 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1098 struct rt6_info *rt;
1100 rt = (struct rt6_info *) dst;
1102 /* All IPV6 dsts are created with ->obsolete set to the value
1103 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1104 * into this function always.
/* Stale if detached from the tree or the tree generation moved on. */
1106 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1109 if (rt6_check_expired(rt))
1112 rt6_dst_from_metrics_check(rt);
/* dst_ops.negative_advice: drop expired RTF_CACHE clones so the
 * next lookup produces a fresh route.
 */
1117 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1119 struct rt6_info *rt = (struct rt6_info *) dst;
1122 if (rt->rt6i_flags & RTF_CACHE) {
1123 if (rt6_check_expired(rt)) {
/* dst_ops.link_failure: notify the sender with ICMPv6 address
 * unreachable, then invalidate the offending cached/default route.
 */
1135 static void ip6_link_failure(struct sk_buff *skb)
1137 struct rt6_info *rt;
1139 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1141 rt = (struct rt6_info *) skb_dst(skb);
1143 if (rt->rt6i_flags & RTF_CACHE) {
1147 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1148 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: record a smaller path MTU on cached routes,
 * clamped at IPV6_MIN_MTU, with an expiry so it is re-probed later.
 */
1153 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1154 struct sk_buff *skb, u32 mtu)
1156 struct rt6_info *rt6 = (struct rt6_info *)dst;
1159 if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
1160 struct net *net = dev_net(dst->dev);
1162 rt6->rt6i_flags |= RTF_MODIFIED;
1163 if (mtu < IPV6_MIN_MTU)
1166 rt6->rt6i_pmtu = mtu;
1167 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
/* Apply a PMTU update (e.g. from a received Packet Too Big) for the
 * flow described by skb's inner IPv6 header; mtu is network order.
 */
1171 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1174 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1175 struct dst_entry *dst;
1178 memset(&fl6, 0, sizeof(fl6));
1179 fl6.flowi6_oif = oif;
1180 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1181 fl6.daddr = iph->daddr;
1182 fl6.saddr = iph->saddr;
1183 fl6.flowlabel = ip6_flowinfo(iph);
1185 dst = ip6_route_output(net, NULL, &fl6);
1187 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1190 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
/* Socket variant: derive netns, oif and mark from the socket. */
1192 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1194 ip6_update_pmtu(skb, sock_net(sk), mtu,
1195 sk->sk_bound_dev_if, sk->sk_mark);
1197 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1199 /* Handle redirects */
/* flowi6 extended with the redirecting router's address. */
1200 struct ip6rd_flowi {
1202 struct in6_addr gateway;
/* Find the route a redirect applies to: the current gateway route
 * for the destination whose next hop matches the redirecting router.
 */
1205 static struct rt6_info *__ip6_route_redirect(struct net *net,
1206 struct fib6_table *table,
1210 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1211 struct rt6_info *rt;
1212 struct fib6_node *fn;
1214 /* Get the "current" route for this destination and
1215 * check if the redirect has come from appropriate router.
1217 * RFC 4861 specifies that redirects should only be
1218 * accepted if they come from the nexthop to the target.
1219 * Due to the way the routes are chosen, this notion
1220 * is a bit fuzzy and one might need to check all possible
1224 read_lock_bh(&table->tb6_lock);
1225 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1227 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1228 if (rt6_check_expired(rt))
1232 if (!(rt->rt6i_flags & RTF_GATEWAY))
1234 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1236 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1242 rt = net->ipv6.ip6_null_entry;
1243 else if (rt->dst.error) {
1244 rt = net->ipv6.ip6_null_entry;
1248 if (rt == net->ipv6.ip6_null_entry) {
1249 fn = fib6_backtrack(fn, &fl6->saddr);
1257 read_unlock_bh(&table->tb6_lock);
/* Wrap the flow plus redirecting-gateway address and run the
 * policy lookup with __ip6_route_redirect as the match function.
 */
1262 static struct dst_entry *ip6_route_redirect(struct net *net,
1263 const struct flowi6 *fl6,
1264 const struct in6_addr *gateway)
1266 int flags = RT6_LOOKUP_F_HAS_SADDR;
1267 struct ip6rd_flowi rdfl;
1270 rdfl.gateway = *gateway;
1272 return fib6_rule_lookup(net, &rdfl.fl6,
1273 flags, __ip6_route_redirect);
/* Apply a redirect for the flow in skb's inner IPv6 header; the new
 * gateway is the redirect message's source address.
 */
1276 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1278 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1279 struct dst_entry *dst;
1282 memset(&fl6, 0, sizeof(fl6));
1283 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1284 fl6.flowi6_oif = oif;
1285 fl6.flowi6_mark = mark;
1286 fl6.daddr = iph->daddr;
1287 fl6.saddr = iph->saddr;
1288 fl6.flowlabel = ip6_flowinfo(iph);
1290 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1291 rt6_do_redirect(dst, NULL, skb);
1294 EXPORT_SYMBOL_GPL(ip6_redirect);
/* Variant for redirects that carry no embedded packet header: the
 * target comes from the rd_msg and the gateway from the outer header.
 */
1296 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1299 const struct ipv6hdr *iph = ipv6_hdr(skb);
1300 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1301 struct dst_entry *dst;
1304 memset(&fl6, 0, sizeof(fl6));
1305 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1306 fl6.flowi6_oif = oif;
1307 fl6.flowi6_mark = mark;
1308 fl6.daddr = msg->dest;
1309 fl6.saddr = iph->daddr;
1311 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1312 rt6_do_redirect(dst, NULL, skb);
/* Socket convenience wrapper around ip6_redirect(). */
1316 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1318 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1320 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
/* dst_ops.default_advmss: advertised MSS derived from the path MTU
 * minus IPv6 + TCP headers, clamped between ip6_rt_min_advmss and
 * the non-jumbogram maximum.
 */
1322 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1324 struct net_device *dev = dst->dev;
1325 unsigned int mtu = dst_mtu(dst);
1326 struct net *net = dev_net(dev);
1328 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1330 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1331 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1334 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1335 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1336 * IPV6_MAXPLEN is also valid and means: "any MSS,
1337 * rely only on pmtu discovery"
1339 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* dst_ops.mtu: prefer the learned per-route PMTU, then the raw MTU
 * metric, then the device's inet6 mtu6; capped at IP6_MAX_MTU.
 */
1344 static unsigned int ip6_mtu(const struct dst_entry *dst)
1346 const struct rt6_info *rt = (const struct rt6_info *)dst;
1347 unsigned int mtu = rt->rt6i_pmtu;
1348 struct inet6_dev *idev;
1353 mtu = dst_metric_raw(dst, RTAX_MTU);
1360 idev = __in6_dev_get(dst->dev);
1362 mtu = idev->cnf.mtu6;
1366 return min_t(unsigned int, mtu, IP6_MAX_MTU);
/* ICMPv6 dsts are not inserted into the fib; they live on this
 * dedicated list, protected by icmp6_dst_lock, until GC'd.
 */
1369 static struct dst_entry *icmp6_dst_gc_list;
1370 static DEFINE_SPINLOCK(icmp6_dst_lock);
/* Build a throwaway host dst for sending one ICMPv6 packet: not in
 * the fib, chained onto icmp6_dst_gc_list, and passed through xfrm.
 */
1372 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1375 struct dst_entry *dst;
1376 struct rt6_info *rt;
1377 struct inet6_dev *idev = in6_dev_get(dev);
1378 struct net *net = dev_net(dev);
1380 if (unlikely(!idev))
1381 return ERR_PTR(-ENODEV);
1383 rt = ip6_dst_alloc(net, dev, 0, NULL);
1384 if (unlikely(!rt)) {
1386 dst = ERR_PTR(-ENOMEM);
1390 rt->dst.flags |= DST_HOST;
1391 rt->dst.output = ip6_output;
1392 atomic_set(&rt->dst.__refcnt, 1);
1393 rt->rt6i_gateway = fl6->daddr;
1394 rt->rt6i_dst.addr = fl6->daddr;
1395 rt->rt6i_dst.plen = 128;
1396 rt->rt6i_idev = idev;
1397 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1399 spin_lock_bh(&icmp6_dst_lock);
1400 rt->dst.next = icmp6_dst_gc_list;
1401 icmp6_dst_gc_list = &rt->dst;
1402 spin_unlock_bh(&icmp6_dst_lock);
1404 fib6_force_start_gc(net);
1406 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
/* Garbage-collect unreferenced entries from the ICMPv6 dst list. */
1412 int icmp6_dst_gc(void)
1414 struct dst_entry *dst, **pprev;
1417 spin_lock_bh(&icmp6_dst_lock);
1418 pprev = &icmp6_dst_gc_list;
1420 while ((dst = *pprev) != NULL) {
1421 if (!atomic_read(&dst->__refcnt)) {
1430 spin_unlock_bh(&icmp6_dst_lock);
/* Remove ICMPv6 dsts for which the callback returns true
 * (e.g. those referencing a device being torn down).
 */
1435 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1438 struct dst_entry *dst, **pprev;
1440 spin_lock_bh(&icmp6_dst_lock);
1441 pprev = &icmp6_dst_gc_list;
1442 while ((dst = *pprev) != NULL) {
1443 struct rt6_info *rt = (struct rt6_info *) dst;
1444 if (func(rt, arg)) {
1451 spin_unlock_bh(&icmp6_dst_lock);
/* dst_ops.gc: run fib6 garbage collection when the entry count
 * exceeds rt_max_size or the minimum GC interval has elapsed; the
 * adaptive ip6_rt_gc_expire shortens route lifetimes under pressure.
 */
1454 static int ip6_dst_gc(struct dst_ops *ops)
1456 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1457 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1458 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1459 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1460 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1461 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1464 entries = dst_entries_get_fast(ops);
1465 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1466 entries <= rt_max_size)
1469 net->ipv6.ip6_rt_gc_expire++;
1470 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1471 entries = dst_entries_get_slow(ops);
1472 if (entries < ops->gc_thresh)
1473 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
/* Decay the aggressiveness as pressure subsides. */
1475 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1476 return entries > rt_max_size;
/* Convert netlink RTAX_* attributes from a route request into an
 * mx6_config metrics array; RTAX_CC_ALGO names are translated to
 * congestion-control keys.  NOTE(review): allocation-failure and
 * error-unwind lines are elided in this excerpt.
 */
1479 static int ip6_convert_metrics(struct mx6_config *mxc,
1480 const struct fib6_config *cfg)
1489 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1493 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1494 int type = nla_type(nla);
1499 if (unlikely(type > RTAX_MAX))
1501 if (type == RTAX_CC_ALGO) {
1502 char tmp[TCP_CA_NAME_MAX];
1504 nla_strlcpy(tmp, nla, sizeof(tmp));
1505 val = tcp_ca_get_key_by_name(tmp);
1506 if (val == TCP_CA_UNSPEC)
1509 val = nla_get_u32(nla);
1513 __set_bit(type - 1, mxc->mx_valid);
/*
 * Add an IPv6 route described by @cfg to the FIB.
 * Validates prefix lengths, resolves the output device, allocates the
 * rt6_info, classifies it (multicast / local / forward, reject types,
 * gateway routes) and finally inserts it via __ip6_ins_rt().
 * NOTE(review): many error-handling and cleanup lines are elided in this
 * extract; comments below describe only what is visible.
 */
1525 int ip6_route_add(struct fib6_config *cfg)
1528 struct net *net = cfg->fc_nlinfo.nl_net;
1529 struct rt6_info *rt = NULL;
1530 struct net_device *dev = NULL;
1531 struct inet6_dev *idev = NULL;
1532 struct fib6_table *table;
1533 struct mx6_config mxc = { .mx = NULL, };
/* IPv6 prefixes cannot exceed 128 bits. */
1536 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
/* Source-specific routing needs subtree support. */
1538 #ifndef CONFIG_IPV6_SUBTREES
1539 if (cfg->fc_src_len)
1542 if (cfg->fc_ifindex) {
1544 dev = dev_get_by_index(net, cfg->fc_ifindex);
1547 idev = in6_dev_get(dev);
/* Metric 0 means "unspecified": fall back to the user-route priority. */
1552 if (cfg->fc_metric == 0)
1553 cfg->fc_metric = IP6_RT_PRIO_USER;
/* Without NLM_F_CREATE only an existing table may be used; warn but
 * still create for backwards compatibility. */
1556 if (cfg->fc_nlinfo.nlh &&
1557 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1558 table = fib6_get_table(net, cfg->fc_table);
1560 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1561 table = fib6_new_table(net, cfg->fc_table);
1564 table = fib6_new_table(net, cfg->fc_table);
/* addrconf routes are not counted against the dst entry limit. */
1570 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1577 if (cfg->fc_flags & RTF_EXPIRES)
1578 rt6_set_expires(rt, jiffies +
1579 clock_t_to_jiffies(cfg->fc_expires));
1581 rt6_clean_expires(rt);
1583 if (cfg->fc_protocol == RTPROT_UNSPEC)
1584 cfg->fc_protocol = RTPROT_BOOT;
1585 rt->rt6i_protocol = cfg->fc_protocol;
1587 addr_type = ipv6_addr_type(&cfg->fc_dst);
/* Choose the input handler from the destination class. */
1589 if (addr_type & IPV6_ADDR_MULTICAST)
1590 rt->dst.input = ip6_mc_input;
1591 else if (cfg->fc_flags & RTF_LOCAL)
1592 rt->dst.input = ip6_input;
1594 rt->dst.input = ip6_forward;
1596 rt->dst.output = ip6_output;
1598 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1599 rt->rt6i_dst.plen = cfg->fc_dst_len;
1600 if (rt->rt6i_dst.plen == 128) {
1601 rt->dst.flags |= DST_HOST;
1602 dst_metrics_set_force_overwrite(&rt->dst);
1605 #ifdef CONFIG_IPV6_SUBTREES
1606 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1607 rt->rt6i_src.plen = cfg->fc_src_len;
1610 rt->rt6i_metric = cfg->fc_metric;
1612 /* We cannot add true routes via loopback here,
1613 they would result in kernel looping; promote them to reject routes
1615 if ((cfg->fc_flags & RTF_REJECT) ||
1616 (dev && (dev->flags & IFF_LOOPBACK) &&
1617 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1618 !(cfg->fc_flags & RTF_LOCAL))) {
1619 /* hold loopback dev/idev if we haven't done so. */
1620 if (dev != net->loopback_dev) {
1625 dev = net->loopback_dev;
1627 idev = in6_dev_get(dev);
1633 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Map the reject route type to a dst error and drop handlers. */
1634 switch (cfg->fc_type) {
1636 rt->dst.error = -EINVAL;
1637 rt->dst.output = dst_discard_sk;
1638 rt->dst.input = dst_discard;
1641 rt->dst.error = -EACCES;
1642 rt->dst.output = ip6_pkt_prohibit_out;
1643 rt->dst.input = ip6_pkt_prohibit;
1647 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1649 rt->dst.output = ip6_pkt_discard_out;
1650 rt->dst.input = ip6_pkt_discard;
1656 if (cfg->fc_flags & RTF_GATEWAY) {
1657 const struct in6_addr *gw_addr;
1660 gw_addr = &cfg->fc_gateway;
1661 rt->rt6i_gateway = *gw_addr;
1662 gwa_type = ipv6_addr_type(gw_addr);
1664 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1665 struct rt6_info *grt;
1667 /* IPv6 strictly inhibits using not link-local
1668 addresses as nexthop address.
1669 Otherwise, router will not able to send redirects.
1670 It is very good, but in some (rare!) circumstances
1671 (SIT, PtP, NBMA NOARP links) it is handy to allow
1672 some exceptions. --ANK
1675 if (!(gwa_type & IPV6_ADDR_UNICAST))
/* Non-link-local gateway must itself be reachable via a
 * non-gateway (directly connected) route. */
1678 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1680 err = -EHOSTUNREACH;
1684 if (dev != grt->dst.dev) {
1690 idev = grt->rt6i_idev;
1692 in6_dev_hold(grt->rt6i_idev);
1694 if (!(grt->rt6i_flags & RTF_GATEWAY))
1702 if (!dev || (dev->flags & IFF_LOOPBACK))
/* Preferred source address, if given, must exist on the device. */
1710 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1711 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1715 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1716 rt->rt6i_prefsrc.plen = 128;
1718 rt->rt6i_prefsrc.plen = 0;
1720 rt->rt6i_flags = cfg->fc_flags;
1724 rt->rt6i_idev = idev;
1725 rt->rt6i_table = table;
1727 cfg->fc_nlinfo.nl_net = dev_net(dev);
1729 err = ip6_convert_metrics(&mxc, cfg);
1733 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
/*
 * Delete @rt from its FIB table under the table write lock.
 * The null entry is never deletable (error path elided here).
 */
1747 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1750 struct fib6_table *table;
1751 struct net *net = dev_net(rt->dst.dev);
1753 if (rt == net->ipv6.ip6_null_entry) {
1758 table = rt->rt6i_table;
1759 write_lock_bh(&table->tb6_lock);
1760 err = fib6_del(rt, info);
1761 write_unlock_bh(&table->tb6_lock);
/* Public wrapper: delete @rt with a default nl_info for its netns. */
1768 int ip6_del_rt(struct rt6_info *rt)
1770 struct nl_info info = {
1771 .nl_net = dev_net(rt->dst.dev),
1773 return __ip6_del_rt(rt, &info);
/*
 * Delete the route matching @cfg: locate the FIB node for the dst/src
 * prefixes, then scan its leaf chain for a route matching the optional
 * ifindex, gateway and metric filters. Cached clones are skipped unless
 * RTF_CACHE was explicitly requested.
 */
1776 static int ip6_route_del(struct fib6_config *cfg)
1778 struct fib6_table *table;
1779 struct fib6_node *fn;
1780 struct rt6_info *rt;
1783 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1787 read_lock_bh(&table->tb6_lock);
1789 fn = fib6_locate(&table->tb6_root,
1790 &cfg->fc_dst, cfg->fc_dst_len,
1791 &cfg->fc_src, cfg->fc_src_len);
1794 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1795 if ((rt->rt6i_flags & RTF_CACHE) &&
1796 !(cfg->fc_flags & RTF_CACHE))
1798 if (cfg->fc_ifindex &&
1800 rt->dst.dev->ifindex != cfg->fc_ifindex))
1802 if (cfg->fc_flags & RTF_GATEWAY &&
1803 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1805 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
/* Match found: drop the read lock before taking the write path
 * in __ip6_del_rt() (a dst_hold is presumably taken in elided code). */
1808 read_unlock_bh(&table->tb6_lock);
1810 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1813 read_unlock_bh(&table->tb6_lock);
/*
 * Process an ICMPv6 Redirect (RFC 4861 §8): validate the message,
 * update the neighbour cache for the new first hop, clone the route
 * with the new gateway and notify netevent listeners.
 * NOTE(review): several validation/cleanup lines are elided here.
 */
1818 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1820 struct net *net = dev_net(skb->dev);
1821 struct netevent_redirect netevent;
1822 struct rt6_info *rt, *nrt = NULL;
1823 struct ndisc_options ndopts;
1824 struct inet6_dev *in6_dev;
1825 struct neighbour *neigh;
1827 int optlen, on_link;
/* Options start after the fixed rd_msg header. */
1830 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1831 optlen -= sizeof(*msg);
1834 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1838 msg = (struct rd_msg *)icmp6_hdr(skb);
1840 if (ipv6_addr_is_multicast(&msg->dest)) {
1841 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
/* dest == target means the destination is on-link (no new router). */
1846 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1848 } else if (ipv6_addr_type(&msg->target) !=
1849 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1850 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1854 in6_dev = __in6_dev_get(skb->dev);
/* Routers and interfaces that refuse redirects ignore the message. */
1857 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1861 * The IP source address of the Redirect MUST be the same as the current
1862 * first-hop router for the specified ICMP Destination Address.
1865 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1866 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1871 if (ndopts.nd_opts_tgt_lladdr) {
1872 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1875 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1880 rt = (struct rt6_info *) dst;
1881 if (rt == net->ipv6.ip6_null_entry) {
1882 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1886 /* Redirect received -> path was valid.
1887 * Look, redirects are sent only in response to data packets,
1888 * so that this nexthop apparently is reachable. --ANK
1890 dst_confirm(&rt->dst);
1892 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1897 * We have finally decided to accept it.
/* Record the new router's link-layer address; only a true router
 * redirect (not on-link) may set the IS_ROUTER flag. */
1900 neigh_update(neigh, lladdr, NUD_STALE,
1901 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1902 NEIGH_UPDATE_F_OVERRIDE|
1903 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1904 NEIGH_UPDATE_F_ISROUTER))
1907 nrt = ip6_rt_copy(rt, &msg->dest);
1911 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1913 nrt->rt6i_flags &= ~RTF_GATEWAY;
1915 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1917 if (ip6_ins_rt(nrt))
/* Tell interested subsystems (e.g. offload drivers) about the switch. */
1920 netevent.old = &rt->dst;
1921 netevent.new = &nrt->dst;
1922 netevent.daddr = &msg->dest;
1923 netevent.neigh = neigh;
1924 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1926 if (rt->rt6i_flags & RTF_CACHE) {
1927 rt = (struct rt6_info *) dst_clone(&rt->dst);
1932 neigh_release(neigh);
1936 * Misc support functions
/*
 * Link @rt to its parent @from: @rt now shares @from's dst metrics
 * (read-only) and holds a reference on @from's dst. @from must not
 * itself be derived from another route.
 */
1939 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1941 BUG_ON(from->dst.from);
1943 rt->rt6i_flags &= ~RTF_EXPIRES;
1944 dst_hold(&from->dst);
1945 rt->dst.from = &from->dst;
1946 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
/*
 * Clone @ort into a new host route (/128) for @dest, copying handlers,
 * flags, idev and table, and linking metrics via rt6_set_from().
 * A cached clone is first dereferenced to its parent so clones never
 * chain off other clones.
 */
1949 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1950 const struct in6_addr *dest)
1952 struct net *net = dev_net(ort->dst.dev);
1953 struct rt6_info *rt;
1955 if (ort->rt6i_flags & RTF_CACHE)
1956 ort = (struct rt6_info *)ort->dst.from;
1958 rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1962 rt->dst.input = ort->dst.input;
1963 rt->dst.output = ort->dst.output;
1964 rt->dst.flags |= DST_HOST;
1966 rt->rt6i_dst.addr = *dest;
1967 rt->rt6i_dst.plen = 128;
1968 rt->dst.error = ort->dst.error;
1969 rt->rt6i_idev = ort->rt6i_idev;
1971 in6_dev_hold(rt->rt6i_idev);
1972 rt->dst.lastuse = jiffies;
/* Non-gateway parents get the destination itself as the gateway. */
1974 if (ort->rt6i_flags & RTF_GATEWAY)
1975 rt->rt6i_gateway = ort->rt6i_gateway;
1977 rt->rt6i_gateway = *dest;
1978 rt->rt6i_flags = ort->rt6i_flags;
1979 rt6_set_from(rt, ort);
1980 rt->rt6i_metric = 0;
1982 #ifdef CONFIG_IPV6_SUBTREES
1983 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1985 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1986 rt->rt6i_table = ort->rt6i_table;
1991 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find an RA Route-Information route (RFC 4191) for @prefix/@prefixlen
 * learned from @gwaddr on interface @ifindex, in the INFO table.
 * Returns the route or NULL (the reference-taking tail is elided).
 */
1992 static struct rt6_info *rt6_get_route_info(struct net *net,
1993 const struct in6_addr *prefix, int prefixlen,
1994 const struct in6_addr *gwaddr, int ifindex)
1996 struct fib6_node *fn;
1997 struct rt6_info *rt = NULL;
1998 struct fib6_table *table;
2000 table = fib6_get_table(net, RT6_TABLE_INFO);
2004 read_lock_bh(&table->tb6_lock);
2005 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2009 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2010 if (rt->dst.dev->ifindex != ifindex)
2012 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2014 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2020 read_unlock_bh(&table->tb6_lock);
/*
 * Install an RA Route-Information route (RFC 4191) and return it by
 * re-looking it up — ip6_route_add() does not hand back the rt6_info.
 */
2024 static struct rt6_info *rt6_add_route_info(struct net *net,
2025 const struct in6_addr *prefix, int prefixlen,
2026 const struct in6_addr *gwaddr, int ifindex,
2029 struct fib6_config cfg = {
2030 .fc_table = RT6_TABLE_INFO,
2031 .fc_metric = IP6_RT_PRIO_USER,
2032 .fc_ifindex = ifindex,
2033 .fc_dst_len = prefixlen,
2034 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2035 RTF_UP | RTF_PREF(pref),
2036 .fc_nlinfo.portid = 0,
2037 .fc_nlinfo.nlh = NULL,
2038 .fc_nlinfo.nl_net = net,
2041 cfg.fc_dst = *prefix;
2042 cfg.fc_gateway = *gwaddr;
2044 /* We should treat it as a default route if prefix length is 0. */
2046 cfg.fc_flags |= RTF_DEFAULT;
2048 ip6_route_add(&cfg);
2050 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * Find the RA-learned default route via gateway @addr on @dev in the
 * DFLT table, or NULL (the reference-taking tail is elided).
 */
2054 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2056 struct rt6_info *rt;
2057 struct fib6_table *table;
2059 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2063 read_lock_bh(&table->tb6_lock);
2064 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2065 if (dev == rt->dst.dev &&
2066 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2067 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2072 read_unlock_bh(&table->tb6_lock);
/*
 * Install an RA-learned default route via @gwaddr on @dev with router
 * preference @pref, then return it via rt6_get_dflt_router().
 */
2076 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2077 struct net_device *dev,
2080 struct fib6_config cfg = {
2081 .fc_table = RT6_TABLE_DFLT,
2082 .fc_metric = IP6_RT_PRIO_USER,
2083 .fc_ifindex = dev->ifindex,
2084 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2085 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2086 .fc_nlinfo.portid = 0,
2087 .fc_nlinfo.nlh = NULL,
2088 .fc_nlinfo.nl_net = dev_net(dev),
2091 cfg.fc_gateway = *gwaddr;
2093 ip6_route_add(&cfg);
2095 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Remove all RA-learned default routes, except on interfaces with
 * accept_ra == 2 (always accept RAs, even when forwarding).
 * The restart-after-delete logic is elided in this extract.
 */
2098 void rt6_purge_dflt_routers(struct net *net)
2100 struct rt6_info *rt;
2101 struct fib6_table *table;
2103 /* NOTE: Keep consistent with rt6_get_dflt_router */
2104 table = fib6_get_table(net, RT6_TABLE_DFLT);
2109 read_lock_bh(&table->tb6_lock);
2110 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2111 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2112 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
/* Must drop the read lock before deleting; elided code presumably
 * deletes the route and restarts the scan. */
2114 read_unlock_bh(&table->tb6_lock);
2119 read_unlock_bh(&table->tb6_lock);
/*
 * Translate a legacy ioctl in6_rtmsg into a fib6_config.
 * Always targets the MAIN table; all unset fields are zeroed.
 */
2122 static void rtmsg_to_fib6_config(struct net *net,
2123 struct in6_rtmsg *rtmsg,
2124 struct fib6_config *cfg)
2126 memset(cfg, 0, sizeof(*cfg));
2128 cfg->fc_table = RT6_TABLE_MAIN;
2129 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2130 cfg->fc_metric = rtmsg->rtmsg_metric;
2131 cfg->fc_expires = rtmsg->rtmsg_info;
2132 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2133 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2134 cfg->fc_flags = rtmsg->rtmsg_flags;
2136 cfg->fc_nlinfo.nl_net = net;
2138 cfg->fc_dst = rtmsg->rtmsg_dst;
2139 cfg->fc_src = rtmsg->rtmsg_src;
2140 cfg->fc_gateway = rtmsg->rtmsg_gateway;
/*
 * Legacy SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN,
 * copies the in6_rtmsg from userspace and dispatches to route add/del.
 * Locking (rtnl) and the default/return paths are elided in this extract.
 */
2143 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2145 struct fib6_config cfg;
2146 struct in6_rtmsg rtmsg;
2150 case SIOCADDRT: /* Add a route */
2151 case SIOCDELRT: /* Delete a route */
2152 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2154 err = copy_from_user(&rtmsg, arg,
2155 sizeof(struct in6_rtmsg));
2159 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2164 err = ip6_route_add(&cfg);
2167 err = ip6_route_del(&cfg);
2181 * Drop the packet on the floor
/*
 * Common drop path for reject routes: bump the relevant SNMP counter
 * and send an ICMPv6 Destination Unreachable with @code.
 * Packets to the unspecified address count as InAddrErrors instead.
 */
2184 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2187 struct dst_entry *dst = skb_dst(skb);
2188 switch (ipstats_mib_noroutes) {
2189 case IPSTATS_MIB_INNOROUTES:
2190 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2191 if (type == IPV6_ADDR_ANY) {
2192 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2193 IPSTATS_MIB_INADDRERRORS);
/* fallthrough to OUTNOROUTES accounting is presumed in elided code. */
2197 case IPSTATS_MIB_OUTNOROUTES:
2198 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2199 ipstats_mib_noroutes);
2202 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
/* Input handler for blackhole/unreachable routes: drop with "no route". */
2207 static int ip6_pkt_discard(struct sk_buff *skb)
2209 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* Output-side discard: attach the dst's device before dropping. */
2212 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2214 skb->dev = skb_dst(skb)->dev;
2215 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
/* Input handler for prohibit routes: drop with "administratively prohibited". */
2218 static int ip6_pkt_prohibit(struct sk_buff *skb)
2220 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* Output-side prohibit: attach the dst's device before dropping. */
2223 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2225 skb->dev = skb_dst(skb)->dev;
2226 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2230 * Allocate a dst for local (unicast / anycast) address.
/*
 * Allocate a host route (/128) for a local unicast or anycast address,
 * bound to the loopback device and placed in the LOCAL table.
 * Returned with one dst reference held by the caller.
 */
2233 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2234 const struct in6_addr *addr,
2237 struct net *net = dev_net(idev->dev);
2238 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2241 return ERR_PTR(-ENOMEM);
2245 rt->dst.flags |= DST_HOST;
2246 rt->dst.input = ip6_input;
2247 rt->dst.output = ip6_output;
2248 rt->rt6i_idev = idev;
2250 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
/* Elided condition presumably selects anycast vs local flagging. */
2252 rt->rt6i_flags |= RTF_ANYCAST;
2254 rt->rt6i_flags |= RTF_LOCAL;
2256 rt->rt6i_gateway = *addr;
2257 rt->rt6i_dst.addr = *addr;
2258 rt->rt6i_dst.plen = 128;
2259 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2261 atomic_set(&rt->dst.__refcnt, 1);
/*
 * Resolve the source address for @rt: use the route's preferred source
 * if one is configured, otherwise fall back to normal source-address
 * selection for @daddr on the route's device.
 */
2266 int ip6_route_get_saddr(struct net *net,
2267 struct rt6_info *rt,
2268 const struct in6_addr *daddr,
2270 struct in6_addr *saddr)
2272 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2274 if (rt->rt6i_prefsrc.plen)
2275 *saddr = rt->rt6i_prefsrc.addr;
2277 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2278 daddr, prefs, saddr);
2282 /* remove deleted ip from prefsrc entries */
2283 struct arg_dev_net_ip {
2284 struct net_device *dev;
2286 struct in6_addr *addr;
/*
 * fib6_clean_all() callback: clear a route's preferred-source entry when
 * it matches the deleted address @addr (on @dev, or any device if NULL).
 */
2289 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2291 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2292 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2293 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2295 if (((void *)rt->dst.dev == dev || !dev) &&
2296 rt != net->ipv6.ip6_null_entry &&
2297 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2298 /* remove prefsrc entry */
2299 rt->rt6i_prefsrc.plen = 0;
/* Purge @ifp's address from every route's preferred-source field. */
2304 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2306 struct net *net = dev_net(ifp->idev->dev);
2307 struct arg_dev_net_ip adni = {
2308 .dev = ifp->idev->dev,
2312 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2315 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2316 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2318 /* Remove routers and update dst entries when gateway turn into host. */
/* fib6_clean_all() callback: matches RA default routes and cached
 * gateway routes whose gateway equals @arg (the demoted router). */
2319 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2321 struct in6_addr *gateway = (struct in6_addr *)arg;
2323 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2324 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2325 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
/* Walk all FIB tables removing routes through @gateway (now a host). */
2331 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2333 fib6_clean_all(net, fib6_clean_tohost, gateway);
/* Callback argument bundle for fib6_ifdown(). */
2336 struct arg_dev_net {
2337 struct net_device *dev;
/*
 * Clean callback for interface-down: match routes on @dev (or every
 * device when dev == NULL), never the netns null entry.
 */
2341 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2343 const struct arg_dev_net *adn = arg;
2344 const struct net_device *dev = adn->dev;
2346 if ((rt->dst.dev == dev || !dev) &&
2347 rt != adn->net->ipv6.ip6_null_entry)
/* Drop all routes through @dev from the FIB and the icmp6 dst list. */
2353 void rt6_ifdown(struct net *net, struct net_device *dev)
2355 struct arg_dev_net adn = {
2360 fib6_clean_all(net, fib6_ifdown, &adn);
2361 icmp6_clean_all(fib6_ifdown, &adn);
/* Callback argument bundle for rt6_mtu_change_route(). */
2364 struct rt6_mtu_change_arg {
2365 struct net_device *dev;
/*
 * fib6_clean_all() callback applying a device MTU change to one route:
 * cached routes get their per-route PMTU capped; non-cached routes get
 * their RTAX_MTU metric updated per the RFC 1981 reasoning below.
 */
2369 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2371 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2372 struct inet6_dev *idev;
2374 /* In IPv6 pmtu discovery is not optional,
2375 so that RTAX_MTU lock cannot disable it.
2376 We still use this lock to block changes
2377 caused by addrconf/ndisc.
2380 idev = __in6_dev_get(arg->dev);
2384 /* For administrative MTU increase, there is no way to discover
2385 IPv6 PMTU increase, so PMTU increase should be updated here.
2386 Since RFC 1981 doesn't include administrative MTU increase
2387 update PMTU increase is a MUST. (i.e. jumbo frame)
2390 If new MTU is less than route PMTU, this new MTU will be the
2391 lowest MTU in the path, update the route PMTU to reflect PMTU
2392 decreases; if new MTU is greater than route PMTU, and the
2393 old MTU is the lowest MTU in the path, update the route PMTU
2394 to reflect the increase. In this case if the other nodes' MTU
2395 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2398 if (rt->dst.dev == arg->dev &&
2399 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2400 if (rt->rt6i_flags & RTF_CACHE) {
2401 /* For RTF_CACHE with rt6i_pmtu == 0
2402 * (i.e. a redirected route),
2403 * the metrics of its rt->dst.from has already
2406 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2407 rt->rt6i_pmtu = arg->mtu;
2408 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2409 (dst_mtu(&rt->dst) < arg->mtu &&
2410 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2411 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
/* Propagate a device MTU change to all routes in the device's netns. */
2417 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2419 struct rt6_mtu_change_arg arg = {
2424 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE. */
2427 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2428 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2429 [RTA_OIF] = { .type = NLA_U32 },
2430 [RTA_IIF] = { .type = NLA_U32 },
2431 [RTA_PRIORITY] = { .type = NLA_U32 },
2432 [RTA_METRICS] = { .type = NLA_NESTED },
2433 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2434 [RTA_PREF] = { .type = NLA_U8 },
/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a fib6_config.
 * Maps reject route types to RTF_REJECT, copies address attributes with
 * prefix-length-bounded lengths, and sanitizes RTA_PREF to a valid
 * RFC 4191 preference. Error paths are elided in this extract.
 */
2437 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2438 struct fib6_config *cfg)
2441 struct nlattr *tb[RTA_MAX+1];
2445 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2450 rtm = nlmsg_data(nlh);
2451 memset(cfg, 0, sizeof(*cfg));
2453 cfg->fc_table = rtm->rtm_table;
2454 cfg->fc_dst_len = rtm->rtm_dst_len;
2455 cfg->fc_src_len = rtm->rtm_src_len;
2456 cfg->fc_flags = RTF_UP;
2457 cfg->fc_protocol = rtm->rtm_protocol;
2458 cfg->fc_type = rtm->rtm_type;
2460 if (rtm->rtm_type == RTN_UNREACHABLE ||
2461 rtm->rtm_type == RTN_BLACKHOLE ||
2462 rtm->rtm_type == RTN_PROHIBIT ||
2463 rtm->rtm_type == RTN_THROW)
2464 cfg->fc_flags |= RTF_REJECT;
2466 if (rtm->rtm_type == RTN_LOCAL)
2467 cfg->fc_flags |= RTF_LOCAL;
2469 if (rtm->rtm_flags & RTM_F_CLONED)
2470 cfg->fc_flags |= RTF_CACHE;
2472 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2473 cfg->fc_nlinfo.nlh = nlh;
2474 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2476 if (tb[RTA_GATEWAY]) {
2477 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2478 cfg->fc_flags |= RTF_GATEWAY;
/* Addresses may be sent truncated to the prefix length (in bytes). */
2482 int plen = (rtm->rtm_dst_len + 7) >> 3;
2484 if (nla_len(tb[RTA_DST]) < plen)
2487 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2491 int plen = (rtm->rtm_src_len + 7) >> 3;
2493 if (nla_len(tb[RTA_SRC]) < plen)
2496 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2499 if (tb[RTA_PREFSRC])
2500 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2503 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2505 if (tb[RTA_PRIORITY])
2506 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2508 if (tb[RTA_METRICS]) {
2509 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2510 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2514 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2516 if (tb[RTA_MULTIPATH]) {
2517 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2518 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
/* Invalid preferences silently fall back to MEDIUM (RFC 4191 §2.2). */
2522 pref = nla_get_u8(tb[RTA_PREF]);
2523 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2524 pref != ICMPV6_ROUTER_PREF_HIGH)
2525 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2526 cfg->fc_flags |= RTF_PREF(pref);
/*
 * Add (@add != 0) or delete each nexthop of an RTA_MULTIPATH route as an
 * individual route. On delete, failures are tolerated so every nexthop
 * is attempted; the add-side rollback is elided in this extract.
 */
2534 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2536 struct fib6_config r_cfg;
2537 struct rtnexthop *rtnh;
2540 int err = 0, last_err = 0;
2543 rtnh = (struct rtnexthop *)cfg->fc_mp;
2544 remaining = cfg->fc_mp_len;
2546 /* Parse a Multipath Entry */
2547 while (rtnh_ok(rtnh, remaining)) {
/* Per-nexthop config starts as a copy of the shared config. */
2548 memcpy(&r_cfg, cfg, sizeof(*cfg));
2549 if (rtnh->rtnh_ifindex)
2550 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2552 attrlen = rtnh_attrlen(rtnh);
2554 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2556 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2558 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2559 r_cfg.fc_flags |= RTF_GATEWAY;
2562 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2565 /* If we are trying to remove a route, do not stop the
2566 * loop when ip6_route_del() fails (because next hop is
2567 * already gone), we should try to remove all next hops.
2570 /* If add fails, we should try to delete all
2571 * next hops that have been already added.
2577 /* Because each route is added like a single route we remove
2578 * this flag after the first nexthop (if there is a collision,
2579 * we have already fail to add the first nexthop:
2580 * fib6_add_rt2node() has reject it).
2582 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2583 rtnh = rtnh_next(rtnh, &remaining);
/* RTM_DELROUTE handler: parse, then delete multipath or single route. */
2589 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2591 struct fib6_config cfg;
2594 err = rtm_to_fib6_config(skb, nlh, &cfg);
2599 return ip6_route_multipath(&cfg, 0);
2601 return ip6_route_del(&cfg);
/* RTM_NEWROUTE handler: parse, then add multipath or single route. */
2604 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2606 struct fib6_config cfg;
2609 err = rtm_to_fib6_config(skb, nlh, &cfg);
2614 return ip6_route_multipath(&cfg, 1);
2616 return ip6_route_add(&cfg);
/*
 * Worst-case netlink message size for one route, used to size the skb
 * in inet6_rt_notify(). Must cover every attribute rt6_fill_node() can
 * emit, or rt6_fill_node() fails with -EMSGSIZE.
 */
2619 static inline size_t rt6_nlmsg_size(void)
2621 return NLMSG_ALIGN(sizeof(struct rtmsg))
2622 + nla_total_size(16) /* RTA_SRC */
2623 + nla_total_size(16) /* RTA_DST */
2624 + nla_total_size(16) /* RTA_GATEWAY */
2625 + nla_total_size(16) /* RTA_PREFSRC */
2626 + nla_total_size(4) /* RTA_TABLE */
2627 + nla_total_size(4) /* RTA_IIF */
2628 + nla_total_size(4) /* RTA_OIF */
2629 + nla_total_size(4) /* RTA_PRIORITY */
2630 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2631 + nla_total_size(sizeof(struct rta_cacheinfo))
2632 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2633 + nla_total_size(1); /* RTA_PREF */
/*
 * Serialize one rt6_info into an RTM_* netlink message on @skb.
 * @dst/@src, when non-NULL, override the route's own prefixes (used by
 * RTM_GETROUTE to report the concrete looked-up addresses); @prefix
 * restricts dumps to prefix routes; @nowait is passed to ip6mr lookup.
 * Returns 0 on success; failure paths cancel the message (elided here).
 */
2636 static int rt6_fill_node(struct net *net,
2637 struct sk_buff *skb, struct rt6_info *rt,
2638 struct in6_addr *dst, struct in6_addr *src,
2639 int iif, int type, u32 portid, u32 seq,
2640 int prefix, int nowait, unsigned int flags)
2642 u32 metrics[RTAX_MAX];
2644 struct nlmsghdr *nlh;
2648 if (prefix) { /* user wants prefix routes only */
2649 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2650 /* success since this is not a prefix route */
2655 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2659 rtm = nlmsg_data(nlh);
2660 rtm->rtm_family = AF_INET6;
2661 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2662 rtm->rtm_src_len = rt->rt6i_src.plen;
2665 table = rt->rt6i_table->tb6_id;
2667 table = RT6_TABLE_UNSPEC;
2668 rtm->rtm_table = table;
2669 if (nla_put_u32(skb, RTA_TABLE, table))
2670 goto nla_put_failure;
/* Reject routes encode their kind in dst.error (set by ip6_route_add). */
2671 if (rt->rt6i_flags & RTF_REJECT) {
2672 switch (rt->dst.error) {
2674 rtm->rtm_type = RTN_BLACKHOLE;
2677 rtm->rtm_type = RTN_PROHIBIT;
2680 rtm->rtm_type = RTN_THROW;
2683 rtm->rtm_type = RTN_UNREACHABLE;
2687 else if (rt->rt6i_flags & RTF_LOCAL)
2688 rtm->rtm_type = RTN_LOCAL;
2689 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2690 rtm->rtm_type = RTN_LOCAL;
2692 rtm->rtm_type = RTN_UNICAST;
2694 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2695 rtm->rtm_protocol = rt->rt6i_protocol;
2696 if (rt->rt6i_flags & RTF_DYNAMIC)
2697 rtm->rtm_protocol = RTPROT_REDIRECT;
2698 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2699 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2700 rtm->rtm_protocol = RTPROT_RA;
2702 rtm->rtm_protocol = RTPROT_KERNEL;
2705 if (rt->rt6i_flags & RTF_CACHE)
2706 rtm->rtm_flags |= RTM_F_CLONED;
/* Caller-supplied dst/src force full /128 reporting. */
2709 if (nla_put_in6_addr(skb, RTA_DST, dst))
2710 goto nla_put_failure;
2711 rtm->rtm_dst_len = 128;
2712 } else if (rtm->rtm_dst_len)
2713 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2714 goto nla_put_failure;
2715 #ifdef CONFIG_IPV6_SUBTREES
2717 if (nla_put_in6_addr(skb, RTA_SRC, src))
2718 goto nla_put_failure;
2719 rtm->rtm_src_len = 128;
2720 } else if (rtm->rtm_src_len &&
2721 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2722 goto nla_put_failure;
2725 #ifdef CONFIG_IPV6_MROUTE
2726 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2727 int err = ip6mr_get_route(net, skb, rtm, nowait);
2732 goto nla_put_failure;
2734 if (err == -EMSGSIZE)
2735 goto nla_put_failure;
2740 if (nla_put_u32(skb, RTA_IIF, iif))
2741 goto nla_put_failure;
2743 struct in6_addr saddr_buf;
2744 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2745 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2746 goto nla_put_failure;
2749 if (rt->rt6i_prefsrc.plen) {
2750 struct in6_addr saddr_buf;
2751 saddr_buf = rt->rt6i_prefsrc.addr;
2752 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2753 goto nla_put_failure;
/* A per-route PMTU (cached clone) overrides the MTU metric. */
2756 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2758 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2759 if (rtnetlink_put_metrics(skb, metrics) < 0)
2760 goto nla_put_failure;
2762 if (rt->rt6i_flags & RTF_GATEWAY) {
2763 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2764 goto nla_put_failure;
2768 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2769 goto nla_put_failure;
2770 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2771 goto nla_put_failure;
2773 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2775 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2776 goto nla_put_failure;
2778 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2779 goto nla_put_failure;
2781 nlmsg_end(skb, nlh);
2785 nlmsg_cancel(skb, nlh);
/*
 * Per-route callback for RTM_GETROUTE dumps: honours the RTM_F_PREFIX
 * filter from the request header and delegates to rt6_fill_node().
 */
2789 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2791 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2794 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2795 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2796 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2800 return rt6_fill_node(arg->net,
2801 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2802 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2803 prefix, 0, NLM_F_MULTI);
/*
 * RTM_GETROUTE handler: build a flow from the request attributes, do an
 * input-side lookup when RTA_IIF is given (else an output lookup), and
 * unicast the resulting route back to the requester.
 * Error paths are elided in this extract.
 */
2806 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2808 struct net *net = sock_net(in_skb->sk);
2809 struct nlattr *tb[RTA_MAX+1];
2810 struct rt6_info *rt;
2811 struct sk_buff *skb;
2814 int err, iif = 0, oif = 0;
2816 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2821 memset(&fl6, 0, sizeof(fl6));
2824 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2827 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2831 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2834 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2838 iif = nla_get_u32(tb[RTA_IIF]);
2841 oif = nla_get_u32(tb[RTA_OIF]);
2844 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
/* Input-side lookup: needs the ingress device to exist. */
2847 struct net_device *dev;
2850 dev = __dev_get_by_index(net, iif);
2856 fl6.flowi6_iif = iif;
2858 if (!ipv6_addr_any(&fl6.saddr))
2859 flags |= RT6_LOOKUP_F_HAS_SADDR;
2861 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2864 fl6.flowi6_oif = oif;
2866 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2869 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2876 /* Reserve room for dummy headers, this skb can pass
2877 through good chunk of routing engine.
2879 skb_reset_mac_header(skb);
2880 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2882 skb_dst_set(skb, &rt->dst);
2884 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2885 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2886 nlh->nlmsg_seq, 0, 0, 0);
2892 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * Broadcast a route add/delete @event to RTNLGRP_IPV6_ROUTE listeners.
 * The skb is sized by rt6_nlmsg_size(); -EMSGSIZE therefore indicates a
 * bug in that size estimate.
 */
2897 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2899 struct sk_buff *skb;
2900 struct net *net = info->nl_net;
2905 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2907 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2911 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2912 event, info->portid, seq, 0, 0, 0);
2914 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2915 WARN_ON(err == -EMSGSIZE);
2919 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2920 info->nlh, gfp_any());
2924 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/*
 * netdev notifier: when the loopback device registers, attach it (and
 * its inet6_dev) to the netns's special routes so they have a valid
 * device before any real interface exists.
 */
2927 static int ip6_route_dev_notify(struct notifier_block *this,
2928 unsigned long event, void *ptr)
2930 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2931 struct net *net = dev_net(dev);
2933 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2934 net->ipv6.ip6_null_entry->dst.dev = dev;
2935 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2936 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2937 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2938 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2939 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2940 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2951 #ifdef CONFIG_PROC_FS
/* /proc/net/ipv6_route file operations (read hook elided in this view). */
2953 static const struct file_operations ipv6_route_proc_fops = {
2954 .owner = THIS_MODULE,
2955 .open = ipv6_route_open,
2957 .llseek = seq_lseek,
2958 .release = seq_release_net,
/* Emit /proc/net/rt6_stats: seven hex-formatted FIB/dst counters. */
2961 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2963 struct net *net = (struct net *)seq->private;
2964 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2965 net->ipv6.rt6_stats->fib_nodes,
2966 net->ipv6.rt6_stats->fib_route_nodes,
2967 net->ipv6.rt6_stats->fib_rt_alloc,
2968 net->ipv6.rt6_stats->fib_rt_entries,
2969 net->ipv6.rt6_stats->fib_rt_cache,
2970 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2971 net->ipv6.rt6_stats->fib_discarded_routes);
/* open() for the per-netns rt6_stats single-shot seq file. */
2976 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2978 return single_open_net(inode, file, rt6_stats_seq_show);
/*
 * File operations for /proc/net/rt6_stats.
 * NOTE(review): the .read member (normally seq_read) and the closing
 * brace are missing from this extraction (numbering jumps 2983->2985).
 */
2981 static const struct file_operations rt6_stats_seq_fops = {
2982 .owner = THIS_MODULE,
2983 .open = rt6_stats_seq_open,
2985 .llseek = seq_lseek,
2986 .release = single_release_net,
2988 #endif /* CONFIG_PROC_FS */
2990 #ifdef CONFIG_SYSCTL
/*
 * ipv6_sysctl_rtcache_flush() - proc_handler for net.ipv6.route.flush.
 * Writing a value triggers a FIB6 garbage-collection run; a delay
 * <= 0 requests an immediate flush, a positive delay is passed to
 * fib6_run_gc() in jiffies with expiry forced.
 *
 * NOTE(review): the declarations of 'net'/'delay', the write-only
 * guard, and the return statement are missing from this extraction.
 */
2993 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2994 void __user *buffer, size_t *lenp, loff_t *ppos)
/* extra1 was stashed with the owning netns by ipv6_route_sysctl_init(). */
3001 net = (struct net *)ctl->extra1;
3002 delay = net->ipv6.sysctl.flush_delay;
3003 proc_dointvec(ctl, write, buffer, lenp, ppos);
3004 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
/*
 * Template for the per-netns net.ipv6.route.* sysctl table.  The
 * .data pointers reference init_net / template globals here; for each
 * netns, ipv6_route_sysctl_init() clones the table and retargets the
 * entries *by index* to the netns-private fields — so entry order here
 * and the table[i].data assignments there must stay in sync.
 *
 * NOTE(review): the per-entry .mode fields and the struct braces are
 * missing from this extraction (original numbering jumps).
 */
3008 struct ctl_table ipv6_route_table_template[] = {
/* [0] handled by ipv6_sysctl_rtcache_flush(), not a plain intvec. */
3010 .procname = "flush",
3011 .data = &init_net.ipv6.sysctl.flush_delay,
3012 .maxlen = sizeof(int),
3014 .proc_handler = ipv6_sysctl_rtcache_flush
/* [1] points into the dst_ops template, not the netns sysctl struct. */
3017 .procname = "gc_thresh",
3018 .data = &ip6_dst_ops_template.gc_thresh,
3019 .maxlen = sizeof(int),
3021 .proc_handler = proc_dointvec,
3024 .procname = "max_size",
3025 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
3026 .maxlen = sizeof(int),
3028 .proc_handler = proc_dointvec,
3031 .procname = "gc_min_interval",
3032 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3033 .maxlen = sizeof(int),
3035 .proc_handler = proc_dointvec_jiffies,
3038 .procname = "gc_timeout",
3039 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3040 .maxlen = sizeof(int),
3042 .proc_handler = proc_dointvec_jiffies,
3045 .procname = "gc_interval",
3046 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3047 .maxlen = sizeof(int),
3049 .proc_handler = proc_dointvec_jiffies,
3052 .procname = "gc_elasticity",
3053 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3054 .maxlen = sizeof(int),
3056 .proc_handler = proc_dointvec,
3059 .procname = "mtu_expires",
3060 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3061 .maxlen = sizeof(int),
3063 .proc_handler = proc_dointvec_jiffies,
3066 .procname = "min_adv_mss",
3067 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3068 .maxlen = sizeof(int),
3070 .proc_handler = proc_dointvec,
/* [9] millisecond view of the same variable as gc_min_interval. */
3073 .procname = "gc_min_interval_ms",
3074 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3075 .maxlen = sizeof(int),
3077 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * ipv6_route_sysctl_init() - clone ipv6_route_table_template for @net
 * and retarget each entry's .data pointer at the netns-private
 * counterpart.  Indices must match the template's entry order exactly.
 * Ownership of the returned table passes to the caller.
 *
 * NOTE(review): the kmemdup GFP flags, the NULL check on 'table', and
 * the return statement are missing from this extraction.
 */
3082 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3084 struct ctl_table *table;
3086 table = kmemdup(ipv6_route_table_template,
3087 sizeof(ipv6_route_table_template),
3091 table[0].data = &net->ipv6.sysctl.flush_delay;
/* extra1 lets ipv6_sysctl_rtcache_flush() recover the owning netns. */
3092 table[0].extra1 = net;
3093 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3094 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3095 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3096 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3097 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3098 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3099 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3100 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
/* gc_min_interval_ms shares the variable with entry [3] (ms units). */
3101 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3103 /* Don't export sysctls to unprivileged users */
3104 if (net->user_ns != &init_user_ns)
3105 table[0].procname = NULL;
/*
 * ip6_route_net_init() - per-netns route subsystem setup.  Copies the
 * dst_ops template, allocates the special route entries (null, and
 * prohibit/blackhole under CONFIG_IPV6_MULTIPLE_TABLES), seeds each
 * with template metrics, and installs the default sysctl values.
 * Unwinds allocations in reverse order on failure (goto chain).
 *
 * NOTE(review): extraction gaps — the kmemdup GFP flags, the 'ret'
 * declaration / success return, the out_ip6_null_entry label, and the
 * closing #endif/braces are not visible here.
 */
3112 static int __net_init ip6_route_net_init(struct net *net)
3116 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3117 sizeof(net->ipv6.ip6_dst_ops));
3119 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3120 goto out_ip6_dst_ops;
3122 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3123 sizeof(*net->ipv6.ip6_null_entry),
3125 if (!net->ipv6.ip6_null_entry)
3126 goto out_ip6_dst_entries;
/* Each special entry is its own dst.path (no nested route). */
3127 net->ipv6.ip6_null_entry->dst.path =
3128 (struct dst_entry *)net->ipv6.ip6_null_entry;
3129 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3130 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3131 ip6_template_metrics, true);
3133 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3134 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3135 sizeof(*net->ipv6.ip6_prohibit_entry),
3137 if (!net->ipv6.ip6_prohibit_entry)
3138 goto out_ip6_null_entry;
3139 net->ipv6.ip6_prohibit_entry->dst.path =
3140 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3141 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3142 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3143 ip6_template_metrics, true);
3145 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3146 sizeof(*net->ipv6.ip6_blk_hole_entry),
3148 if (!net->ipv6.ip6_blk_hole_entry)
3149 goto out_ip6_prohibit_entry;
3150 net->ipv6.ip6_blk_hole_entry->dst.path =
3151 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3152 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3153 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3154 ip6_template_metrics, true);
/* Default tunables; overridable via net.ipv6.route.* sysctls. */
3157 net->ipv6.sysctl.flush_delay = 0;
3158 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3159 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3160 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3161 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3162 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3163 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
/* IPV6_MIN_MTU minus TCP (20) and IPv6 (40) header sizes. */
3164 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3166 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3172 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3173 out_ip6_prohibit_entry:
3174 kfree(net->ipv6.ip6_prohibit_entry);
3176 kfree(net->ipv6.ip6_null_entry);
3178 out_ip6_dst_entries:
3179 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * ip6_route_net_exit() - per-netns teardown: free the special route
 * entries allocated by ip6_route_net_init() and destroy the dst
 * entry counter.  Mirrors the init function's allocations.
 */
3184 static void __net_exit ip6_route_net_exit(struct net *net)
3186 kfree(net->ipv6.ip6_null_entry);
3187 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3188 kfree(net->ipv6.ip6_prohibit_entry);
3189 kfree(net->ipv6.ip6_blk_hole_entry);
3191 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * ip6_route_net_init_late() - late per-netns setup: create the
 * /proc/net/ipv6_route and /proc/net/rt6_stats entries.
 * NOTE(review): the proc_create return values are not checked in the
 * visible lines, and the function's return is missing from this
 * extraction.
 */
3194 static int __net_init ip6_route_net_init_late(struct net *net)
3196 #ifdef CONFIG_PROC_FS
3197 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3198 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
/*
 * ip6_route_net_exit_late() - remove the proc entries created by
 * ip6_route_net_init_late().
 */
3203 static void __net_exit ip6_route_net_exit_late(struct net *net)
3205 #ifdef CONFIG_PROC_FS
3206 remove_proc_entry("ipv6_route", net->proc_net);
3207 remove_proc_entry("rt6_stats", net->proc_net);
/* Main per-netns route setup/teardown hooks. */
3211 static struct pernet_operations ip6_route_net_ops = {
3212 .init = ip6_route_net_init,
3213 .exit = ip6_route_net_exit,
/*
 * ipv6_inetpeer_init() - allocate and initialize the per-netns
 * inet_peer base used by IPv6.
 * NOTE(review): the NULL check on 'bp' and the return statements are
 * missing from this extraction.
 */
3216 static int __net_init ipv6_inetpeer_init(struct net *net)
3218 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3222 inet_peer_base_init(bp);
3223 net->ipv6.peers = bp;
/*
 * ipv6_inetpeer_exit() - detach and invalidate the per-netns inet_peer
 * base.  Clears the pointer before invalidating the tree.
 * NOTE(review): the kfree(bp) that presumably follows is missing from
 * this extraction — confirm against the full source.
 */
3229 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3231 struct inet_peer_base *bp = net->ipv6.peers;
3233 net->ipv6.peers = NULL;
3234 inetpeer_invalidate_tree(bp);
/* Per-netns inet_peer base lifecycle hooks. */
3236 static struct pernet_operations ipv6_inetpeer_ops = {
3237 .init = ipv6_inetpeer_init,
3238 .exit = ipv6_inetpeer_exit,
/* Late per-netns hooks (proc entries), registered after fib6 rules. */
3241 static struct pernet_operations ip6_route_net_late_ops = {
3242 .init = ip6_route_net_init_late,
3243 .exit = ip6_route_net_exit_late,
/* Netdevice event notifier wiring loopback into the route templates. */
3246 static struct notifier_block ip6_route_dev_notifier = {
3247 .notifier_call = ip6_route_dev_notify,
/*
 * ip6_route_init() - module/boot-time initialization of the IPv6
 * routing subsystem: dst slab cache, blackhole dst counters, pernet
 * subsystems, loopback wiring for init_net, fib6 rules, rtnetlink
 * handlers and the netdevice notifier.  Failures unwind in reverse
 * registration order via the goto chain.
 *
 * NOTE(review): extraction gaps — the 'ret' declaration, several
 * intermediate calls (the one whose failure jumps to
 * out_register_subsys, and whatever fib6_rules_init()'s error path
 * jumps over), the success return, and some unwind labels are not
 * visible here; the label targeted by 'goto fib6_rules_init' at
 * original line 3301 is also outside this view.
 */
3251 int __init ip6_route_init(void)
3256 ip6_dst_ops_template.kmem_cachep =
3257 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3258 SLAB_HWCACHE_ALIGN, NULL);
3259 if (!ip6_dst_ops_template.kmem_cachep)
3262 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3264 goto out_kmem_cache;
3266 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3268 goto out_dst_entries;
3270 ret = register_pernet_subsys(&ip6_route_net_ops);
3272 goto out_register_inetpeer;
/* Blackhole dsts share the regular rt6_info slab cache. */
3274 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3276 /* Registering of the loopback is done before this portion of code,
3277 * the loopback reference in rt6_info will not be taken, do it
3278 * manually for init_net */
3279 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3280 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3281 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3282 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3283 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3284 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3285 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3289 goto out_register_subsys;
3295 ret = fib6_rules_init();
3299 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3301 goto fib6_rules_init;
3304 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3305 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3306 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3307 goto out_register_late_subsys;
3309 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3311 goto out_register_late_subsys;
/* Error unwind: reverse order of the registrations above. */
3316 out_register_late_subsys:
3317 unregister_pernet_subsys(&ip6_route_net_late_ops);
3319 fib6_rules_cleanup();
3324 out_register_subsys:
3325 unregister_pernet_subsys(&ip6_route_net_ops);
3326 out_register_inetpeer:
3327 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3329 dst_entries_destroy(&ip6_dst_blackhole_ops);
3331 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3335 void ip6_route_cleanup(void)
3337 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3338 unregister_pernet_subsys(&ip6_route_net_late_ops);
3339 fib6_rules_cleanup();
3342 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3343 unregister_pernet_subsys(&ip6_route_net_ops);
3344 dst_entries_destroy(&ip6_dst_blackhole_ops);
3345 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);