git.karo-electronics.de Git - mv-sheeva.git/blob - net/ipv6/route.c
[IPv6] route: Fix prohibit and blackhole routing decision
[mv-sheeva.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59
60 #include <asm/uaccess.h>
61
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 /* Set to 3 to get tracing. */
/*
 * Debug verbosity for this file.  With RT6_DEBUG >= 3 both RDBG() and
 * RT6_TRACE() expand to printk() calls; below that they expand to nothing.
 */
67 #define RT6_DEBUG 2
68
69 #if RT6_DEBUG >= 3
/* RDBG(x): x must be a complete, parenthesised printk argument list. */
70 #define RDBG(x) printk x
/* RT6_TRACE(fmt, ...): printf-style trace emitted at KERN_DEBUG level. */
71 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #else
73 #define RDBG(x)
74 #define RT6_TRACE(x...) do { ; } while (0)
75 #endif
76
/*
 * Compile-time switch used by ip6_pol_route_input()/ip6_pol_route_output().
 * When non-zero, a matched route that already has a nexthop (or is flagged
 * RTF_NONEXTHOP) is cloned with rt6_alloc_clone(); when zero, that route is
 * returned directly without cloning.
 */
77 #define CLONE_OFFLINK_ROUTE 0
78
/*
 * Tunables for the IPv6 route/dst cache.  Time values are expressed in
 * jiffies (multiples of HZ).
 * NOTE(review): these are presumably exported through sysctl further down
 * in the file; that code is not visible here - confirm.
 */
/* ip6_dst_gc() compares the number of dst entries against this limit. */
79 static int ip6_rt_max_size = 4096;
/* ip6_dst_gc() uses this as the minimum spacing between two gc runs. */
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static: visible to other translation units. */
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
/*
 * Lower bound applied by ipv6_advmss().
 * NOTE(review): the 20 and 40 are presumably the TCP and IPv6 header sizes.
 */
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
/*
 * Forward declarations.  These functions are referenced before their
 * definitions: by the ip6_dst_ops table, by the static sentinel routes
 * (ip6_null_entry and friends) and by the clone helpers.
 */
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void             ip6_dst_destroy(struct dst_entry *);
91 static void             ip6_dst_ifdown(struct dst_entry *,
92                                        struct net_device *dev, int how);
93 static int               ip6_dst_gc(void);
94
/* Packet handlers installed as .input/.output of the sentinel routes. */
95 static int              ip6_pkt_discard(struct sk_buff *skb);
96 static int              ip6_pkt_discard_out(struct sk_buff *skb);
97 static int              ip6_pkt_prohibit(struct sk_buff *skb);
98 static int              ip6_pkt_prohibit_out(struct sk_buff *skb);
99 static int              ip6_pkt_blk_hole(struct sk_buff *skb);
100 static void             ip6_link_failure(struct sk_buff *skb);
101 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
102
/* Helpers used by rt6_route_rcv() to find or create Route Information routes. */
103 #ifdef CONFIG_IPV6_ROUTE_INFO
104 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
105                                            struct in6_addr *gwaddr, int ifindex,
106                                            unsigned pref);
107 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
108                                            struct in6_addr *gwaddr, int ifindex);
109 #endif
110
/*
 * dst_ops table for IPv6 routes.  ip6_dst_alloc() allocates from it and the
 * static sentinel routes below point their .ops at it.  It binds the dst
 * callbacks to the ip6_* handlers of this file; each entry is sized as a
 * struct rt6_info.
 */
111 static struct dst_ops ip6_dst_ops = {
112         .family                 =       AF_INET6,
113         .protocol               =       __constant_htons(ETH_P_IPV6),
114         .gc                     =       ip6_dst_gc,
115         .gc_thresh              =       1024,
116         .check                  =       ip6_dst_check,
117         .destroy                =       ip6_dst_destroy,
118         .ifdown                 =       ip6_dst_ifdown,
119         .negative_advice        =       ip6_negative_advice,
120         .link_failure           =       ip6_link_failure,
121         .update_pmtu            =       ip6_rt_update_pmtu,
122         .entry_size             =       sizeof(struct rt6_info),
123 };
124
/*
 * Statically allocated sentinel route meaning "no route found".
 * rt6_device_match() and rt6_select() return it when nothing matches.
 * It is bound to the loopback device, carries error -ENETUNREACH, and hands
 * packets to ip6_pkt_discard() / ip6_pkt_discard_out().  Its reference
 * counts start at 1 and its metric is the largest u32 value.
 */
125 struct rt6_info ip6_null_entry = {
126         .u = {
127                 .dst = {
128                         .__refcnt       = ATOMIC_INIT(1),
129                         .__use          = 1,
130                         .dev            = &loopback_dev,
131                         .obsolete       = -1,
132                         .error          = -ENETUNREACH,
133                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
134                         .input          = ip6_pkt_discard,
135                         .output         = ip6_pkt_discard_out,
136                         .ops            = &ip6_dst_ops,
                        /* .path refers back to this same entry. */
137                         .path           = (struct dst_entry*)&ip6_null_entry,
138                 }
139         },
140         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
141         .rt6i_metric    = ~(u32) 0,
142         .rt6i_ref       = ATOMIC_INIT(1),
143 };
144
145 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
146
/*
 * Statically allocated sentinel route for "prohibit" results; only built
 * with CONFIG_IPV6_MULTIPLE_TABLES.  Same layout as ip6_null_entry, but it
 * carries error -EACCES and hands packets to ip6_pkt_prohibit() /
 * ip6_pkt_prohibit_out().
 */
147 struct rt6_info ip6_prohibit_entry = {
148         .u = {
149                 .dst = {
150                         .__refcnt       = ATOMIC_INIT(1),
151                         .__use          = 1,
152                         .dev            = &loopback_dev,
153                         .obsolete       = -1,
154                         .error          = -EACCES,
155                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
156                         .input          = ip6_pkt_prohibit,
157                         .output         = ip6_pkt_prohibit_out,
158                         .ops            = &ip6_dst_ops,
                        /* .path refers back to this same entry. */
159                         .path           = (struct dst_entry*)&ip6_prohibit_entry,
160                 }
161         },
162         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
163         .rt6i_metric    = ~(u32) 0,
164         .rt6i_ref       = ATOMIC_INIT(1),
165 };
166
/*
 * Statically allocated sentinel route for "blackhole" results; only built
 * with CONFIG_IPV6_MULTIPLE_TABLES.  Same layout as ip6_null_entry, but it
 * carries error -EINVAL and uses ip6_pkt_blk_hole() for both the input and
 * the output path.
 */
167 struct rt6_info ip6_blk_hole_entry = {
168         .u = {
169                 .dst = {
170                         .__refcnt       = ATOMIC_INIT(1),
171                         .__use          = 1,
172                         .dev            = &loopback_dev,
173                         .obsolete       = -1,
174                         .error          = -EINVAL,
175                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
176                         .input          = ip6_pkt_blk_hole,
177                         .output         = ip6_pkt_blk_hole,
178                         .ops            = &ip6_dst_ops,
                        /* .path refers back to this same entry. */
179                         .path           = (struct dst_entry*)&ip6_blk_hole_entry,
180                 }
181         },
182         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
183         .rt6i_metric    = ~(u32) 0,
184         .rt6i_ref       = ATOMIC_INIT(1),
185 };
186
187 #endif
188
189 /* allocate dst with ip6_dst_ops */
190 static __inline__ struct rt6_info *ip6_dst_alloc(void)
191 {
192         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
193 }
194
195 static void ip6_dst_destroy(struct dst_entry *dst)
196 {
197         struct rt6_info *rt = (struct rt6_info *)dst;
198         struct inet6_dev *idev = rt->rt6i_idev;
199
200         if (idev != NULL) {
201                 rt->rt6i_idev = NULL;
202                 in6_dev_put(idev);
203         }       
204 }
205
206 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
207                            int how)
208 {
209         struct rt6_info *rt = (struct rt6_info *)dst;
210         struct inet6_dev *idev = rt->rt6i_idev;
211
212         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
213                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
214                 if (loopback_idev != NULL) {
215                         rt->rt6i_idev = loopback_idev;
216                         in6_dev_put(idev);
217                 }
218         }
219 }
220
221 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
222 {
223         return (rt->rt6i_flags & RTF_EXPIRES &&
224                 time_after(jiffies, rt->rt6i_expires));
225 }
226
227 static inline int rt6_need_strict(struct in6_addr *daddr)
228 {
229         return (ipv6_addr_type(daddr) &
230                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
231 }
232
233 /*
234  *      Route lookup. Any table->tb6_lock is implied.
235  */
236
237 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
238                                                     int oif,
239                                                     int strict)
240 {
241         struct rt6_info *local = NULL;
242         struct rt6_info *sprt;
243
244         if (oif) {
245                 for (sprt = rt; sprt; sprt = sprt->u.next) {
246                         struct net_device *dev = sprt->rt6i_dev;
247                         if (dev->ifindex == oif)
248                                 return sprt;
249                         if (dev->flags & IFF_LOOPBACK) {
250                                 if (sprt->rt6i_idev == NULL ||
251                                     sprt->rt6i_idev->dev->ifindex != oif) {
252                                         if (strict && oif)
253                                                 continue;
254                                         if (local && (!oif || 
255                                                       local->rt6i_idev->dev->ifindex == oif))
256                                                 continue;
257                                 }
258                                 local = sprt;
259                         }
260                 }
261
262                 if (local)
263                         return local;
264
265                 if (strict)
266                         return &ip6_null_entry;
267         }
268         return rt;
269 }
270
271 #ifdef CONFIG_IPV6_ROUTER_PREF
272 static void rt6_probe(struct rt6_info *rt)
273 {
274         struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
275         /*
276          * Okay, this does not seem to be appropriate
277          * for now, however, we need to check if it
278          * is really so; aka Router Reachability Probing.
279          *
280          * Router Reachability Probe MUST be rate-limited
281          * to no more than one per minute.
282          */
283         if (!neigh || (neigh->nud_state & NUD_VALID))
284                 return;
285         read_lock_bh(&neigh->lock);
286         if (!(neigh->nud_state & NUD_VALID) &&
287             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
288                 struct in6_addr mcaddr;
289                 struct in6_addr *target;
290
291                 neigh->updated = jiffies;
292                 read_unlock_bh(&neigh->lock);
293
294                 target = (struct in6_addr *)&neigh->primary_key;
295                 addrconf_addr_solict_mult(target, &mcaddr);
296                 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
297         } else
298                 read_unlock_bh(&neigh->lock);
299 }
300 #else
/* Without CONFIG_IPV6_ROUTER_PREF, router probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
305 #endif
306
307 /*
308  * Default Router Selection (RFC 2461 6.3.6)
309  */
310 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
311 {
312         struct net_device *dev = rt->rt6i_dev;
313         if (!oif || dev->ifindex == oif)
314                 return 2;
315         if ((dev->flags & IFF_LOOPBACK) &&
316             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
317                 return 1;
318         return 0;
319 }
320
321 static int inline rt6_check_neigh(struct rt6_info *rt)
322 {
323         struct neighbour *neigh = rt->rt6i_nexthop;
324         int m = 0;
325         if (rt->rt6i_flags & RTF_NONEXTHOP ||
326             !(rt->rt6i_flags & RTF_GATEWAY))
327                 m = 1;
328         else if (neigh) {
329                 read_lock_bh(&neigh->lock);
330                 if (neigh->nud_state & NUD_VALID)
331                         m = 2;
332                 read_unlock_bh(&neigh->lock);
333         }
334         return m;
335 }
336
337 static int rt6_score_route(struct rt6_info *rt, int oif,
338                            int strict)
339 {
340         int m, n;
341                 
342         m = rt6_check_dev(rt, oif);
343         if (!m && (strict & RT6_LOOKUP_F_IFACE))
344                 return -1;
345 #ifdef CONFIG_IPV6_ROUTER_PREF
346         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
347 #endif
348         n = rt6_check_neigh(rt);
349         if (n > 1)
350                 m |= 16;
351         else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
352                 return -1;
353         return m;
354 }
355
/*
 * rt6_select - choose the best route among the equal-metric routes at *head.
 * @head:   address of the list head pointer (may be rewritten, see below)
 * @oif:    interface index handed to rt6_score_route()
 * @strict: RT6_LOOKUP_F_* flags handed to rt6_score_route()
 *
 * Walks the routes that share the first route's metric, skips expired ones,
 * scores the rest and keeps the highest-scoring route.  Routes that lose
 * the comparison are handed to rt6_probe().
 *
 * If nothing matched, RT6_LOOKUP_F_REACHABLE was requested and more than
 * one candidate was seen, the first route is moved behind the last visited
 * one so that a later call starts from a different route (round-robin).
 *
 * Returns the selected route, or &ip6_null_entry if none qualified.
 */
356 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
357                                    int strict)
358 {
359         struct rt6_info *match = NULL, *last = NULL;
360         struct rt6_info *rt, *rt0 = *head;
361         u32 metric;
362         int mpri = -1;
363
364         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
365                   __FUNCTION__, head, head ? *head : NULL, oif);
366
        /*
         * Stop at the end of the list, at the first route with a different
         * metric, or when the walk arrives back at rt0 after at least one
         * non-expired route has been seen.
         */
367         for (rt = rt0, metric = rt0->rt6i_metric;
368              rt && rt->rt6i_metric == metric && (!last || rt != rt0);
369              rt = rt->u.next) {
370                 int m;
371
372                 if (rt6_check_expired(rt))
373                         continue;
374
                /* last = most recent non-expired route visited. */
375                 last = rt;
376
377                 m = rt6_score_route(rt, oif, strict);
378                 if (m < 0)
379                         continue;
380
381                 if (m > mpri) {
                        /* Probe the previous best (NULL on the first hit). */
382                         rt6_probe(match);
383                         match = rt;
384                         mpri = m;
385                 } else {
386                         rt6_probe(rt);
387                 }
388         }
389
390         if (!match &&
391             (strict & RT6_LOOKUP_F_REACHABLE) &&
392             last && last != rt0) {
393                 /* no entries matched; do round-robin */
                /* A function-local spinlock serialises the list rotation. */
394                 static DEFINE_SPINLOCK(lock);
395                 spin_lock(&lock);
396                 *head = rt0->u.next;
397                 rt0->u.next = last->u.next;
398                 last->u.next = rt0;
399                 spin_unlock(&lock);
400         }
401
402         RT6_TRACE("%s() => %p, score=%d\n",
403                   __FUNCTION__, match, mpri);
404
405         return (match ? match : &ip6_null_entry);
406 }
407
408 #ifdef CONFIG_IPV6_ROUTE_INFO
409 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
410                   struct in6_addr *gwaddr)
411 {
412         struct route_info *rinfo = (struct route_info *) opt;
413         struct in6_addr prefix_buf, *prefix;
414         unsigned int pref;
415         u32 lifetime;
416         struct rt6_info *rt;
417
418         if (len < sizeof(struct route_info)) {
419                 return -EINVAL;
420         }
421
422         /* Sanity check for prefix_len and length */
423         if (rinfo->length > 3) {
424                 return -EINVAL;
425         } else if (rinfo->prefix_len > 128) {
426                 return -EINVAL;
427         } else if (rinfo->prefix_len > 64) {
428                 if (rinfo->length < 2) {
429                         return -EINVAL;
430                 }
431         } else if (rinfo->prefix_len > 0) {
432                 if (rinfo->length < 1) {
433                         return -EINVAL;
434                 }
435         }
436
437         pref = rinfo->route_pref;
438         if (pref == ICMPV6_ROUTER_PREF_INVALID)
439                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
440
441         lifetime = htonl(rinfo->lifetime);
442         if (lifetime == 0xffffffff) {
443                 /* infinity */
444         } else if (lifetime > 0x7fffffff/HZ) {
445                 /* Avoid arithmetic overflow */
446                 lifetime = 0x7fffffff/HZ - 1;
447         }
448
449         if (rinfo->length == 3)
450                 prefix = (struct in6_addr *)rinfo->prefix;
451         else {
452                 /* this function is safe */
453                 ipv6_addr_prefix(&prefix_buf,
454                                  (struct in6_addr *)rinfo->prefix,
455                                  rinfo->prefix_len);
456                 prefix = &prefix_buf;
457         }
458
459         rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
460
461         if (rt && !lifetime) {
462                 ip6_del_rt(rt);
463                 rt = NULL;
464         }
465
466         if (!rt && lifetime)
467                 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
468                                         pref);
469         else if (rt)
470                 rt->rt6i_flags = RTF_ROUTEINFO |
471                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
472
473         if (rt) {
474                 if (lifetime == 0xffffffff) {
475                         rt->rt6i_flags &= ~RTF_EXPIRES;
476                 } else {
477                         rt->rt6i_expires = jiffies + HZ * lifetime;
478                         rt->rt6i_flags |= RTF_EXPIRES;
479                 }
480                 dst_release(&rt->u.dst);
481         }
482         return 0;
483 }
484 #endif
485
/*
 * BACKTRACK(saddr) - climb the fib6 tree when the current node yielded no
 * usable route.
 *
 * Must be expanded inside a function that has `rt` (struct rt6_info *) and
 * `fn` (struct fib6_node *) in scope and defines the labels `restart` and
 * `out`.  It does nothing unless rt == &ip6_null_entry.  Otherwise, on each
 * iteration:
 *   - if fn is flagged RTN_TL_ROOT, jump to `out`;
 *   - step to the parent; if the parent has a subtree other than the node
 *     we came from, look @saddr up in that subtree, else use the parent;
 *   - if the resulting node is flagged RTN_RTINFO, jump to `restart`.
 */
486 #define BACKTRACK(saddr) \
487 do { \
488         if (rt == &ip6_null_entry) { \
489                 struct fib6_node *pn; \
490                 while (1) { \
491                         if (fn->fn_flags & RTN_TL_ROOT) \
492                                 goto out; \
493                         pn = fn->parent; \
494                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
495                                 fn = fib6_lookup(pn->subtree, NULL, saddr); \
496                         else \
497                                 fn = pn; \
498                         if (fn->fn_flags & RTN_RTINFO) \
499                                 goto restart; \
500                 } \
501         } \
502 } while(0)
503
504 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
505                                              struct flowi *fl, int flags)
506 {
507         struct fib6_node *fn;
508         struct rt6_info *rt;
509
510         read_lock_bh(&table->tb6_lock);
511         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
512 restart:
513         rt = fn->leaf;
514         rt = rt6_device_match(rt, fl->oif, flags);
515         BACKTRACK(&fl->fl6_src);
516 out:
517         dst_hold(&rt->u.dst);
518         read_unlock_bh(&table->tb6_lock);
519
520         rt->u.dst.lastuse = jiffies;
521         rt->u.dst.__use++;
522
523         return rt;
524
525 }
526
527 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
528                             int oif, int strict)
529 {
530         struct flowi fl = {
531                 .oif = oif,
532                 .nl_u = {
533                         .ip6_u = {
534                                 .daddr = *daddr,
535                         },
536                 },
537         };
538         struct dst_entry *dst;
539         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
540
541         if (saddr) {
542                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
543                 flags |= RT6_LOOKUP_F_HAS_SADDR;
544         }
545
546         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
547         if (dst->error == 0)
548                 return (struct rt6_info *) dst;
549
550         dst_release(dst);
551
552         return NULL;
553 }
554
555 /* ip6_ins_rt is called with table->tb6_lock NOT held.
556    It takes a new route entry; if the addition fails for any reason, the
557    route is freed. In either case, if the caller does not hold a reference
558    to the route, it may be destroyed.
559  */
560
561 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
562 {
563         int err;
564         struct fib6_table *table;
565
566         table = rt->rt6i_table;
567         write_lock_bh(&table->tb6_lock);
568         err = fib6_add(&table->tb6_root, rt, info);
569         write_unlock_bh(&table->tb6_lock);
570
571         return err;
572 }
573
574 int ip6_ins_rt(struct rt6_info *rt)
575 {
576         return __ip6_ins_rt(rt, NULL);
577 }
578
579 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
580                                       struct in6_addr *saddr)
581 {
582         struct rt6_info *rt;
583
584         /*
585          *      Clone the route.
586          */
587
588         rt = ip6_rt_copy(ort);
589
590         if (rt) {
591                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
592                         if (rt->rt6i_dst.plen != 128 &&
593                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
594                                 rt->rt6i_flags |= RTF_ANYCAST;
595                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
596                 }
597
598                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
599                 rt->rt6i_dst.plen = 128;
600                 rt->rt6i_flags |= RTF_CACHE;
601                 rt->u.dst.flags |= DST_HOST;
602
603 #ifdef CONFIG_IPV6_SUBTREES
604                 if (rt->rt6i_src.plen && saddr) {
605                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
606                         rt->rt6i_src.plen = 128;
607                 }
608 #endif
609
610                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
611
612         }
613
614         return rt;
615 }
616
617 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
618 {
619         struct rt6_info *rt = ip6_rt_copy(ort);
620         if (rt) {
621                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
622                 rt->rt6i_dst.plen = 128;
623                 rt->rt6i_flags |= RTF_CACHE;
624                 rt->u.dst.flags |= DST_HOST;
625                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
626         }
627         return rt;
628 }
629
/*
 * ip6_pol_route_input - per-table lookup callback for received packets
 * (handed to fib6_rule_lookup() by ip6_route_input()).
 * @table: routing table to search
 * @fl:    flow; fl6_dst, fl6_src and iif are used
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is honoured here
 *
 * The first pass asks rt6_select() for a reachable router
 * (RT6_LOOKUP_F_REACHABLE).  If that pass ends at `out` (null entry or an
 * RTF_CACHE route), the lookup is repeated once without the reachability
 * requirement.  A non-cache route without a nexthop (and not RTF_NONEXTHOP)
 * is cloned with rt6_alloc_cow() and the clone is inserted into the table;
 * since the table lock is dropped for that, a failed insert is retried via
 * a fresh lookup, for at most three attempts in total.
 *
 * Every path returns a route (possibly &ip6_null_entry) with a reference
 * held and its lastuse/__use counters updated.
 */
630 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
631                                             struct flowi *fl, int flags)
632 {
633         struct fib6_node *fn;
634         struct rt6_info *rt, *nrt;
635         int strict = 0;
636         int attempts = 3;
637         int err;
638         int reachable = RT6_LOOKUP_F_REACHABLE;
639
640         strict |= flags & RT6_LOOKUP_F_IFACE;
641
642 relookup:
643         read_lock_bh(&table->tb6_lock);
644
645 restart_2:
646         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
647
648 restart:
649         rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
        /* May jump to restart (try an ancestor node) or to out (hit the root). */
650         BACKTRACK(&fl->fl6_src);
651         if (rt == &ip6_null_entry ||
652             rt->rt6i_flags & RTF_CACHE)
653                 goto out;
654
        /* Pin the route, then drop the lock: the clone is made unlocked. */
655         dst_hold(&rt->u.dst);
656         read_unlock_bh(&table->tb6_lock);
657
658         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
659                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
660         else {
661 #if CLONE_OFFLINK_ROUTE
662                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
663 #else
                /* No clone: return the matched route, reference already held. */
664                 goto out2;
665 #endif
666         }
667
        /* Swap the reference from the original route to the clone (or null entry). */
668         dst_release(&rt->u.dst);
669         rt = nrt ? : &ip6_null_entry;
670
671         dst_hold(&rt->u.dst);
672         if (nrt) {
673                 err = ip6_ins_rt(nrt);
674                 if (!err)
675                         goto out2;
676         }
677
        /* Out of retries: return what we have (reference held above). */
678         if (--attempts <= 0)
679                 goto out2;
680
681         /*
682          * Race condition! In the gap, when table->tb6_lock was
683          * released someone could insert this route.  Relookup.
684          */
685         dst_release(&rt->u.dst);
686         goto relookup;
687
688 out:
        /* Reached with the table lock still held for reading. */
689         if (reachable) {
                /* Second pass: drop the reachability requirement. */
690                 reachable = 0;
691                 goto restart_2;
692         }
693         dst_hold(&rt->u.dst);
694         read_unlock_bh(&table->tb6_lock);
695 out2:
696         rt->u.dst.lastuse = jiffies;
697         rt->u.dst.__use++;
698
699         return rt;
700 }
701
702 void ip6_route_input(struct sk_buff *skb)
703 {
704         struct ipv6hdr *iph = skb->nh.ipv6h;
705         int flags = RT6_LOOKUP_F_HAS_SADDR;
706         struct flowi fl = {
707                 .iif = skb->dev->ifindex,
708                 .nl_u = {
709                         .ip6_u = {
710                                 .daddr = iph->daddr,
711                                 .saddr = iph->saddr,
712 #ifdef CONFIG_IPV6_ROUTE_FWMARK
713                                 .fwmark = skb->nfmark,
714 #endif
715                                 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
716                         },
717                 },
718                 .proto = iph->nexthdr,
719         };
720
721         if (rt6_need_strict(&iph->daddr))
722                 flags |= RT6_LOOKUP_F_IFACE;
723
724         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
725 }
726
/*
 * ip6_pol_route_output - per-table lookup callback for locally generated
 * packets (handed to fib6_rule_lookup() by ip6_route_output()).
 * @table: routing table to search
 * @fl:    flow; fl6_dst, fl6_src and oif are used
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is honoured here
 *
 * The first pass asks rt6_select() for a reachable router
 * (RT6_LOOKUP_F_REACHABLE).  If that pass ends at `out` (null entry or an
 * RTF_CACHE route), the lookup is repeated once without the reachability
 * requirement.  A non-cache route without a nexthop (and not RTF_NONEXTHOP)
 * is cloned with rt6_alloc_cow() and the clone is inserted into the table;
 * since the table lock is dropped for that, a failed insert is retried via
 * a fresh lookup, for at most three attempts in total.
 *
 * Every path returns a route (possibly &ip6_null_entry) with a reference
 * held and its lastuse/__use counters updated.
 */
727 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
728                                              struct flowi *fl, int flags)
729 {
730         struct fib6_node *fn;
731         struct rt6_info *rt, *nrt;
732         int strict = 0;
733         int attempts = 3;
734         int err;
735         int reachable = RT6_LOOKUP_F_REACHABLE;
736
737         strict |= flags & RT6_LOOKUP_F_IFACE;
738
739 relookup:
740         read_lock_bh(&table->tb6_lock);
741
742 restart_2:
743         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
744
745 restart:
746         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
        /* May jump to restart (try an ancestor node) or to out (hit the root). */
747         BACKTRACK(&fl->fl6_src);
748         if (rt == &ip6_null_entry ||
749             rt->rt6i_flags & RTF_CACHE)
750                 goto out;
751
        /* Pin the route, then drop the lock: the clone is made unlocked. */
752         dst_hold(&rt->u.dst);
753         read_unlock_bh(&table->tb6_lock);
754
755         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
756                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
757         else {
758 #if CLONE_OFFLINK_ROUTE
759                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
760 #else
                /* No clone: return the matched route, reference already held. */
761                 goto out2;
762 #endif
763         }
764
        /* Swap the reference from the original route to the clone (or null entry). */
765         dst_release(&rt->u.dst);
766         rt = nrt ? : &ip6_null_entry;
767
768         dst_hold(&rt->u.dst);
769         if (nrt) {
770                 err = ip6_ins_rt(nrt);
771                 if (!err)
772                         goto out2;
773         }
774
        /* Out of retries: return what we have (reference held above). */
775         if (--attempts <= 0)
776                 goto out2;
777
778         /*
779          * Race condition! In the gap, when table->tb6_lock was
780          * released someone could insert this route.  Relookup.
781          */
782         dst_release(&rt->u.dst);
783         goto relookup;
784
785 out:
        /* Reached with the table lock still held for reading. */
786         if (reachable) {
                /* Second pass: drop the reachability requirement. */
787                 reachable = 0;
788                 goto restart_2;
789         }
790         dst_hold(&rt->u.dst);
791         read_unlock_bh(&table->tb6_lock);
792 out2:
793         rt->u.dst.lastuse = jiffies;
794         rt->u.dst.__use++;
795         return rt;
796 }
797
798 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
799 {
800         int flags = 0;
801
802         if (rt6_need_strict(&fl->fl6_dst))
803                 flags |= RT6_LOOKUP_F_IFACE;
804
805         if (!ipv6_addr_any(&fl->fl6_src))
806                 flags |= RT6_LOOKUP_F_HAS_SADDR;
807
808         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
809 }
810
811
812 /*
813  *      Destination cache support functions
814  */
815
816 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
817 {
818         struct rt6_info *rt;
819
820         rt = (struct rt6_info *) dst;
821
822         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
823                 return dst;
824
825         return NULL;
826 }
827
828 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
829 {
830         struct rt6_info *rt = (struct rt6_info *) dst;
831
832         if (rt) {
833                 if (rt->rt6i_flags & RTF_CACHE)
834                         ip6_del_rt(rt);
835                 else
836                         dst_release(dst);
837         }
838         return NULL;
839 }
840
841 static void ip6_link_failure(struct sk_buff *skb)
842 {
843         struct rt6_info *rt;
844
845         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
846
847         rt = (struct rt6_info *) skb->dst;
848         if (rt) {
849                 if (rt->rt6i_flags&RTF_CACHE) {
850                         dst_set_expires(&rt->u.dst, 0);
851                         rt->rt6i_flags |= RTF_EXPIRES;
852                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
853                         rt->rt6i_node->fn_sernum = -1;
854         }
855 }
856
857 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
858 {
859         struct rt6_info *rt6 = (struct rt6_info*)dst;
860
861         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
862                 rt6->rt6i_flags |= RTF_MODIFIED;
863                 if (mtu < IPV6_MIN_MTU) {
864                         mtu = IPV6_MIN_MTU;
865                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
866                 }
867                 dst->metrics[RTAX_MTU-1] = mtu;
868                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
869         }
870 }
871
/*
 * Forward declaration: ndisc_dst_alloc() below calls this before its
 * definition, which (being static) lives elsewhere in this file.
 */
872 static int ipv6_get_mtu(struct net_device *dev);
873
874 static inline unsigned int ipv6_advmss(unsigned int mtu)
875 {
876         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
877
878         if (mtu < ip6_rt_min_advmss)
879                 mtu = ip6_rt_min_advmss;
880
881         /*
882          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
883          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
884          * IPV6_MAXPLEN is also valid and means: "any MSS, 
885          * rely only on pmtu discovery"
886          */
887         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
888                 mtu = IPV6_MAXPLEN;
889         return mtu;
890 }
891
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc().  ndisc_dst_gc() frees the entries whose reference
 * count has dropped to zero.  Both take ndisc_lock around list accesses.
 */
892 static struct dst_entry *ndisc_dst_gc_list;
893 static DEFINE_SPINLOCK(ndisc_lock);
894
895 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
896                                   struct neighbour *neigh,
897                                   struct in6_addr *addr,
898                                   int (*output)(struct sk_buff *))
899 {
900         struct rt6_info *rt;
901         struct inet6_dev *idev = in6_dev_get(dev);
902
903         if (unlikely(idev == NULL))
904                 return NULL;
905
906         rt = ip6_dst_alloc();
907         if (unlikely(rt == NULL)) {
908                 in6_dev_put(idev);
909                 goto out;
910         }
911
912         dev_hold(dev);
913         if (neigh)
914                 neigh_hold(neigh);
915         else
916                 neigh = ndisc_get_neigh(dev, addr);
917
918         rt->rt6i_dev      = dev;
919         rt->rt6i_idev     = idev;
920         rt->rt6i_nexthop  = neigh;
921         atomic_set(&rt->u.dst.__refcnt, 1);
922         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
923         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
924         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
925         rt->u.dst.output  = output;
926
927 #if 0   /* there's no chance to use these for ndisc */
928         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
929                                 ? DST_HOST 
930                                 : 0;
931         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
932         rt->rt6i_dst.plen = 128;
933 #endif
934
935         spin_lock_bh(&ndisc_lock);
936         rt->u.dst.next = ndisc_dst_gc_list;
937         ndisc_dst_gc_list = &rt->u.dst;
938         spin_unlock_bh(&ndisc_lock);
939
940         fib6_force_start_gc();
941
942 out:
943         return (struct dst_entry *)rt;
944 }
945
946 int ndisc_dst_gc(int *more)
947 {
948         struct dst_entry *dst, *next, **pprev;
949         int freed;
950
951         next = NULL;
952         freed = 0;
953
954         spin_lock_bh(&ndisc_lock);
955         pprev = &ndisc_dst_gc_list;
956
957         while ((dst = *pprev) != NULL) {
958                 if (!atomic_read(&dst->__refcnt)) {
959                         *pprev = dst->next;
960                         dst_free(dst);
961                         freed++;
962                 } else {
963                         pprev = &dst->next;
964                         (*more)++;
965                 }
966         }
967
968         spin_unlock_bh(&ndisc_lock);
969
970         return freed;
971 }
972
973 static int ip6_dst_gc(void)
974 {
975         static unsigned expire = 30*HZ;
976         static unsigned long last_gc;
977         unsigned long now = jiffies;
978
979         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
980             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
981                 goto out;
982
983         expire++;
984         fib6_run_gc(expire);
985         last_gc = now;
986         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
987                 expire = ip6_rt_gc_timeout>>1;
988
989 out:
990         expire -= expire>>ip6_rt_gc_elasticity;
991         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
992 }
993
994 /* Clean host part of a prefix. Not necessary in radix tree,
995    but results in cleaner routing tables.
996
997    Remove it only when all the things will work!
998  */
999
1000 static int ipv6_get_mtu(struct net_device *dev)
1001 {
1002         int mtu = IPV6_MIN_MTU;
1003         struct inet6_dev *idev;
1004
1005         idev = in6_dev_get(dev);
1006         if (idev) {
1007                 mtu = idev->cnf.mtu6;
1008                 in6_dev_put(idev);
1009         }
1010         return mtu;
1011 }
1012
1013 int ipv6_get_hoplimit(struct net_device *dev)
1014 {
1015         int hoplimit = ipv6_devconf.hop_limit;
1016         struct inet6_dev *idev;
1017
1018         idev = in6_dev_get(dev);
1019         if (idev) {
1020                 hoplimit = idev->cnf.hop_limit;
1021                 in6_dev_put(idev);
1022         }
1023         return hoplimit;
1024 }
1025
1026 /*
1027  *
1028  */
1029
/*
 * Build a route from @cfg and insert it via __ip6_ins_rt().
 *
 * Steps, in order:
 *  - reject prefix lengths above 128 (and any source prefix when
 *    CONFIG_IPV6_SUBTREES is off) with -EINVAL;
 *  - if fc_ifindex is set, take references on the device and its inet6_dev
 *    (-ENODEV if either is missing);
 *  - default fc_metric to IP6_RT_PRIO_USER and fc_protocol to RTPROT_BOOT;
 *  - obtain the table (-ENOBUFS) and allocate the rt6_info (-ENOMEM);
 *  - RTF_REJECT routes, and routes via a loopback device to a non-loopback
 *    destination, become reject routes bound to loopback_dev;
 *  - for RTF_GATEWAY, validate the gateway address and resolve the device
 *    through rt6_lookup() when the gateway is not link-local unicast;
 *  - copy metrics from the fc_mx netlink attributes and fill in defaults
 *    for hop limit, MTU and advertised MSS.
 *
 * Note that @cfg is modified (fc_metric, fc_protocol defaults).
 *
 * Returns the result of __ip6_ins_rt() on the success path.  On every
 * failure path the references held on dev/idev are dropped and the
 * half-built route is released with dst_free().
 */
1030 int ip6_route_add(struct fib6_config *cfg)
1031 {
1032         int err;
1033         struct rt6_info *rt = NULL;
1034         struct net_device *dev = NULL;
1035         struct inet6_dev *idev = NULL;
1036         struct fib6_table *table;
1037         int addr_type;
1038
1039         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1040                 return -EINVAL;
1041 #ifndef CONFIG_IPV6_SUBTREES
1042         if (cfg->fc_src_len)
1043                 return -EINVAL;
1044 #endif
1045         if (cfg->fc_ifindex) {
1046                 err = -ENODEV;
1047                 dev = dev_get_by_index(cfg->fc_ifindex);
1048                 if (!dev)
1049                         goto out;
1050                 idev = in6_dev_get(dev);
1051                 if (!idev)
1052                         goto out;
1053         }
1054
1055         if (cfg->fc_metric == 0)
1056                 cfg->fc_metric = IP6_RT_PRIO_USER;
1057
1058         table = fib6_new_table(cfg->fc_table);
1059         if (table == NULL) {
1060                 err = -ENOBUFS;
1061                 goto out;
1062         }
1063
1064         rt = ip6_dst_alloc();
1065
1066         if (rt == NULL) {
1067                 err = -ENOMEM;
1068                 goto out;
1069         }
1070
1071         rt->u.dst.obsolete = -1;
1072         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1073
1074         if (cfg->fc_protocol == RTPROT_UNSPEC)
1075                 cfg->fc_protocol = RTPROT_BOOT;
1076         rt->rt6i_protocol = cfg->fc_protocol;
1077
1078         addr_type = ipv6_addr_type(&cfg->fc_dst);
1079
             /* Input handler depends on whether the destination is multicast. */
1080         if (addr_type & IPV6_ADDR_MULTICAST)
1081                 rt->u.dst.input = ip6_mc_input;
1082         else
1083                 rt->u.dst.input = ip6_forward;
1084
1085         rt->u.dst.output = ip6_output;
1086
1087         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1088         rt->rt6i_dst.plen = cfg->fc_dst_len;
1089         if (rt->rt6i_dst.plen == 128)
1090                rt->u.dst.flags = DST_HOST;
1091
1092 #ifdef CONFIG_IPV6_SUBTREES
1093         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1094         rt->rt6i_src.plen = cfg->fc_src_len;
1095 #endif
1096
1097         rt->rt6i_metric = cfg->fc_metric;
1098
1099         /* We cannot add true routes via loopback here,
1100            they would result in kernel looping; promote them to reject routes
1101          */
1102         if ((cfg->fc_flags & RTF_REJECT) ||
1103             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1104                 /* hold loopback dev/idev if we haven't done so. */
1105                 if (dev != &loopback_dev) {
1106                         if (dev) {
1107                                 dev_put(dev);
1108                                 in6_dev_put(idev);
1109                         }
1110                         dev = &loopback_dev;
1111                         dev_hold(dev);
1112                         idev = in6_dev_get(dev);
1113                         if (!idev) {
1114                                 err = -ENODEV;
1115                                 goto out;
1116                         }
1117                 }
1118                 rt->u.dst.output = ip6_pkt_discard_out;
1119                 rt->u.dst.input = ip6_pkt_discard;
1120                 rt->u.dst.error = -ENETUNREACH;
1121                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1122                 goto install_route;
1123         }
1124
1125         if (cfg->fc_flags & RTF_GATEWAY) {
1126                 struct in6_addr *gw_addr;
1127                 int gwa_type;
1128
1129                 gw_addr = &cfg->fc_gateway;
1130                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1131                 gwa_type = ipv6_addr_type(gw_addr);
1132
1133                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1134                         struct rt6_info *grt;
1135
1136                         /* IPv6 strictly inhibits using not link-local
1137                            addresses as nexthop address.
1138                            Otherwise, router will not able to send redirects.
1139                            It is very good, but in some (rare!) circumstances
1140                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1141                            some exceptions. --ANK
1142                          */
1143                         err = -EINVAL;
1144                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1145                                 goto out;
1146
1147                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1148
1149                         err = -EHOSTUNREACH;
1150                         if (grt == NULL)
1151                                 goto out;
1152                         if (dev) {
1153                                 if (dev != grt->rt6i_dev) {
1154                                         dst_release(&grt->u.dst);
1155                                         goto out;
1156                                 }
1157                         } else {
                                     /* No device given: inherit dev/idev from the
                                        route to the gateway, taking our own refs. */
1158                                 dev = grt->rt6i_dev;
1159                                 idev = grt->rt6i_idev;
1160                                 dev_hold(dev);
1161                                 in6_dev_hold(grt->rt6i_idev);
1162                         }
                             /* err stays -EHOSTUNREACH if the gateway itself is
                                only reachable through another gateway. */
1163                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1164                                 err = 0;
1165                         dst_release(&grt->u.dst);
1166
1167                         if (err)
1168                                 goto out;
1169                 }
1170                 err = -EINVAL;
1171                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1172                         goto out;
1173         }
1174
1175         err = -ENODEV;
1176         if (dev == NULL)
1177                 goto out;
1178
1179         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1180                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1181                 if (IS_ERR(rt->rt6i_nexthop)) {
1182                         err = PTR_ERR(rt->rt6i_nexthop);
1183                         rt->rt6i_nexthop = NULL;
1184                         goto out;
1185                 }
1186         }
1187
1188         rt->rt6i_flags = cfg->fc_flags;
1189
1190 install_route:
1191         if (cfg->fc_mx) {
1192                 struct nlattr *nla;
1193                 int remaining;
1194
1195                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1196                         int type = nla->nla_type;
1197
1198                         if (type) {
1199                                 if (type > RTAX_MAX) {
1200                                         err = -EINVAL;
1201                                         goto out;
1202                                 }
1203
                                     /* metrics[] is indexed by attribute type minus one. */
1204                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1205                         }
1206                 }
1207         }
1208
             /* Fill in defaults for metrics the caller did not supply. */
1209         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1210                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1211         if (!rt->u.dst.metrics[RTAX_MTU-1])
1212                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1213         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1214                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
             /* The dev/idev references taken above are handed over to the route. */
1215         rt->u.dst.dev = dev;
1216         rt->rt6i_idev = idev;
1217         rt->rt6i_table = table;
1218         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1219
1220 out:
1221         if (dev)
1222                 dev_put(dev);
1223         if (idev)
1224                 in6_dev_put(idev);
1225         if (rt)
1226                 dst_free((struct dst_entry *) rt);
1227         return err;
1228 }
1229
1230 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1231 {
1232         int err;
1233         struct fib6_table *table;
1234
1235         if (rt == &ip6_null_entry)
1236                 return -ENOENT;
1237
1238         table = rt->rt6i_table;
1239         write_lock_bh(&table->tb6_lock);
1240
1241         err = fib6_del(rt, info);
1242         dst_release(&rt->u.dst);
1243
1244         write_unlock_bh(&table->tb6_lock);
1245
1246         return err;
1247 }
1248
1249 int ip6_del_rt(struct rt6_info *rt)
1250 {
1251         return __ip6_del_rt(rt, NULL);
1252 }
1253
1254 static int ip6_route_del(struct fib6_config *cfg)
1255 {
1256         struct fib6_table *table;
1257         struct fib6_node *fn;
1258         struct rt6_info *rt;
1259         int err = -ESRCH;
1260
1261         table = fib6_get_table(cfg->fc_table);
1262         if (table == NULL)
1263                 return err;
1264
1265         read_lock_bh(&table->tb6_lock);
1266
1267         fn = fib6_locate(&table->tb6_root,
1268                          &cfg->fc_dst, cfg->fc_dst_len,
1269                          &cfg->fc_src, cfg->fc_src_len);
1270         
1271         if (fn) {
1272                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1273                         if (cfg->fc_ifindex &&
1274                             (rt->rt6i_dev == NULL ||
1275                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1276                                 continue;
1277                         if (cfg->fc_flags & RTF_GATEWAY &&
1278                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1279                                 continue;
1280                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1281                                 continue;
1282                         dst_hold(&rt->u.dst);
1283                         read_unlock_bh(&table->tb6_lock);
1284
1285                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1286                 }
1287         }
1288         read_unlock_bh(&table->tb6_lock);
1289
1290         return err;
1291 }
1292
1293 /*
1294  *      Handle redirects
1295  */
/*
 * Flow key used for redirect lookups: a plain flowi followed by the address
 * of the router that sent the redirect.  'fl' must stay the first member,
 * because the lookup callback receives a struct flowi pointer and casts it
 * back to this type.
 */
1296 struct ip6rd_flowi {
1297         struct flowi fl;
1298         struct in6_addr gateway;
1299 };
1300
1301 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1302                                              struct flowi *fl,
1303                                              int flags)
1304 {
1305         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1306         struct rt6_info *rt;
1307         struct fib6_node *fn;
1308
1309         /*
1310          * Get the "current" route for this destination and
1311          * check if the redirect has come from approriate router.
1312          *
1313          * RFC 2461 specifies that redirects should only be
1314          * accepted if they come from the nexthop to the target.
1315          * Due to the way the routes are chosen, this notion
1316          * is a bit fuzzy and one might need to check all possible
1317          * routes.
1318          */
1319
1320         read_lock_bh(&table->tb6_lock);
1321         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1322 restart:
1323         for (rt = fn->leaf; rt; rt = rt->u.next) {
1324                 /*
1325                  * Current route is on-link; redirect is always invalid.
1326                  *
1327                  * Seems, previous statement is not true. It could
1328                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1329                  * But then router serving it might decide, that we should
1330                  * know truth 8)8) --ANK (980726).
1331                  */
1332                 if (rt6_check_expired(rt))
1333                         continue;
1334                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1335                         continue;
1336                 if (fl->oif != rt->rt6i_dev->ifindex)
1337                         continue;
1338                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1339                         continue;
1340                 break;
1341         }
1342
1343         if (!rt)
1344                 rt = &ip6_null_entry;
1345         BACKTRACK(&fl->fl6_src);
1346 out:
1347         dst_hold(&rt->u.dst);
1348
1349         read_unlock_bh(&table->tb6_lock);
1350
1351         return rt;
1352 };
1353
1354 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1355                                            struct in6_addr *src,
1356                                            struct in6_addr *gateway,
1357                                            struct net_device *dev)
1358 {
1359         int flags = RT6_LOOKUP_F_HAS_SADDR;
1360         struct ip6rd_flowi rdfl = {
1361                 .fl = {
1362                         .oif = dev->ifindex,
1363                         .nl_u = {
1364                                 .ip6_u = {
1365                                         .daddr = *dest,
1366                                         .saddr = *src,
1367                                 },
1368                         },
1369                 },
1370                 .gateway = *gateway,
1371         };
1372
1373         if (rt6_need_strict(dest))
1374                 flags |= RT6_LOOKUP_F_IFACE;
1375
1376         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1377 }
1378
1379 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1380                   struct in6_addr *saddr,
1381                   struct neighbour *neigh, u8 *lladdr, int on_link)
1382 {
1383         struct rt6_info *rt, *nrt = NULL;
1384         struct netevent_redirect netevent;
1385
1386         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1387
1388         if (rt == &ip6_null_entry) {
1389                 if (net_ratelimit())
1390                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1391                                "for redirect target\n");
1392                 goto out;
1393         }
1394
1395         /*
1396          *      We have finally decided to accept it.
1397          */
1398
1399         neigh_update(neigh, lladdr, NUD_STALE, 
1400                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1401                      NEIGH_UPDATE_F_OVERRIDE|
1402                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1403                                      NEIGH_UPDATE_F_ISROUTER))
1404                      );
1405
1406         /*
1407          * Redirect received -> path was valid.
1408          * Look, redirects are sent only in response to data packets,
1409          * so that this nexthop apparently is reachable. --ANK
1410          */
1411         dst_confirm(&rt->u.dst);
1412
1413         /* Duplicate redirect: silently ignore. */
1414         if (neigh == rt->u.dst.neighbour)
1415                 goto out;
1416
1417         nrt = ip6_rt_copy(rt);
1418         if (nrt == NULL)
1419                 goto out;
1420
1421         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1422         if (on_link)
1423                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1424
1425         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1426         nrt->rt6i_dst.plen = 128;
1427         nrt->u.dst.flags |= DST_HOST;
1428
1429         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1430         nrt->rt6i_nexthop = neigh_clone(neigh);
1431         /* Reset pmtu, it may be better */
1432         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1433         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1434
1435         if (ip6_ins_rt(nrt))
1436                 goto out;
1437
1438         netevent.old = &rt->u.dst;
1439         netevent.new = &nrt->u.dst;
1440         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1441
1442         if (rt->rt6i_flags&RTF_CACHE) {
1443                 ip6_del_rt(rt);
1444                 return;
1445         }
1446
1447 out:
1448         dst_release(&rt->u.dst);
1449         return;
1450 }
1451
1452 /*
1453  *      Handle ICMP "packet too big" messages
1454  *      i.e. Path MTU discovery
1455  */
1456
1457 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1458                         struct net_device *dev, u32 pmtu)
1459 {
1460         struct rt6_info *rt, *nrt;
1461         int allfrag = 0;
1462
1463         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1464         if (rt == NULL)
1465                 return;
1466
1467         if (pmtu >= dst_mtu(&rt->u.dst))
1468                 goto out;
1469
1470         if (pmtu < IPV6_MIN_MTU) {
1471                 /*
1472                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1473                  * MTU (1280) and a fragment header should always be included
1474                  * after a node receiving Too Big message reporting PMTU is
1475                  * less than the IPv6 Minimum Link MTU.
1476                  */
1477                 pmtu = IPV6_MIN_MTU;
1478                 allfrag = 1;
1479         }
1480
1481         /* New mtu received -> path was valid.
1482            They are sent only in response to data packets,
1483            so that this nexthop apparently is reachable. --ANK
1484          */
1485         dst_confirm(&rt->u.dst);
1486
1487         /* Host route. If it is static, it would be better
1488            not to override it, but add new one, so that
1489            when cache entry will expire old pmtu
1490            would return automatically.
1491          */
1492         if (rt->rt6i_flags & RTF_CACHE) {
1493                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1494                 if (allfrag)
1495                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1496                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1497                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1498                 goto out;
1499         }
1500
1501         /* Network route.
1502            Two cases are possible:
1503            1. It is connected route. Action: COW
1504            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1505          */
1506         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1507                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1508         else
1509                 nrt = rt6_alloc_clone(rt, daddr);
1510
1511         if (nrt) {
1512                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1513                 if (allfrag)
1514                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1515
1516                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1517                  * happened within 5 mins, the recommended timer is 10 mins.
1518                  * Here this route expiration time is set to ip6_rt_mtu_expires
1519                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1520                  * and detecting PMTU increase will be automatically happened.
1521                  */
1522                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1523                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1524
1525                 ip6_ins_rt(nrt);
1526         }
1527 out:
1528         dst_release(&rt->u.dst);
1529 }
1530
1531 /*
1532  *      Misc support functions
1533  */
1534
1535 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1536 {
1537         struct rt6_info *rt = ip6_dst_alloc();
1538
1539         if (rt) {
1540                 rt->u.dst.input = ort->u.dst.input;
1541                 rt->u.dst.output = ort->u.dst.output;
1542
1543                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1544                 rt->u.dst.error = ort->u.dst.error;
1545                 rt->u.dst.dev = ort->u.dst.dev;
1546                 if (rt->u.dst.dev)
1547                         dev_hold(rt->u.dst.dev);
1548                 rt->rt6i_idev = ort->rt6i_idev;
1549                 if (rt->rt6i_idev)
1550                         in6_dev_hold(rt->rt6i_idev);
1551                 rt->u.dst.lastuse = jiffies;
1552                 rt->rt6i_expires = 0;
1553
1554                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1555                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1556                 rt->rt6i_metric = 0;
1557
1558                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1559 #ifdef CONFIG_IPV6_SUBTREES
1560                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1561 #endif
1562                 rt->rt6i_table = ort->rt6i_table;
1563         }
1564         return rt;
1565 }
1566
1567 #ifdef CONFIG_IPV6_ROUTE_INFO
1568 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1569                                            struct in6_addr *gwaddr, int ifindex)
1570 {
1571         struct fib6_node *fn;
1572         struct rt6_info *rt = NULL;
1573         struct fib6_table *table;
1574
1575         table = fib6_get_table(RT6_TABLE_INFO);
1576         if (table == NULL)
1577                 return NULL;
1578
1579         write_lock_bh(&table->tb6_lock);
1580         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1581         if (!fn)
1582                 goto out;
1583
1584         for (rt = fn->leaf; rt; rt = rt->u.next) {
1585                 if (rt->rt6i_dev->ifindex != ifindex)
1586                         continue;
1587                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1588                         continue;
1589                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1590                         continue;
1591                 dst_hold(&rt->u.dst);
1592                 break;
1593         }
1594 out:
1595         write_unlock_bh(&table->tb6_lock);
1596         return rt;
1597 }
1598
1599 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1600                                            struct in6_addr *gwaddr, int ifindex,
1601                                            unsigned pref)
1602 {
1603         struct fib6_config cfg = {
1604                 .fc_table       = RT6_TABLE_INFO,
1605                 .fc_metric      = 1024,
1606                 .fc_ifindex     = ifindex,
1607                 .fc_dst_len     = prefixlen,
1608                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1609                                   RTF_UP | RTF_PREF(pref),
1610         };
1611
1612         ipv6_addr_copy(&cfg.fc_dst, prefix);
1613         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1614
1615         /* We should treat it as a default route if prefix length is 0. */
1616         if (!prefixlen)
1617                 cfg.fc_flags |= RTF_DEFAULT;
1618
1619         ip6_route_add(&cfg);
1620
1621         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1622 }
1623 #endif
1624
1625 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1626 {       
1627         struct rt6_info *rt;
1628         struct fib6_table *table;
1629
1630         table = fib6_get_table(RT6_TABLE_DFLT);
1631         if (table == NULL)
1632                 return NULL;
1633
1634         write_lock_bh(&table->tb6_lock);
1635         for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1636                 if (dev == rt->rt6i_dev &&
1637                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1638                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1639                         break;
1640         }
1641         if (rt)
1642                 dst_hold(&rt->u.dst);
1643         write_unlock_bh(&table->tb6_lock);
1644         return rt;
1645 }
1646
1647 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1648                                      struct net_device *dev,
1649                                      unsigned int pref)
1650 {
1651         struct fib6_config cfg = {
1652                 .fc_table       = RT6_TABLE_DFLT,
1653                 .fc_metric      = 1024,
1654                 .fc_ifindex     = dev->ifindex,
1655                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1656                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1657         };
1658
1659         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1660
1661         ip6_route_add(&cfg);
1662
1663         return rt6_get_dflt_router(gwaddr, dev);
1664 }
1665
1666 void rt6_purge_dflt_routers(void)
1667 {
1668         struct rt6_info *rt;
1669         struct fib6_table *table;
1670
1671         /* NOTE: Keep consistent with rt6_get_dflt_router */
1672         table = fib6_get_table(RT6_TABLE_DFLT);
1673         if (table == NULL)
1674                 return;
1675
1676 restart:
1677         read_lock_bh(&table->tb6_lock);
1678         for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1679                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1680                         dst_hold(&rt->u.dst);
1681                         read_unlock_bh(&table->tb6_lock);
1682                         ip6_del_rt(rt);
1683                         goto restart;
1684                 }
1685         }
1686         read_unlock_bh(&table->tb6_lock);
1687 }
1688
1689 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1690                                  struct fib6_config *cfg)
1691 {
1692         memset(cfg, 0, sizeof(*cfg));
1693
1694         cfg->fc_table = RT6_TABLE_MAIN;
1695         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1696         cfg->fc_metric = rtmsg->rtmsg_metric;
1697         cfg->fc_expires = rtmsg->rtmsg_info;
1698         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1699         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1700         cfg->fc_flags = rtmsg->rtmsg_flags;
1701
1702         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1703         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1704         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1705 }
1706
1707 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1708 {
1709         struct fib6_config cfg;
1710         struct in6_rtmsg rtmsg;
1711         int err;
1712
1713         switch(cmd) {
1714         case SIOCADDRT:         /* Add a route */
1715         case SIOCDELRT:         /* Delete a route */
1716                 if (!capable(CAP_NET_ADMIN))
1717                         return -EPERM;
1718                 err = copy_from_user(&rtmsg, arg,
1719                                      sizeof(struct in6_rtmsg));
1720                 if (err)
1721                         return -EFAULT;
1722
1723                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1724
1725                 rtnl_lock();
1726                 switch (cmd) {
1727                 case SIOCADDRT:
1728                         err = ip6_route_add(&cfg);
1729                         break;
1730                 case SIOCDELRT:
1731                         err = ip6_route_del(&cfg);
1732                         break;
1733                 default:
1734                         err = -EINVAL;
1735                 }
1736                 rtnl_unlock();
1737
1738                 return err;
1739         };
1740
1741         return -EINVAL;
1742 }
1743
1744 /*
1745  *      Drop the packet on the floor
1746  */
1747
1748 static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
1749 {
1750         int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1751         if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1752                 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1753
1754         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1755         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1756         kfree_skb(skb);
1757         return 0;
1758 }
1759
1760 static int ip6_pkt_discard(struct sk_buff *skb)
1761 {
1762         return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1763 }
1764
1765 static int ip6_pkt_discard_out(struct sk_buff *skb)
1766 {
1767         skb->dev = skb->dst->dev;
1768         return ip6_pkt_discard(skb);
1769 }
1770
1771 static int ip6_pkt_prohibit(struct sk_buff *skb)
1772 {
1773         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1774 }
1775
1776 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1777 {
1778         skb->dev = skb->dst->dev;
1779         return ip6_pkt_prohibit(skb);
1780 }
1781
/*
 * Blackhole handler: free the packet silently - no ICMP reply and no
 * statistics update.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	int rc = 0;

	kfree_skb(skb);
	return rc;
}
1787
1788 /*
1789  *      Allocate a dst for local (unicast / anycast) address.
1790  */
1791
1792 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1793                                     const struct in6_addr *addr,
1794                                     int anycast)
1795 {
1796         struct rt6_info *rt = ip6_dst_alloc();
1797
1798         if (rt == NULL)
1799                 return ERR_PTR(-ENOMEM);
1800
1801         dev_hold(&loopback_dev);
1802         in6_dev_hold(idev);
1803
1804         rt->u.dst.flags = DST_HOST;
1805         rt->u.dst.input = ip6_input;
1806         rt->u.dst.output = ip6_output;
1807         rt->rt6i_dev = &loopback_dev;
1808         rt->rt6i_idev = idev;
1809         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1810         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1811         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1812         rt->u.dst.obsolete = -1;
1813
1814         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1815         if (anycast)
1816                 rt->rt6i_flags |= RTF_ANYCAST;
1817         else
1818                 rt->rt6i_flags |= RTF_LOCAL;
1819         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1820         if (rt->rt6i_nexthop == NULL) {
1821                 dst_free((struct dst_entry *) rt);
1822                 return ERR_PTR(-ENOMEM);
1823         }
1824
1825         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1826         rt->rt6i_dst.plen = 128;
1827         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1828
1829         atomic_set(&rt->u.dst.__refcnt, 1);
1830
1831         return rt;
1832 }
1833
1834 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1835 {
1836         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1837             rt != &ip6_null_entry) {
1838                 RT6_TRACE("deleted by ifdown %p\n", rt);
1839                 return -1;
1840         }
1841         return 0;
1842 }
1843
1844 void rt6_ifdown(struct net_device *dev)
1845 {
1846         fib6_clean_all(fib6_ifdown, 0, dev);
1847 }
1848
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU value */
};
1854
1855 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1856 {
1857         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1858         struct inet6_dev *idev;
1859
1860         /* In IPv6 pmtu discovery is not optional,
1861            so that RTAX_MTU lock cannot disable it.
1862            We still use this lock to block changes
1863            caused by addrconf/ndisc.
1864         */
1865
1866         idev = __in6_dev_get(arg->dev);
1867         if (idev == NULL)
1868                 return 0;
1869
1870         /* For administrative MTU increase, there is no way to discover
1871            IPv6 PMTU increase, so PMTU increase should be updated here.
1872            Since RFC 1981 doesn't include administrative MTU increase
1873            update PMTU increase is a MUST. (i.e. jumbo frame)
1874          */
1875         /*
1876            If new MTU is less than route PMTU, this new MTU will be the
1877            lowest MTU in the path, update the route PMTU to reflect PMTU
1878            decreases; if new MTU is greater than route PMTU, and the
1879            old MTU is the lowest MTU in the path, update the route PMTU
1880            to reflect the increase. In this case if the other nodes' MTU
1881            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1882            PMTU discouvery.
1883          */
1884         if (rt->rt6i_dev == arg->dev &&
1885             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1886             (dst_mtu(&rt->u.dst) > arg->mtu ||
1887              (dst_mtu(&rt->u.dst) < arg->mtu &&
1888               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1889                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1890         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1891         return 0;
1892 }
1893
1894 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1895 {
1896         struct rt6_mtu_change_arg arg = {
1897                 .dev = dev,
1898                 .mtu = mtu,
1899         };
1900
1901         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1902 }
1903
1904 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1905         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1906         [RTA_OIF]               = { .type = NLA_U32 },
1907         [RTA_IIF]               = { .type = NLA_U32 },
1908         [RTA_PRIORITY]          = { .type = NLA_U32 },
1909         [RTA_METRICS]           = { .type = NLA_NESTED },
1910 };
1911
1912 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1913                               struct fib6_config *cfg)
1914 {
1915         struct rtmsg *rtm;
1916         struct nlattr *tb[RTA_MAX+1];
1917         int err;
1918
1919         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1920         if (err < 0)
1921                 goto errout;
1922
1923         err = -EINVAL;
1924         rtm = nlmsg_data(nlh);
1925         memset(cfg, 0, sizeof(*cfg));
1926
1927         cfg->fc_table = rtm->rtm_table;
1928         cfg->fc_dst_len = rtm->rtm_dst_len;
1929         cfg->fc_src_len = rtm->rtm_src_len;
1930         cfg->fc_flags = RTF_UP;
1931         cfg->fc_protocol = rtm->rtm_protocol;
1932
1933         if (rtm->rtm_type == RTN_UNREACHABLE)
1934                 cfg->fc_flags |= RTF_REJECT;
1935
1936         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1937         cfg->fc_nlinfo.nlh = nlh;
1938
1939         if (tb[RTA_GATEWAY]) {
1940                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1941                 cfg->fc_flags |= RTF_GATEWAY;
1942         }
1943
1944         if (tb[RTA_DST]) {
1945                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1946
1947                 if (nla_len(tb[RTA_DST]) < plen)
1948                         goto errout;
1949
1950                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1951         }
1952
1953         if (tb[RTA_SRC]) {
1954                 int plen = (rtm->rtm_src_len + 7) >> 3;
1955
1956                 if (nla_len(tb[RTA_SRC]) < plen)
1957                         goto errout;
1958
1959                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1960         }
1961
1962         if (tb[RTA_OIF])
1963                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1964
1965         if (tb[RTA_PRIORITY])
1966                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1967
1968         if (tb[RTA_METRICS]) {
1969                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1970                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1971         }
1972
1973         if (tb[RTA_TABLE])
1974                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1975
1976         err = 0;
1977 errout:
1978         return err;
1979 }
1980
1981 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1982 {
1983         struct fib6_config cfg;
1984         int err;
1985
1986         err = rtm_to_fib6_config(skb, nlh, &cfg);
1987         if (err < 0)
1988                 return err;
1989
1990         return ip6_route_del(&cfg);
1991 }
1992
1993 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1994 {
1995         struct fib6_config cfg;
1996         int err;
1997
1998         err = rtm_to_fib6_config(skb, nlh, &cfg);
1999         if (err < 0)
2000                 return err;
2001
2002         return ip6_route_add(&cfg);
2003 }
2004
2005 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2006                          struct in6_addr *dst, struct in6_addr *src,
2007                          int iif, int type, u32 pid, u32 seq,
2008                          int prefix, unsigned int flags)
2009 {
2010         struct rtmsg *rtm;
2011         struct nlmsghdr *nlh;
2012         struct rta_cacheinfo ci;
2013         u32 table;
2014
2015         if (prefix) {   /* user wants prefix routes only */
2016                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2017                         /* success since this is not a prefix route */
2018                         return 1;
2019                 }
2020         }
2021
2022         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2023         if (nlh == NULL)
2024                 return -ENOBUFS;
2025
2026         rtm = nlmsg_data(nlh);
2027         rtm->rtm_family = AF_INET6;
2028         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2029         rtm->rtm_src_len = rt->rt6i_src.plen;
2030         rtm->rtm_tos = 0;
2031         if (rt->rt6i_table)
2032                 table = rt->rt6i_table->tb6_id;
2033         else
2034                 table = RT6_TABLE_UNSPEC;
2035         rtm->rtm_table = table;
2036         NLA_PUT_U32(skb, RTA_TABLE, table);
2037         if (rt->rt6i_flags&RTF_REJECT)
2038                 rtm->rtm_type = RTN_UNREACHABLE;
2039         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2040                 rtm->rtm_type = RTN_LOCAL;
2041         else
2042                 rtm->rtm_type = RTN_UNICAST;
2043         rtm->rtm_flags = 0;
2044         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2045         rtm->rtm_protocol = rt->rt6i_protocol;
2046         if (rt->rt6i_flags&RTF_DYNAMIC)
2047                 rtm->rtm_protocol = RTPROT_REDIRECT;
2048         else if (rt->rt6i_flags & RTF_ADDRCONF)
2049                 rtm->rtm_protocol = RTPROT_KERNEL;
2050         else if (rt->rt6i_flags&RTF_DEFAULT)
2051                 rtm->rtm_protocol = RTPROT_RA;
2052
2053         if (rt->rt6i_flags&RTF_CACHE)
2054                 rtm->rtm_flags |= RTM_F_CLONED;
2055
2056         if (dst) {
2057                 NLA_PUT(skb, RTA_DST, 16, dst);
2058                 rtm->rtm_dst_len = 128;
2059         } else if (rtm->rtm_dst_len)
2060                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2061 #ifdef CONFIG_IPV6_SUBTREES
2062         if (src) {
2063                 NLA_PUT(skb, RTA_SRC, 16, src);
2064                 rtm->rtm_src_len = 128;
2065         } else if (rtm->rtm_src_len)
2066                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2067 #endif
2068         if (iif)
2069                 NLA_PUT_U32(skb, RTA_IIF, iif);
2070         else if (dst) {
2071                 struct in6_addr saddr_buf;
2072                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2073                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2074         }
2075
2076         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2077                 goto nla_put_failure;
2078
2079         if (rt->u.dst.neighbour)
2080                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2081
2082         if (rt->u.dst.dev)
2083                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2084
2085         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2086         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2087         if (rt->rt6i_expires)
2088                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2089         else
2090                 ci.rta_expires = 0;
2091         ci.rta_used = rt->u.dst.__use;
2092         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2093         ci.rta_error = rt->u.dst.error;
2094         ci.rta_id = 0;
2095         ci.rta_ts = 0;
2096         ci.rta_tsage = 0;
2097         NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2098
2099         return nlmsg_end(skb, nlh);
2100
2101 nla_put_failure:
2102         return nlmsg_cancel(skb, nlh);
2103 }
2104
2105 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2106 {
2107         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2108         int prefix;
2109
2110         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2111                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2112                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2113         } else
2114                 prefix = 0;
2115
2116         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2117                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2118                      prefix, NLM_F_MULTI);
2119 }
2120
2121 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2122 {
2123         struct nlattr *tb[RTA_MAX+1];
2124         struct rt6_info *rt;
2125         struct sk_buff *skb;
2126         struct rtmsg *rtm;
2127         struct flowi fl;
2128         int err, iif = 0;
2129
2130         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2131         if (err < 0)
2132                 goto errout;
2133
2134         err = -EINVAL;
2135         memset(&fl, 0, sizeof(fl));
2136
2137         if (tb[RTA_SRC]) {
2138                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2139                         goto errout;
2140
2141                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2142         }
2143
2144         if (tb[RTA_DST]) {
2145                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2146                         goto errout;
2147
2148                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2149         }
2150
2151         if (tb[RTA_IIF])
2152                 iif = nla_get_u32(tb[RTA_IIF]);
2153
2154         if (tb[RTA_OIF])
2155                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2156
2157         if (iif) {
2158                 struct net_device *dev;
2159                 dev = __dev_get_by_index(iif);
2160                 if (!dev) {
2161                         err = -ENODEV;
2162                         goto errout;
2163                 }
2164         }
2165
2166         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2167         if (skb == NULL) {
2168                 err = -ENOBUFS;
2169                 goto errout;
2170         }
2171
2172         /* Reserve room for dummy headers, this skb can pass
2173            through good chunk of routing engine.
2174          */
2175         skb->mac.raw = skb->data;
2176         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2177
2178         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2179         skb->dst = &rt->u.dst;
2180
2181         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2182                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2183                             nlh->nlmsg_seq, 0, 0);
2184         if (err < 0) {
2185                 kfree_skb(skb);
2186                 goto errout;
2187         }
2188
2189         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2190 errout:
2191         return err;
2192 }
2193
2194 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2195 {
2196         struct sk_buff *skb;
2197         u32 pid = 0, seq = 0;
2198         struct nlmsghdr *nlh = NULL;
2199         int payload = sizeof(struct rtmsg) + 256;
2200         int err = -ENOBUFS;
2201
2202         if (info) {
2203                 pid = info->pid;
2204                 nlh = info->nlh;
2205                 if (nlh)
2206                         seq = nlh->nlmsg_seq;
2207         }
2208
2209         skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2210         if (skb == NULL)
2211                 goto errout;
2212
2213         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2214         if (err < 0) {
2215                 kfree_skb(skb);
2216                 goto errout;
2217         }
2218
2219         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2220 errout:
2221         if (err < 0)
2222                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2223 }
2224
2225 /*
2226  *      /proc
2227  */
2228
2229 #ifdef CONFIG_PROC_FS
2230
2231 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2232
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
        char *buffer;   /* output buffer supplied by the proc read */
        int offset;     /* read offset requested by the caller */
        int length;     /* number of bytes requested by the caller */
        int skip;       /* routes skipped so far to reach 'offset' */
        int len;        /* bytes written into 'buffer' so far */
};
2241
2242 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2243 {
2244         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2245         int i;
2246
2247         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2248                 arg->skip++;
2249                 return 0;
2250         }
2251
2252         if (arg->len >= arg->length)
2253                 return 0;
2254
2255         for (i=0; i<16; i++) {
2256                 sprintf(arg->buffer + arg->len, "%02x",
2257                         rt->rt6i_dst.addr.s6_addr[i]);
2258                 arg->len += 2;
2259         }
2260         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2261                             rt->rt6i_dst.plen);
2262
2263 #ifdef CONFIG_IPV6_SUBTREES
2264         for (i=0; i<16; i++) {
2265                 sprintf(arg->buffer + arg->len, "%02x",
2266                         rt->rt6i_src.addr.s6_addr[i]);
2267                 arg->len += 2;
2268         }
2269         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2270                             rt->rt6i_src.plen);
2271 #else
2272         sprintf(arg->buffer + arg->len,
2273                 "00000000000000000000000000000000 00 ");
2274         arg->len += 36;
2275 #endif
2276
2277         if (rt->rt6i_nexthop) {
2278                 for (i=0; i<16; i++) {
2279                         sprintf(arg->buffer + arg->len, "%02x",
2280                                 rt->rt6i_nexthop->primary_key[i]);
2281                         arg->len += 2;
2282                 }
2283         } else {
2284                 sprintf(arg->buffer + arg->len,
2285                         "00000000000000000000000000000000");
2286                 arg->len += 32;
2287         }
2288         arg->len += sprintf(arg->buffer + arg->len,
2289                             " %08x %08x %08x %08x %8s\n",
2290                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2291                             rt->u.dst.__use, rt->rt6i_flags, 
2292                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2293         return 0;
2294 }
2295
2296 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2297 {
2298         struct rt6_proc_arg arg = {
2299                 .buffer = buffer,
2300                 .offset = offset,
2301                 .length = length,
2302         };
2303
2304         fib6_clean_all(rt6_info_route, 0, &arg);
2305
2306         *start = buffer;
2307         if (offset)
2308                 *start += offset % RT6_INFO_LEN;
2309
2310         arg.len -= offset % RT6_INFO_LEN;
2311
2312         if (arg.len > length)
2313                 arg.len = length;
2314         if (arg.len < 0)
2315                 arg.len = 0;
2316
2317         return arg.len;
2318 }
2319
2320 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2321 {
2322         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2323                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2324                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2325                       rt6_stats.fib_rt_cache,
2326                       atomic_read(&ip6_dst_ops.entries),
2327                       rt6_stats.fib_discarded_routes);
2328
2329         return 0;
2330 }
2331
2332 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2333 {
2334         return single_open(file, rt6_stats_seq_show, NULL);
2335 }
2336
2337 static struct file_operations rt6_stats_seq_fops = {
2338         .owner   = THIS_MODULE,
2339         .open    = rt6_stats_seq_open,
2340         .read    = seq_read,
2341         .llseek  = seq_lseek,
2342         .release = single_release,
2343 };
2344 #endif  /* CONFIG_PROC_FS */
2345
2346 #ifdef CONFIG_SYSCTL
2347
2348 static int flush_delay;
2349
2350 static
2351 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2352                               void __user *buffer, size_t *lenp, loff_t *ppos)
2353 {
2354         if (write) {
2355                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2356                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2357                 return 0;
2358         } else
2359                 return -EINVAL;
2360 }
2361
2362 ctl_table ipv6_route_table[] = {
2363         {
2364                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2365                 .procname       =       "flush",
2366                 .data           =       &flush_delay,
2367                 .maxlen         =       sizeof(int),
2368                 .mode           =       0200,
2369                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2370         },
2371         {
2372                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2373                 .procname       =       "gc_thresh",
2374                 .data           =       &ip6_dst_ops.gc_thresh,
2375                 .maxlen         =       sizeof(int),
2376                 .mode           =       0644,
2377                 .proc_handler   =       &proc_dointvec,
2378         },
2379         {
2380                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2381                 .procname       =       "max_size",
2382                 .data           =       &ip6_rt_max_size,
2383                 .maxlen         =       sizeof(int),
2384                 .mode           =       0644,
2385                 .proc_handler   =       &proc_dointvec,
2386         },
2387         {
2388                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2389                 .procname       =       "gc_min_interval",
2390                 .data           =       &ip6_rt_gc_min_interval,
2391                 .maxlen         =       sizeof(int),
2392                 .mode           =       0644,
2393                 .proc_handler   =       &proc_dointvec_jiffies,
2394                 .strategy       =       &sysctl_jiffies,
2395         },
2396         {
2397                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2398                 .procname       =       "gc_timeout",
2399                 .data           =       &ip6_rt_gc_timeout,
2400                 .maxlen         =       sizeof(int),
2401                 .mode           =       0644,
2402                 .proc_handler   =       &proc_dointvec_jiffies,
2403                 .strategy       =       &sysctl_jiffies,
2404         },
2405         {
2406                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2407                 .procname       =       "gc_interval",
2408                 .data           =       &ip6_rt_gc_interval,
2409                 .maxlen         =       sizeof(int),
2410                 .mode           =       0644,
2411                 .proc_handler   =       &proc_dointvec_jiffies,
2412                 .strategy       =       &sysctl_jiffies,
2413         },
2414         {
2415                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2416                 .procname       =       "gc_elasticity",
2417                 .data           =       &ip6_rt_gc_elasticity,
2418                 .maxlen         =       sizeof(int),
2419                 .mode           =       0644,
2420                 .proc_handler   =       &proc_dointvec_jiffies,
2421                 .strategy       =       &sysctl_jiffies,
2422         },
2423         {
2424                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2425                 .procname       =       "mtu_expires",
2426                 .data           =       &ip6_rt_mtu_expires,
2427                 .maxlen         =       sizeof(int),
2428                 .mode           =       0644,
2429                 .proc_handler   =       &proc_dointvec_jiffies,
2430                 .strategy       =       &sysctl_jiffies,
2431         },
2432         {
2433                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2434                 .procname       =       "min_adv_mss",
2435                 .data           =       &ip6_rt_min_advmss,
2436                 .maxlen         =       sizeof(int),
2437                 .mode           =       0644,
2438                 .proc_handler   =       &proc_dointvec_jiffies,
2439                 .strategy       =       &sysctl_jiffies,
2440         },
2441         {
2442                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2443                 .procname       =       "gc_min_interval_ms",
2444                 .data           =       &ip6_rt_gc_min_interval,
2445                 .maxlen         =       sizeof(int),
2446                 .mode           =       0644,
2447                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2448                 .strategy       =       &sysctl_ms_jiffies,
2449         },
2450         { .ctl_name = 0 }
2451 };
2452
2453 #endif
2454
2455 void __init ip6_route_init(void)
2456 {
2457         struct proc_dir_entry *p;
2458
2459         ip6_dst_ops.kmem_cachep =
2460                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2461                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2462         fib6_init();
2463 #ifdef  CONFIG_PROC_FS
2464         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2465         if (p)
2466                 p->owner = THIS_MODULE;
2467
2468         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2469 #endif
2470 #ifdef CONFIG_XFRM
2471         xfrm6_init();
2472 #endif
2473 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2474         fib6_rules_init();
2475 #endif
2476 }
2477
2478 void ip6_route_cleanup(void)
2479 {
2480 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2481         fib6_rules_cleanup();
2482 #endif
2483 #ifdef CONFIG_PROC_FS
2484         proc_net_remove("ipv6_route");
2485         proc_net_remove("rt6_stats");
2486 #endif
2487 #ifdef CONFIG_XFRM
2488         xfrm6_fini();
2489 #endif
2490         rt6_ifdown(NULL);
2491         fib6_gc_cleanup();
2492         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2493 }