]> git.karo-electronics.de Git - mv-sheeva.git/blob - net/ipv6/route.c
[IPV6] ROUTE: Search subtree when backtracking.
[mv-sheeva.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/if_arp.h>
39
40 #ifdef  CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #endif
44
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG >= 3 the RDBG() and
 * RT6_TRACE() helpers expand to printk() calls; otherwise they expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif

/* Non-zero compiles in rt6_alloc_clone() calls in the policy lookups. */
#define CLONE_OFFLINK_ROUTE 0

/* Bits of the "strict" argument of rt6_score_route()/rt6_select(). */
#define RT6_SELECT_F_IFACE      0x1     /* reject routes on another interface */
#define RT6_SELECT_F_REACHABLE  0x2     /* reject routes whose next hop scores 0 */
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103                                            struct in6_addr *gwaddr, int ifindex,
104                                            unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106                                            struct in6_addr *gwaddr, int ifindex);
107 #endif
108
109 static struct dst_ops ip6_dst_ops = {
110         .family                 =       AF_INET6,
111         .protocol               =       __constant_htons(ETH_P_IPV6),
112         .gc                     =       ip6_dst_gc,
113         .gc_thresh              =       1024,
114         .check                  =       ip6_dst_check,
115         .destroy                =       ip6_dst_destroy,
116         .ifdown                 =       ip6_dst_ifdown,
117         .negative_advice        =       ip6_negative_advice,
118         .link_failure           =       ip6_link_failure,
119         .update_pmtu            =       ip6_rt_update_pmtu,
120         .entry_size             =       sizeof(struct rt6_info),
121 };
122
123 struct rt6_info ip6_null_entry = {
124         .u = {
125                 .dst = {
126                         .__refcnt       = ATOMIC_INIT(1),
127                         .__use          = 1,
128                         .dev            = &loopback_dev,
129                         .obsolete       = -1,
130                         .error          = -ENETUNREACH,
131                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
132                         .input          = ip6_pkt_discard,
133                         .output         = ip6_pkt_discard_out,
134                         .ops            = &ip6_dst_ops,
135                         .path           = (struct dst_entry*)&ip6_null_entry,
136                 }
137         },
138         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
139         .rt6i_metric    = ~(u32) 0,
140         .rt6i_ref       = ATOMIC_INIT(1),
141 };
142
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Static reject route identical in shape to ip6_null_entry, except that
 * it reports -EACCES.
 */
struct rt6_info ip6_prohibit_entry = {
        .u = {
                .dst = {
                        .ops            = &ip6_dst_ops,
                        .dev            = &loopback_dev,
                        .path           = (struct dst_entry *)&ip6_prohibit_entry,
                        .__refcnt       = ATOMIC_INIT(1),
                        .__use          = 1,
                        .obsolete       = -1,
                        .error          = -EACCES,
                        .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
                        .input          = ip6_pkt_discard,
                        .output         = ip6_pkt_discard_out,
                },
        },
        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
        .rt6i_metric    = ~(u32) 0,
        .rt6i_ref       = ATOMIC_INIT(1),
};

/*
 * Static reject route identical in shape to ip6_null_entry, except that
 * it reports -EINVAL.
 */
struct rt6_info ip6_blk_hole_entry = {
        .u = {
                .dst = {
                        .ops            = &ip6_dst_ops,
                        .dev            = &loopback_dev,
                        .path           = (struct dst_entry *)&ip6_blk_hole_entry,
                        .__refcnt       = ATOMIC_INIT(1),
                        .__use          = 1,
                        .obsolete       = -1,
                        .error          = -EINVAL,
                        .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
                        .input          = ip6_pkt_discard,
                        .output         = ip6_pkt_discard_out,
                },
        },
        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
        .rt6i_metric    = ~(u32) 0,
        .rt6i_ref       = ATOMIC_INIT(1),
};

#endif
186
187 /* allocate dst with ip6_dst_ops */
188 static __inline__ struct rt6_info *ip6_dst_alloc(void)
189 {
190         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191 }
192
193 static void ip6_dst_destroy(struct dst_entry *dst)
194 {
195         struct rt6_info *rt = (struct rt6_info *)dst;
196         struct inet6_dev *idev = rt->rt6i_idev;
197
198         if (idev != NULL) {
199                 rt->rt6i_idev = NULL;
200                 in6_dev_put(idev);
201         }       
202 }
203
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205                            int how)
206 {
207         struct rt6_info *rt = (struct rt6_info *)dst;
208         struct inet6_dev *idev = rt->rt6i_idev;
209
210         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212                 if (loopback_idev != NULL) {
213                         rt->rt6i_idev = loopback_idev;
214                         in6_dev_put(idev);
215                 }
216         }
217 }
218
219 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220 {
221         return (rt->rt6i_flags & RTF_EXPIRES &&
222                 time_after(jiffies, rt->rt6i_expires));
223 }
224
225 static inline int rt6_need_strict(struct in6_addr *daddr)
226 {
227         return (ipv6_addr_type(daddr) &
228                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229 }
230
/*
 *      Route lookup.  The tb6_lock of whichever table is being searched
 *      is assumed to be held.
 */
234
235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236                                                     int oif,
237                                                     int strict)
238 {
239         struct rt6_info *local = NULL;
240         struct rt6_info *sprt;
241
242         if (oif) {
243                 for (sprt = rt; sprt; sprt = sprt->u.next) {
244                         struct net_device *dev = sprt->rt6i_dev;
245                         if (dev->ifindex == oif)
246                                 return sprt;
247                         if (dev->flags & IFF_LOOPBACK) {
248                                 if (sprt->rt6i_idev == NULL ||
249                                     sprt->rt6i_idev->dev->ifindex != oif) {
250                                         if (strict && oif)
251                                                 continue;
252                                         if (local && (!oif || 
253                                                       local->rt6i_idev->dev->ifindex == oif))
254                                                 continue;
255                                 }
256                                 local = sprt;
257                         }
258                 }
259
260                 if (local)
261                         return local;
262
263                 if (strict)
264                         return &ip6_null_entry;
265         }
266         return rt;
267 }
268
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and more than rtr_probe_interval has passed since the
 * neighbour entry was last updated, send a Neighbour Solicitation to
 * the solicited-node multicast address of the next hop.
 *
 * The original author's note stands: probing MUST be rate-limited to no
 * more than one per minute.  @rt may be NULL, in which case nothing
 * happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
        struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
        struct in6_addr mcaddr;
        struct in6_addr *target;
        int due;

        /* Cheap unlocked pre-check. */
        if (!neigh || (neigh->nud_state & NUD_VALID))
                return;

        read_lock_bh(&neigh->lock);
        due = !(neigh->nud_state & NUD_VALID) &&
              time_after(jiffies,
                         neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
        if (due)
                neigh->updated = jiffies;       /* restart the rate limit */
        read_unlock_bh(&neigh->lock);

        if (!due)
                return;

        target = (struct in6_addr *)&neigh->primary_key;
        addrconf_addr_solict_mult(target, &mcaddr);
        ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
304
305 /*
306  * Default Router Selection (RFC 2461 6.3.6)
307  */
308 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309 {
310         struct net_device *dev = rt->rt6i_dev;
311         if (!oif || dev->ifindex == oif)
312                 return 2;
313         if ((dev->flags & IFF_LOOPBACK) &&
314             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315                 return 1;
316         return 0;
317 }
318
319 static int inline rt6_check_neigh(struct rt6_info *rt)
320 {
321         struct neighbour *neigh = rt->rt6i_nexthop;
322         int m = 0;
323         if (rt->rt6i_flags & RTF_NONEXTHOP ||
324             !(rt->rt6i_flags & RTF_GATEWAY))
325                 m = 1;
326         else if (neigh) {
327                 read_lock_bh(&neigh->lock);
328                 if (neigh->nud_state & NUD_VALID)
329                         m = 2;
330                 read_unlock_bh(&neigh->lock);
331         }
332         return m;
333 }
334
335 static int rt6_score_route(struct rt6_info *rt, int oif,
336                            int strict)
337 {
338         int m, n;
339                 
340         m = rt6_check_dev(rt, oif);
341         if (!m && (strict & RT6_SELECT_F_IFACE))
342                 return -1;
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345 #endif
346         n = rt6_check_neigh(rt);
347         if (n > 1)
348                 m |= 16;
349         else if (!n && strict & RT6_SELECT_F_REACHABLE)
350                 return -1;
351         return m;
352 }
353
354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355                                    int strict)
356 {
357         struct rt6_info *match = NULL, *last = NULL;
358         struct rt6_info *rt, *rt0 = *head;
359         u32 metric;
360         int mpri = -1;
361
362         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363                   __FUNCTION__, head, head ? *head : NULL, oif);
364
365         for (rt = rt0, metric = rt0->rt6i_metric;
366              rt && rt->rt6i_metric == metric && (!last || rt != rt0);
367              rt = rt->u.next) {
368                 int m;
369
370                 if (rt6_check_expired(rt))
371                         continue;
372
373                 last = rt;
374
375                 m = rt6_score_route(rt, oif, strict);
376                 if (m < 0)
377                         continue;
378
379                 if (m > mpri) {
380                         rt6_probe(match);
381                         match = rt;
382                         mpri = m;
383                 } else {
384                         rt6_probe(rt);
385                 }
386         }
387
388         if (!match &&
389             (strict & RT6_SELECT_F_REACHABLE) &&
390             last && last != rt0) {
391                 /* no entries matched; do round-robin */
392                 static DEFINE_SPINLOCK(lock);
393                 spin_lock(&lock);
394                 *head = rt0->u.next;
395                 rt0->u.next = last->u.next;
396                 last->u.next = rt0;
397                 spin_unlock(&lock);
398         }
399
400         RT6_TRACE("%s() => %p, score=%d\n",
401                   __FUNCTION__, match, mpri);
402
403         return (match ? match : &ip6_null_entry);
404 }
405
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a received route_info option.
 * NOTE(review): presumably the RFC 4191 Route Information option taken
 * from a Router Advertisement - confirm against the caller.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option; interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * A zero lifetime removes a matching existing route; a non-zero
 * lifetime creates the route if it is missing, or refreshes the
 * preference and expiry of the existing one.  A lifetime of 0xffffffff
 * means "never expires".
 *
 * Returns 0 once the option has been processed (even if no route could
 * be added), or -EINVAL when the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  struct in6_addr *gwaddr)
{
        struct route_info *rinfo = (struct route_info *) opt;
        struct in6_addr prefix_buf, *prefix;
        unsigned int pref;
        u32 lifetime;
        struct rt6_info *rt;

        /*
         * Check the sign first: comparing a negative int against sizeof()
         * converts it to a huge unsigned value that would pass the test.
         */
        if (len < 0 || (size_t)len < sizeof(struct route_info))
                return -EINVAL;

        /* Sanity check for prefix_len and length */
        if (rinfo->length > 3) {
                return -EINVAL;
        } else if (rinfo->prefix_len > 128) {
                return -EINVAL;
        } else if (rinfo->prefix_len > 64) {
                if (rinfo->length < 2)
                        return -EINVAL;
        } else if (rinfo->prefix_len > 0) {
                if (rinfo->length < 1)
                        return -EINVAL;
        }

        pref = rinfo->route_pref;
        if (pref == ICMPV6_ROUTER_PREF_INVALID)
                pref = ICMPV6_ROUTER_PREF_MEDIUM;

        /* The lifetime is in network byte order on the wire. */
        lifetime = ntohl(rinfo->lifetime);
        if (lifetime == 0xffffffff) {
                /* infinity */
        } else if (lifetime > 0x7fffffff/HZ) {
                /* Avoid arithmetic overflow */
                lifetime = 0x7fffffff/HZ - 1;
        }

        if (rinfo->length == 3)
                prefix = (struct in6_addr *)rinfo->prefix;
        else {
                /* this function is safe */
                ipv6_addr_prefix(&prefix_buf,
                                 (struct in6_addr *)rinfo->prefix,
                                 rinfo->prefix_len);
                prefix = &prefix_buf;
        }

        rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

        if (rt && !lifetime) {
                ip6_del_rt(rt);
                rt = NULL;
        }

        if (!rt && lifetime)
                rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
                                        pref);
        else if (rt)
                rt->rt6i_flags = RTF_ROUTEINFO |
                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

        if (rt) {
                if (lifetime == 0xffffffff) {
                        rt->rt6i_flags &= ~RTF_EXPIRES;
                } else {
                        rt->rt6i_expires = jiffies + HZ * lifetime;
                        rt->rt6i_flags |= RTF_EXPIRES;
                }
                /* NOTE(review): presumably drops the reference taken by the
                 * get/add helper above - confirm. */
                dst_release(&rt->u.dst);
        }
        return 0;
}
#endif
483
/*
 * BACKTRACK(saddr) - continue a failed lookup further up the fib6 tree.
 *
 * This is not a self-contained macro.  It expects, in the scope of the
 * invoking function:
 *   - "struct rt6_info *rt"   : result of the match attempt at "fn",
 *   - "struct fib6_node *fn"  : node that was just tried (updated here),
 *   - a label "restart"       : re-runs the match on the new "fn",
 *   - a label "out"           : taken when the top-level root is reached.
 *
 * Nothing happens unless rt == &ip6_null_entry.  Otherwise the macro
 * climbs towards the root; when the parent has a subtree other than the
 * node we came from, that subtree is searched with @saddr.  The climb
 * stops at the first node flagged RTN_RTINFO (goto restart) or at an
 * RTN_TL_ROOT node (goto out).
 *
 * The subtree is always reached through FIB6_SUBTREE() so that the test
 * and the lookup cannot disagree about which pointer is meant.
 */
#define BACKTRACK(saddr) \
do { \
        if (rt == &ip6_null_entry) { \
                struct fib6_node *pn; \
                while (fn) { \
                        if (fn->fn_flags & RTN_TL_ROOT) \
                                goto out; \
                        pn = fn->parent; \
                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
                        else \
                                fn = pn; \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while(0)
501
502 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
503                                              struct flowi *fl, int flags)
504 {
505         struct fib6_node *fn;
506         struct rt6_info *rt;
507
508         read_lock_bh(&table->tb6_lock);
509         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
510 restart:
511         rt = fn->leaf;
512         rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
513         BACKTRACK(&fl->fl6_src);
514         dst_hold(&rt->u.dst);
515 out:
516         read_unlock_bh(&table->tb6_lock);
517
518         rt->u.dst.lastuse = jiffies;
519         rt->u.dst.__use++;
520
521         return rt;
522
523 }
524
525 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
526                             int oif, int strict)
527 {
528         struct flowi fl = {
529                 .oif = oif,
530                 .nl_u = {
531                         .ip6_u = {
532                                 .daddr = *daddr,
533                                 /* TODO: saddr */
534                         },
535                 },
536         };
537         struct dst_entry *dst;
538         int flags = strict ? RT6_F_STRICT : 0;
539
540         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
541         if (dst->error == 0)
542                 return (struct rt6_info *) dst;
543
544         dst_release(dst);
545
546         return NULL;
547 }
548
549 /* ip6_ins_rt is called with FREE table->tb6_lock.
550    It takes new route entry, the addition fails by any reason the
551    route is freed. In any case, if caller does not hold it, it may
552    be destroyed.
553  */
554
555 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
556 {
557         int err;
558         struct fib6_table *table;
559
560         table = rt->rt6i_table;
561         write_lock_bh(&table->tb6_lock);
562         err = fib6_add(&table->tb6_root, rt, info);
563         write_unlock_bh(&table->tb6_lock);
564
565         return err;
566 }
567
568 int ip6_ins_rt(struct rt6_info *rt)
569 {
570         return __ip6_ins_rt(rt, NULL);
571 }
572
573 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
574                                       struct in6_addr *saddr)
575 {
576         struct rt6_info *rt;
577
578         /*
579          *      Clone the route.
580          */
581
582         rt = ip6_rt_copy(ort);
583
584         if (rt) {
585                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
586                         if (rt->rt6i_dst.plen != 128 &&
587                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
588                                 rt->rt6i_flags |= RTF_ANYCAST;
589                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
590                 }
591
592                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
593                 rt->rt6i_dst.plen = 128;
594                 rt->rt6i_flags |= RTF_CACHE;
595                 rt->u.dst.flags |= DST_HOST;
596
597 #ifdef CONFIG_IPV6_SUBTREES
598                 if (rt->rt6i_src.plen && saddr) {
599                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
600                         rt->rt6i_src.plen = 128;
601                 }
602 #endif
603
604                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
605
606         }
607
608         return rt;
609 }
610
611 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
612 {
613         struct rt6_info *rt = ip6_rt_copy(ort);
614         if (rt) {
615                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
616                 rt->rt6i_dst.plen = 128;
617                 rt->rt6i_flags |= RTF_CACHE;
618                 if (rt->rt6i_flags & RTF_REJECT)
619                         rt->u.dst.error = ort->u.dst.error;
620                 rt->u.dst.flags |= DST_HOST;
621                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
622         }
623         return rt;
624 }
625
626 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
627                                             struct flowi *fl, int flags)
628 {
629         struct fib6_node *fn;
630         struct rt6_info *rt, *nrt;
631         int strict = 0;
632         int attempts = 3;
633         int err;
634         int reachable = RT6_SELECT_F_REACHABLE;
635
636         if (flags & RT6_F_STRICT)
637                 strict = RT6_SELECT_F_IFACE;
638
639 relookup:
640         read_lock_bh(&table->tb6_lock);
641
642 restart_2:
643         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
644
645 restart:
646         rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
647         BACKTRACK(&fl->fl6_src);
648         if (rt == &ip6_null_entry ||
649             rt->rt6i_flags & RTF_CACHE)
650                 goto out;
651
652         dst_hold(&rt->u.dst);
653         read_unlock_bh(&table->tb6_lock);
654
655         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
656                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
657         else {
658 #if CLONE_OFFLINK_ROUTE
659                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
660 #else
661                 goto out2;
662 #endif
663         }
664
665         dst_release(&rt->u.dst);
666         rt = nrt ? : &ip6_null_entry;
667
668         dst_hold(&rt->u.dst);
669         if (nrt) {
670                 err = ip6_ins_rt(nrt);
671                 if (!err)
672                         goto out2;
673         }
674
675         if (--attempts <= 0)
676                 goto out2;
677
678         /*
679          * Race condition! In the gap, when table->tb6_lock was
680          * released someone could insert this route.  Relookup.
681          */
682         dst_release(&rt->u.dst);
683         goto relookup;
684
685 out:
686         if (reachable) {
687                 reachable = 0;
688                 goto restart_2;
689         }
690         dst_hold(&rt->u.dst);
691         read_unlock_bh(&table->tb6_lock);
692 out2:
693         rt->u.dst.lastuse = jiffies;
694         rt->u.dst.__use++;
695
696         return rt;
697 }
698
699 void ip6_route_input(struct sk_buff *skb)
700 {
701         struct ipv6hdr *iph = skb->nh.ipv6h;
702         struct flowi fl = {
703                 .iif = skb->dev->ifindex,
704                 .nl_u = {
705                         .ip6_u = {
706                                 .daddr = iph->daddr,
707                                 .saddr = iph->saddr,
708                                 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
709                         },
710                 },
711                 .proto = iph->nexthdr,
712         };
713         int flags = 0;
714
715         if (rt6_need_strict(&iph->daddr))
716                 flags |= RT6_F_STRICT;
717
718         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
719 }
720
721 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
722                                              struct flowi *fl, int flags)
723 {
724         struct fib6_node *fn;
725         struct rt6_info *rt, *nrt;
726         int strict = 0;
727         int attempts = 3;
728         int err;
729         int reachable = RT6_SELECT_F_REACHABLE;
730
731         if (flags & RT6_F_STRICT)
732                 strict = RT6_SELECT_F_IFACE;
733
734 relookup:
735         read_lock_bh(&table->tb6_lock);
736
737 restart_2:
738         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
739
740 restart:
741         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
742         BACKTRACK(&fl->fl6_src);
743         if (rt == &ip6_null_entry ||
744             rt->rt6i_flags & RTF_CACHE)
745                 goto out;
746
747         dst_hold(&rt->u.dst);
748         read_unlock_bh(&table->tb6_lock);
749
750         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
751                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
752         else {
753 #if CLONE_OFFLINK_ROUTE
754                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
755 #else
756                 goto out2;
757 #endif
758         }
759
760         dst_release(&rt->u.dst);
761         rt = nrt ? : &ip6_null_entry;
762
763         dst_hold(&rt->u.dst);
764         if (nrt) {
765                 err = ip6_ins_rt(nrt);
766                 if (!err)
767                         goto out2;
768         }
769
770         if (--attempts <= 0)
771                 goto out2;
772
773         /*
774          * Race condition! In the gap, when table->tb6_lock was
775          * released someone could insert this route.  Relookup.
776          */
777         dst_release(&rt->u.dst);
778         goto relookup;
779
780 out:
781         if (reachable) {
782                 reachable = 0;
783                 goto restart_2;
784         }
785         dst_hold(&rt->u.dst);
786         read_unlock_bh(&table->tb6_lock);
787 out2:
788         rt->u.dst.lastuse = jiffies;
789         rt->u.dst.__use++;
790         return rt;
791 }
792
793 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
794 {
795         int flags = 0;
796
797         if (rt6_need_strict(&fl->fl6_dst))
798                 flags |= RT6_F_STRICT;
799
800         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
801 }
802
803
804 /*
805  *      Destination cache support functions
806  */
807
808 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
809 {
810         struct rt6_info *rt;
811
812         rt = (struct rt6_info *) dst;
813
814         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
815                 return dst;
816
817         return NULL;
818 }
819
820 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
821 {
822         struct rt6_info *rt = (struct rt6_info *) dst;
823
824         if (rt) {
825                 if (rt->rt6i_flags & RTF_CACHE)
826                         ip6_del_rt(rt);
827                 else
828                         dst_release(dst);
829         }
830         return NULL;
831 }
832
833 static void ip6_link_failure(struct sk_buff *skb)
834 {
835         struct rt6_info *rt;
836
837         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
838
839         rt = (struct rt6_info *) skb->dst;
840         if (rt) {
841                 if (rt->rt6i_flags&RTF_CACHE) {
842                         dst_set_expires(&rt->u.dst, 0);
843                         rt->rt6i_flags |= RTF_EXPIRES;
844                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
845                         rt->rt6i_node->fn_sernum = -1;
846         }
847 }
848
849 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
850 {
851         struct rt6_info *rt6 = (struct rt6_info*)dst;
852
853         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
854                 rt6->rt6i_flags |= RTF_MODIFIED;
855                 if (mtu < IPV6_MIN_MTU) {
856                         mtu = IPV6_MIN_MTU;
857                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
858                 }
859                 dst->metrics[RTAX_MTU-1] = mtu;
860                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
861         }
862 }
863
864 static int ipv6_get_mtu(struct net_device *dev);
865
866 static inline unsigned int ipv6_advmss(unsigned int mtu)
867 {
868         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
869
870         if (mtu < ip6_rt_min_advmss)
871                 mtu = ip6_rt_min_advmss;
872
873         /*
874          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
875          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
876          * IPV6_MAXPLEN is also valid and means: "any MSS, 
877          * rely only on pmtu discovery"
878          */
879         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
880                 mtu = IPV6_MAXPLEN;
881         return mtu;
882 }
883
884 static struct dst_entry *ndisc_dst_gc_list;
885 static DEFINE_SPINLOCK(ndisc_lock);
886
887 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
888                                   struct neighbour *neigh,
889                                   struct in6_addr *addr,
890                                   int (*output)(struct sk_buff *))
891 {
892         struct rt6_info *rt;
893         struct inet6_dev *idev = in6_dev_get(dev);
894
895         if (unlikely(idev == NULL))
896                 return NULL;
897
898         rt = ip6_dst_alloc();
899         if (unlikely(rt == NULL)) {
900                 in6_dev_put(idev);
901                 goto out;
902         }
903
904         dev_hold(dev);
905         if (neigh)
906                 neigh_hold(neigh);
907         else
908                 neigh = ndisc_get_neigh(dev, addr);
909
910         rt->rt6i_dev      = dev;
911         rt->rt6i_idev     = idev;
912         rt->rt6i_nexthop  = neigh;
913         atomic_set(&rt->u.dst.__refcnt, 1);
914         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
915         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
916         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
917         rt->u.dst.output  = output;
918
919 #if 0   /* there's no chance to use these for ndisc */
920         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
921                                 ? DST_HOST 
922                                 : 0;
923         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
924         rt->rt6i_dst.plen = 128;
925 #endif
926
927         spin_lock_bh(&ndisc_lock);
928         rt->u.dst.next = ndisc_dst_gc_list;
929         ndisc_dst_gc_list = &rt->u.dst;
930         spin_unlock_bh(&ndisc_lock);
931
932         fib6_force_start_gc();
933
934 out:
935         return (struct dst_entry *)rt;
936 }
937
938 int ndisc_dst_gc(int *more)
939 {
940         struct dst_entry *dst, *next, **pprev;
941         int freed;
942
943         next = NULL;
944         freed = 0;
945
946         spin_lock_bh(&ndisc_lock);
947         pprev = &ndisc_dst_gc_list;
948
949         while ((dst = *pprev) != NULL) {
950                 if (!atomic_read(&dst->__refcnt)) {
951                         *pprev = dst->next;
952                         dst_free(dst);
953                         freed++;
954                 } else {
955                         pprev = &dst->next;
956                         (*more)++;
957                 }
958         }
959
960         spin_unlock_bh(&ndisc_lock);
961
962         return freed;
963 }
964
965 static int ip6_dst_gc(void)
966 {
967         static unsigned expire = 30*HZ;
968         static unsigned long last_gc;
969         unsigned long now = jiffies;
970
971         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
972             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
973                 goto out;
974
975         expire++;
976         fib6_run_gc(expire);
977         last_gc = now;
978         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
979                 expire = ip6_rt_gc_timeout>>1;
980
981 out:
982         expire -= expire>>ip6_rt_gc_elasticity;
983         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
984 }
985
986 /* Clean host part of a prefix. Not necessary in radix tree,
987    but results in cleaner routing tables.
988
989    Remove it only when all the things will work!
990  */
991
992 static int ipv6_get_mtu(struct net_device *dev)
993 {
994         int mtu = IPV6_MIN_MTU;
995         struct inet6_dev *idev;
996
997         idev = in6_dev_get(dev);
998         if (idev) {
999                 mtu = idev->cnf.mtu6;
1000                 in6_dev_put(idev);
1001         }
1002         return mtu;
1003 }
1004
1005 int ipv6_get_hoplimit(struct net_device *dev)
1006 {
1007         int hoplimit = ipv6_devconf.hop_limit;
1008         struct inet6_dev *idev;
1009
1010         idev = in6_dev_get(dev);
1011         if (idev) {
1012                 hoplimit = idev->cnf.hop_limit;
1013                 in6_dev_put(idev);
1014         }
1015         return hoplimit;
1016 }
1017
1018 /*
1019  *
1020  */
1021
1022 int ip6_route_add(struct fib6_config *cfg)
1023 {
1024         int err;
1025         struct rt6_info *rt = NULL;
1026         struct net_device *dev = NULL;
1027         struct inet6_dev *idev = NULL;
1028         struct fib6_table *table;
1029         int addr_type;
1030
1031         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1032                 return -EINVAL;
1033 #ifndef CONFIG_IPV6_SUBTREES
1034         if (cfg->fc_src_len)
1035                 return -EINVAL;
1036 #endif
1037         if (cfg->fc_ifindex) {
1038                 err = -ENODEV;
1039                 dev = dev_get_by_index(cfg->fc_ifindex);
1040                 if (!dev)
1041                         goto out;
1042                 idev = in6_dev_get(dev);
1043                 if (!idev)
1044                         goto out;
1045         }
1046
1047         if (cfg->fc_metric == 0)
1048                 cfg->fc_metric = IP6_RT_PRIO_USER;
1049
1050         table = fib6_new_table(cfg->fc_table);
1051         if (table == NULL) {
1052                 err = -ENOBUFS;
1053                 goto out;
1054         }
1055
1056         rt = ip6_dst_alloc();
1057
1058         if (rt == NULL) {
1059                 err = -ENOMEM;
1060                 goto out;
1061         }
1062
1063         rt->u.dst.obsolete = -1;
1064         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1065
1066         if (cfg->fc_protocol == RTPROT_UNSPEC)
1067                 cfg->fc_protocol = RTPROT_BOOT;
1068         rt->rt6i_protocol = cfg->fc_protocol;
1069
1070         addr_type = ipv6_addr_type(&cfg->fc_dst);
1071
1072         if (addr_type & IPV6_ADDR_MULTICAST)
1073                 rt->u.dst.input = ip6_mc_input;
1074         else
1075                 rt->u.dst.input = ip6_forward;
1076
1077         rt->u.dst.output = ip6_output;
1078
1079         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1080         rt->rt6i_dst.plen = cfg->fc_dst_len;
1081         if (rt->rt6i_dst.plen == 128)
1082                rt->u.dst.flags = DST_HOST;
1083
1084 #ifdef CONFIG_IPV6_SUBTREES
1085         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1086         rt->rt6i_src.plen = cfg->fc_src_len;
1087 #endif
1088
1089         rt->rt6i_metric = cfg->fc_metric;
1090
1091         /* We cannot add true routes via loopback here,
1092            they would result in kernel looping; promote them to reject routes
1093          */
1094         if ((cfg->fc_flags & RTF_REJECT) ||
1095             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1096                 /* hold loopback dev/idev if we haven't done so. */
1097                 if (dev != &loopback_dev) {
1098                         if (dev) {
1099                                 dev_put(dev);
1100                                 in6_dev_put(idev);
1101                         }
1102                         dev = &loopback_dev;
1103                         dev_hold(dev);
1104                         idev = in6_dev_get(dev);
1105                         if (!idev) {
1106                                 err = -ENODEV;
1107                                 goto out;
1108                         }
1109                 }
1110                 rt->u.dst.output = ip6_pkt_discard_out;
1111                 rt->u.dst.input = ip6_pkt_discard;
1112                 rt->u.dst.error = -ENETUNREACH;
1113                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1114                 goto install_route;
1115         }
1116
1117         if (cfg->fc_flags & RTF_GATEWAY) {
1118                 struct in6_addr *gw_addr;
1119                 int gwa_type;
1120
1121                 gw_addr = &cfg->fc_gateway;
1122                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1123                 gwa_type = ipv6_addr_type(gw_addr);
1124
1125                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1126                         struct rt6_info *grt;
1127
1128                         /* IPv6 strictly inhibits using not link-local
1129                            addresses as nexthop address.
1130                            Otherwise, router will not able to send redirects.
1131                            It is very good, but in some (rare!) circumstances
1132                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1133                            some exceptions. --ANK
1134                          */
1135                         err = -EINVAL;
1136                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1137                                 goto out;
1138
1139                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1140
1141                         err = -EHOSTUNREACH;
1142                         if (grt == NULL)
1143                                 goto out;
1144                         if (dev) {
1145                                 if (dev != grt->rt6i_dev) {
1146                                         dst_release(&grt->u.dst);
1147                                         goto out;
1148                                 }
1149                         } else {
1150                                 dev = grt->rt6i_dev;
1151                                 idev = grt->rt6i_idev;
1152                                 dev_hold(dev);
1153                                 in6_dev_hold(grt->rt6i_idev);
1154                         }
1155                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1156                                 err = 0;
1157                         dst_release(&grt->u.dst);
1158
1159                         if (err)
1160                                 goto out;
1161                 }
1162                 err = -EINVAL;
1163                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1164                         goto out;
1165         }
1166
1167         err = -ENODEV;
1168         if (dev == NULL)
1169                 goto out;
1170
1171         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1172                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1173                 if (IS_ERR(rt->rt6i_nexthop)) {
1174                         err = PTR_ERR(rt->rt6i_nexthop);
1175                         rt->rt6i_nexthop = NULL;
1176                         goto out;
1177                 }
1178         }
1179
1180         rt->rt6i_flags = cfg->fc_flags;
1181
1182 install_route:
1183         if (cfg->fc_mx) {
1184                 struct nlattr *nla;
1185                 int remaining;
1186
1187                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1188                         int type = nla->nla_type;
1189
1190                         if (type) {
1191                                 if (type > RTAX_MAX) {
1192                                         err = -EINVAL;
1193                                         goto out;
1194                                 }
1195
1196                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1197                         }
1198                 }
1199         }
1200
1201         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1202                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1203         if (!rt->u.dst.metrics[RTAX_MTU-1])
1204                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1205         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1206                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1207         rt->u.dst.dev = dev;
1208         rt->rt6i_idev = idev;
1209         rt->rt6i_table = table;
1210         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1211
1212 out:
1213         if (dev)
1214                 dev_put(dev);
1215         if (idev)
1216                 in6_dev_put(idev);
1217         if (rt)
1218                 dst_free((struct dst_entry *) rt);
1219         return err;
1220 }
1221
1222 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1223 {
1224         int err;
1225         struct fib6_table *table;
1226
1227         if (rt == &ip6_null_entry)
1228                 return -ENOENT;
1229
1230         table = rt->rt6i_table;
1231         write_lock_bh(&table->tb6_lock);
1232
1233         err = fib6_del(rt, info);
1234         dst_release(&rt->u.dst);
1235
1236         write_unlock_bh(&table->tb6_lock);
1237
1238         return err;
1239 }
1240
1241 int ip6_del_rt(struct rt6_info *rt)
1242 {
1243         return __ip6_del_rt(rt, NULL);
1244 }
1245
1246 static int ip6_route_del(struct fib6_config *cfg)
1247 {
1248         struct fib6_table *table;
1249         struct fib6_node *fn;
1250         struct rt6_info *rt;
1251         int err = -ESRCH;
1252
1253         table = fib6_get_table(cfg->fc_table);
1254         if (table == NULL)
1255                 return err;
1256
1257         read_lock_bh(&table->tb6_lock);
1258
1259         fn = fib6_locate(&table->tb6_root,
1260                          &cfg->fc_dst, cfg->fc_dst_len,
1261                          &cfg->fc_src, cfg->fc_src_len);
1262         
1263         if (fn) {
1264                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1265                         if (cfg->fc_ifindex &&
1266                             (rt->rt6i_dev == NULL ||
1267                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1268                                 continue;
1269                         if (cfg->fc_flags & RTF_GATEWAY &&
1270                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1271                                 continue;
1272                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1273                                 continue;
1274                         dst_hold(&rt->u.dst);
1275                         read_unlock_bh(&table->tb6_lock);
1276
1277                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1278                 }
1279         }
1280         read_unlock_bh(&table->tb6_lock);
1281
1282         return err;
1283 }
1284
1285 /*
1286  *      Handle redirects
1287  */
/*
 * Flow key used for redirect lookups: a regular flowi extended with the
 * address of the gateway to match.  The flowi must stay the first member
 * because __ip6_route_redirect() casts its struct flowi pointer back to
 * this type.
 */
struct ip6rd_flowi {
        struct flowi fl;                /* ordinary flow key */
        struct in6_addr gateway;        /* compared against rt6i_gateway */
};
1292
1293 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1294                                              struct flowi *fl,
1295                                              int flags)
1296 {
1297         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1298         struct rt6_info *rt;
1299         struct fib6_node *fn;
1300
1301         /*
1302          * Get the "current" route for this destination and
1303          * check if the redirect has come from approriate router.
1304          *
1305          * RFC 2461 specifies that redirects should only be
1306          * accepted if they come from the nexthop to the target.
1307          * Due to the way the routes are chosen, this notion
1308          * is a bit fuzzy and one might need to check all possible
1309          * routes.
1310          */
1311
1312         read_lock_bh(&table->tb6_lock);
1313         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1314 restart:
1315         for (rt = fn->leaf; rt; rt = rt->u.next) {
1316                 /*
1317                  * Current route is on-link; redirect is always invalid.
1318                  *
1319                  * Seems, previous statement is not true. It could
1320                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1321                  * But then router serving it might decide, that we should
1322                  * know truth 8)8) --ANK (980726).
1323                  */
1324                 if (rt6_check_expired(rt))
1325                         continue;
1326                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1327                         continue;
1328                 if (fl->oif != rt->rt6i_dev->ifindex)
1329                         continue;
1330                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1331                         continue;
1332                 break;
1333         }
1334
1335         if (!rt) {
1336                 if (rt6_need_strict(&fl->fl6_dst)) {
1337                         while ((fn = fn->parent) != NULL) {
1338                                 if (fn->fn_flags & RTN_ROOT)
1339                                         break;
1340                                 if (fn->fn_flags & RTN_RTINFO)
1341                                         goto restart;
1342                         }
1343                 }
1344                 rt = &ip6_null_entry;
1345         }
1346         dst_hold(&rt->u.dst);
1347
1348         read_unlock_bh(&table->tb6_lock);
1349
1350         return rt;
1351 };
1352
1353 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1354                                            struct in6_addr *src,
1355                                            struct in6_addr *gateway,
1356                                            struct net_device *dev)
1357 {
1358         struct ip6rd_flowi rdfl = {
1359                 .fl = {
1360                         .oif = dev->ifindex,
1361                         .nl_u = {
1362                                 .ip6_u = {
1363                                         .daddr = *dest,
1364                                         .saddr = *src,
1365                                 },
1366                         },
1367                 },
1368                 .gateway = *gateway,
1369         };
1370         int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
1371
1372         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1373 }
1374
1375 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1376                   struct in6_addr *saddr,
1377                   struct neighbour *neigh, u8 *lladdr, int on_link)
1378 {
1379         struct rt6_info *rt, *nrt = NULL;
1380         struct netevent_redirect netevent;
1381
1382         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1383
1384         if (rt == &ip6_null_entry) {
1385                 if (net_ratelimit())
1386                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1387                                "for redirect target\n");
1388                 goto out;
1389         }
1390
1391         /*
1392          *      We have finally decided to accept it.
1393          */
1394
1395         neigh_update(neigh, lladdr, NUD_STALE, 
1396                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1397                      NEIGH_UPDATE_F_OVERRIDE|
1398                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1399                                      NEIGH_UPDATE_F_ISROUTER))
1400                      );
1401
1402         /*
1403          * Redirect received -> path was valid.
1404          * Look, redirects are sent only in response to data packets,
1405          * so that this nexthop apparently is reachable. --ANK
1406          */
1407         dst_confirm(&rt->u.dst);
1408
1409         /* Duplicate redirect: silently ignore. */
1410         if (neigh == rt->u.dst.neighbour)
1411                 goto out;
1412
1413         nrt = ip6_rt_copy(rt);
1414         if (nrt == NULL)
1415                 goto out;
1416
1417         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1418         if (on_link)
1419                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1420
1421         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1422         nrt->rt6i_dst.plen = 128;
1423         nrt->u.dst.flags |= DST_HOST;
1424
1425         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1426         nrt->rt6i_nexthop = neigh_clone(neigh);
1427         /* Reset pmtu, it may be better */
1428         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1429         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1430
1431         if (ip6_ins_rt(nrt))
1432                 goto out;
1433
1434         netevent.old = &rt->u.dst;
1435         netevent.new = &nrt->u.dst;
1436         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1437
1438         if (rt->rt6i_flags&RTF_CACHE) {
1439                 ip6_del_rt(rt);
1440                 return;
1441         }
1442
1443 out:
1444         dst_release(&rt->u.dst);
1445         return;
1446 }
1447
1448 /*
1449  *      Handle ICMP "packet too big" messages
1450  *      i.e. Path MTU discovery
1451  */
1452
1453 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1454                         struct net_device *dev, u32 pmtu)
1455 {
1456         struct rt6_info *rt, *nrt;
1457         int allfrag = 0;
1458
1459         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1460         if (rt == NULL)
1461                 return;
1462
1463         if (pmtu >= dst_mtu(&rt->u.dst))
1464                 goto out;
1465
1466         if (pmtu < IPV6_MIN_MTU) {
1467                 /*
1468                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1469                  * MTU (1280) and a fragment header should always be included
1470                  * after a node receiving Too Big message reporting PMTU is
1471                  * less than the IPv6 Minimum Link MTU.
1472                  */
1473                 pmtu = IPV6_MIN_MTU;
1474                 allfrag = 1;
1475         }
1476
1477         /* New mtu received -> path was valid.
1478            They are sent only in response to data packets,
1479            so that this nexthop apparently is reachable. --ANK
1480          */
1481         dst_confirm(&rt->u.dst);
1482
1483         /* Host route. If it is static, it would be better
1484            not to override it, but add new one, so that
1485            when cache entry will expire old pmtu
1486            would return automatically.
1487          */
1488         if (rt->rt6i_flags & RTF_CACHE) {
1489                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1490                 if (allfrag)
1491                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1492                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1493                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1494                 goto out;
1495         }
1496
1497         /* Network route.
1498            Two cases are possible:
1499            1. It is connected route. Action: COW
1500            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1501          */
1502         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1503                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1504         else
1505                 nrt = rt6_alloc_clone(rt, daddr);
1506
1507         if (nrt) {
1508                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1509                 if (allfrag)
1510                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1511
1512                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1513                  * happened within 5 mins, the recommended timer is 10 mins.
1514                  * Here this route expiration time is set to ip6_rt_mtu_expires
1515                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1516                  * and detecting PMTU increase will be automatically happened.
1517                  */
1518                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1519                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1520
1521                 ip6_ins_rt(nrt);
1522         }
1523 out:
1524         dst_release(&rt->u.dst);
1525 }
1526
1527 /*
1528  *      Misc support functions
1529  */
1530
1531 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1532 {
1533         struct rt6_info *rt = ip6_dst_alloc();
1534
1535         if (rt) {
1536                 rt->u.dst.input = ort->u.dst.input;
1537                 rt->u.dst.output = ort->u.dst.output;
1538
1539                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1540                 rt->u.dst.dev = ort->u.dst.dev;
1541                 if (rt->u.dst.dev)
1542                         dev_hold(rt->u.dst.dev);
1543                 rt->rt6i_idev = ort->rt6i_idev;
1544                 if (rt->rt6i_idev)
1545                         in6_dev_hold(rt->rt6i_idev);
1546                 rt->u.dst.lastuse = jiffies;
1547                 rt->rt6i_expires = 0;
1548
1549                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1550                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1551                 rt->rt6i_metric = 0;
1552
1553                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1554 #ifdef CONFIG_IPV6_SUBTREES
1555                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1556 #endif
1557                 rt->rt6i_table = ort->rt6i_table;
1558         }
1559         return rt;
1560 }
1561
1562 #ifdef CONFIG_IPV6_ROUTE_INFO
1563 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1564                                            struct in6_addr *gwaddr, int ifindex)
1565 {
1566         struct fib6_node *fn;
1567         struct rt6_info *rt = NULL;
1568         struct fib6_table *table;
1569
1570         table = fib6_get_table(RT6_TABLE_INFO);
1571         if (table == NULL)
1572                 return NULL;
1573
1574         write_lock_bh(&table->tb6_lock);
1575         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1576         if (!fn)
1577                 goto out;
1578
1579         for (rt = fn->leaf; rt; rt = rt->u.next) {
1580                 if (rt->rt6i_dev->ifindex != ifindex)
1581                         continue;
1582                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1583                         continue;
1584                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1585                         continue;
1586                 dst_hold(&rt->u.dst);
1587                 break;
1588         }
1589 out:
1590         write_unlock_bh(&table->tb6_lock);
1591         return rt;
1592 }
1593
1594 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1595                                            struct in6_addr *gwaddr, int ifindex,
1596                                            unsigned pref)
1597 {
1598         struct fib6_config cfg = {
1599                 .fc_table       = RT6_TABLE_INFO,
1600                 .fc_metric      = 1024,
1601                 .fc_ifindex     = ifindex,
1602                 .fc_dst_len     = prefixlen,
1603                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1604                                   RTF_UP | RTF_PREF(pref),
1605         };
1606
1607         ipv6_addr_copy(&cfg.fc_dst, prefix);
1608         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1609
1610         /* We should treat it as a default route if prefix length is 0. */
1611         if (!prefixlen)
1612                 cfg.fc_flags |= RTF_DEFAULT;
1613
1614         ip6_route_add(&cfg);
1615
1616         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1617 }
1618 #endif
1619
1620 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1621 {       
1622         struct rt6_info *rt;
1623         struct fib6_table *table;
1624
1625         table = fib6_get_table(RT6_TABLE_DFLT);
1626         if (table == NULL)
1627                 return NULL;
1628
1629         write_lock_bh(&table->tb6_lock);
1630         for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1631                 if (dev == rt->rt6i_dev &&
1632                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1633                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1634                         break;
1635         }
1636         if (rt)
1637                 dst_hold(&rt->u.dst);
1638         write_unlock_bh(&table->tb6_lock);
1639         return rt;
1640 }
1641
1642 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1643                                      struct net_device *dev,
1644                                      unsigned int pref)
1645 {
1646         struct fib6_config cfg = {
1647                 .fc_table       = RT6_TABLE_DFLT,
1648                 .fc_metric      = 1024,
1649                 .fc_ifindex     = dev->ifindex,
1650                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1651                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1652         };
1653
1654         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1655
1656         ip6_route_add(&cfg);
1657
1658         return rt6_get_dflt_router(gwaddr, dev);
1659 }
1660
1661 void rt6_purge_dflt_routers(void)
1662 {
1663         struct rt6_info *rt;
1664         struct fib6_table *table;
1665
1666         /* NOTE: Keep consistent with rt6_get_dflt_router */
1667         table = fib6_get_table(RT6_TABLE_DFLT);
1668         if (table == NULL)
1669                 return;
1670
1671 restart:
1672         read_lock_bh(&table->tb6_lock);
1673         for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1674                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1675                         dst_hold(&rt->u.dst);
1676                         read_unlock_bh(&table->tb6_lock);
1677                         ip6_del_rt(rt);
1678                         goto restart;
1679                 }
1680         }
1681         read_unlock_bh(&table->tb6_lock);
1682 }
1683
1684 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1685                                  struct fib6_config *cfg)
1686 {
1687         memset(cfg, 0, sizeof(*cfg));
1688
1689         cfg->fc_table = RT6_TABLE_MAIN;
1690         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1691         cfg->fc_metric = rtmsg->rtmsg_metric;
1692         cfg->fc_expires = rtmsg->rtmsg_info;
1693         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1694         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1695         cfg->fc_flags = rtmsg->rtmsg_flags;
1696
1697         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1698         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1699         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1700 }
1701
1702 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1703 {
1704         struct fib6_config cfg;
1705         struct in6_rtmsg rtmsg;
1706         int err;
1707
1708         switch(cmd) {
1709         case SIOCADDRT:         /* Add a route */
1710         case SIOCDELRT:         /* Delete a route */
1711                 if (!capable(CAP_NET_ADMIN))
1712                         return -EPERM;
1713                 err = copy_from_user(&rtmsg, arg,
1714                                      sizeof(struct in6_rtmsg));
1715                 if (err)
1716                         return -EFAULT;
1717
1718                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1719
1720                 rtnl_lock();
1721                 switch (cmd) {
1722                 case SIOCADDRT:
1723                         err = ip6_route_add(&cfg);
1724                         break;
1725                 case SIOCDELRT:
1726                         err = ip6_route_del(&cfg);
1727                         break;
1728                 default:
1729                         err = -EINVAL;
1730                 }
1731                 rtnl_unlock();
1732
1733                 return err;
1734         };
1735
1736         return -EINVAL;
1737 }
1738
1739 /*
1740  *      Drop the packet on the floor
1741  */
1742
1743 static int ip6_pkt_discard(struct sk_buff *skb)
1744 {
1745         int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1746         if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1747                 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1748
1749         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1750         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1751         kfree_skb(skb);
1752         return 0;
1753 }
1754
1755 static int ip6_pkt_discard_out(struct sk_buff *skb)
1756 {
1757         skb->dev = skb->dst->dev;
1758         return ip6_pkt_discard(skb);
1759 }
1760
1761 /*
1762  *      Allocate a dst for local (unicast / anycast) address.
1763  */
1764
1765 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1766                                     const struct in6_addr *addr,
1767                                     int anycast)
1768 {
1769         struct rt6_info *rt = ip6_dst_alloc();
1770
1771         if (rt == NULL)
1772                 return ERR_PTR(-ENOMEM);
1773
1774         dev_hold(&loopback_dev);
1775         in6_dev_hold(idev);
1776
1777         rt->u.dst.flags = DST_HOST;
1778         rt->u.dst.input = ip6_input;
1779         rt->u.dst.output = ip6_output;
1780         rt->rt6i_dev = &loopback_dev;
1781         rt->rt6i_idev = idev;
1782         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1783         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1784         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1785         rt->u.dst.obsolete = -1;
1786
1787         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1788         if (anycast)
1789                 rt->rt6i_flags |= RTF_ANYCAST;
1790         else
1791                 rt->rt6i_flags |= RTF_LOCAL;
1792         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1793         if (rt->rt6i_nexthop == NULL) {
1794                 dst_free((struct dst_entry *) rt);
1795                 return ERR_PTR(-ENOMEM);
1796         }
1797
1798         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1799         rt->rt6i_dst.plen = 128;
1800         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1801
1802         atomic_set(&rt->u.dst.__refcnt, 1);
1803
1804         return rt;
1805 }
1806
1807 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1808 {
1809         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1810             rt != &ip6_null_entry) {
1811                 RT6_TRACE("deleted by ifdown %p\n", rt);
1812                 return -1;
1813         }
1814         return 0;
1815 }
1816
1817 void rt6_ifdown(struct net_device *dev)
1818 {
1819         fib6_clean_all(fib6_ifdown, 0, dev);
1820 }
1821
/* Argument bundle handed to rt6_mtu_change_route() via fib6_clean_all(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU value */
};
1827
1828 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1829 {
1830         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1831         struct inet6_dev *idev;
1832
1833         /* In IPv6 pmtu discovery is not optional,
1834            so that RTAX_MTU lock cannot disable it.
1835            We still use this lock to block changes
1836            caused by addrconf/ndisc.
1837         */
1838
1839         idev = __in6_dev_get(arg->dev);
1840         if (idev == NULL)
1841                 return 0;
1842
1843         /* For administrative MTU increase, there is no way to discover
1844            IPv6 PMTU increase, so PMTU increase should be updated here.
1845            Since RFC 1981 doesn't include administrative MTU increase
1846            update PMTU increase is a MUST. (i.e. jumbo frame)
1847          */
1848         /*
1849            If new MTU is less than route PMTU, this new MTU will be the
1850            lowest MTU in the path, update the route PMTU to reflect PMTU
1851            decreases; if new MTU is greater than route PMTU, and the
1852            old MTU is the lowest MTU in the path, update the route PMTU
1853            to reflect the increase. In this case if the other nodes' MTU
1854            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1855            PMTU discouvery.
1856          */
1857         if (rt->rt6i_dev == arg->dev &&
1858             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1859             (dst_mtu(&rt->u.dst) > arg->mtu ||
1860              (dst_mtu(&rt->u.dst) < arg->mtu &&
1861               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1862                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1863         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1864         return 0;
1865 }
1866
1867 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1868 {
1869         struct rt6_mtu_change_arg arg = {
1870                 .dev = dev,
1871                 .mtu = mtu,
1872         };
1873
1874         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1875 }
1876
1877 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1878         [RTA_GATEWAY]           = { .minlen = sizeof(struct in6_addr) },
1879         [RTA_OIF]               = { .type = NLA_U32 },
1880         [RTA_IIF]               = { .type = NLA_U32 },
1881         [RTA_PRIORITY]          = { .type = NLA_U32 },
1882         [RTA_METRICS]           = { .type = NLA_NESTED },
1883 };
1884
1885 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1886                               struct fib6_config *cfg)
1887 {
1888         struct rtmsg *rtm;
1889         struct nlattr *tb[RTA_MAX+1];
1890         int err;
1891
1892         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1893         if (err < 0)
1894                 goto errout;
1895
1896         err = -EINVAL;
1897         rtm = nlmsg_data(nlh);
1898         memset(cfg, 0, sizeof(*cfg));
1899
1900         cfg->fc_table = rtm->rtm_table;
1901         cfg->fc_dst_len = rtm->rtm_dst_len;
1902         cfg->fc_src_len = rtm->rtm_src_len;
1903         cfg->fc_flags = RTF_UP;
1904         cfg->fc_protocol = rtm->rtm_protocol;
1905
1906         if (rtm->rtm_type == RTN_UNREACHABLE)
1907                 cfg->fc_flags |= RTF_REJECT;
1908
1909         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1910         cfg->fc_nlinfo.nlh = nlh;
1911
1912         if (tb[RTA_GATEWAY]) {
1913                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1914                 cfg->fc_flags |= RTF_GATEWAY;
1915         }
1916
1917         if (tb[RTA_DST]) {
1918                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1919
1920                 if (nla_len(tb[RTA_DST]) < plen)
1921                         goto errout;
1922
1923                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1924         }
1925
1926         if (tb[RTA_SRC]) {
1927                 int plen = (rtm->rtm_src_len + 7) >> 3;
1928
1929                 if (nla_len(tb[RTA_SRC]) < plen)
1930                         goto errout;
1931
1932                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1933         }
1934
1935         if (tb[RTA_OIF])
1936                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1937
1938         if (tb[RTA_PRIORITY])
1939                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1940
1941         if (tb[RTA_METRICS]) {
1942                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1943                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1944         }
1945
1946         if (tb[RTA_TABLE])
1947                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1948
1949         err = 0;
1950 errout:
1951         return err;
1952 }
1953
1954 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1955 {
1956         struct fib6_config cfg;
1957         int err;
1958
1959         err = rtm_to_fib6_config(skb, nlh, &cfg);
1960         if (err < 0)
1961                 return err;
1962
1963         return ip6_route_del(&cfg);
1964 }
1965
1966 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1967 {
1968         struct fib6_config cfg;
1969         int err;
1970
1971         err = rtm_to_fib6_config(skb, nlh, &cfg);
1972         if (err < 0)
1973                 return err;
1974
1975         return ip6_route_add(&cfg);
1976 }
1977
1978 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1979                          struct in6_addr *dst, struct in6_addr *src,
1980                          int iif, int type, u32 pid, u32 seq,
1981                          int prefix, unsigned int flags)
1982 {
1983         struct rtmsg *rtm;
1984         struct nlmsghdr *nlh;
1985         struct rta_cacheinfo ci;
1986         u32 table;
1987
1988         if (prefix) {   /* user wants prefix routes only */
1989                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1990                         /* success since this is not a prefix route */
1991                         return 1;
1992                 }
1993         }
1994
1995         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1996         if (nlh == NULL)
1997                 return -ENOBUFS;
1998
1999         rtm = nlmsg_data(nlh);
2000         rtm->rtm_family = AF_INET6;
2001         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2002         rtm->rtm_src_len = rt->rt6i_src.plen;
2003         rtm->rtm_tos = 0;
2004         if (rt->rt6i_table)
2005                 table = rt->rt6i_table->tb6_id;
2006         else
2007                 table = RT6_TABLE_UNSPEC;
2008         rtm->rtm_table = table;
2009         NLA_PUT_U32(skb, RTA_TABLE, table);
2010         if (rt->rt6i_flags&RTF_REJECT)
2011                 rtm->rtm_type = RTN_UNREACHABLE;
2012         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2013                 rtm->rtm_type = RTN_LOCAL;
2014         else
2015                 rtm->rtm_type = RTN_UNICAST;
2016         rtm->rtm_flags = 0;
2017         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2018         rtm->rtm_protocol = rt->rt6i_protocol;
2019         if (rt->rt6i_flags&RTF_DYNAMIC)
2020                 rtm->rtm_protocol = RTPROT_REDIRECT;
2021         else if (rt->rt6i_flags & RTF_ADDRCONF)
2022                 rtm->rtm_protocol = RTPROT_KERNEL;
2023         else if (rt->rt6i_flags&RTF_DEFAULT)
2024                 rtm->rtm_protocol = RTPROT_RA;
2025
2026         if (rt->rt6i_flags&RTF_CACHE)
2027                 rtm->rtm_flags |= RTM_F_CLONED;
2028
2029         if (dst) {
2030                 NLA_PUT(skb, RTA_DST, 16, dst);
2031                 rtm->rtm_dst_len = 128;
2032         } else if (rtm->rtm_dst_len)
2033                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2034 #ifdef CONFIG_IPV6_SUBTREES
2035         if (src) {
2036                 NLA_PUT(skb, RTA_SRC, 16, src);
2037                 rtm->rtm_src_len = 128;
2038         } else if (rtm->rtm_src_len)
2039                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2040 #endif
2041         if (iif)
2042                 NLA_PUT_U32(skb, RTA_IIF, iif);
2043         else if (dst) {
2044                 struct in6_addr saddr_buf;
2045                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2046                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2047         }
2048
2049         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2050                 goto nla_put_failure;
2051
2052         if (rt->u.dst.neighbour)
2053                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2054
2055         if (rt->u.dst.dev)
2056                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2057
2058         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2059         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2060         if (rt->rt6i_expires)
2061                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2062         else
2063                 ci.rta_expires = 0;
2064         ci.rta_used = rt->u.dst.__use;
2065         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2066         ci.rta_error = rt->u.dst.error;
2067         ci.rta_id = 0;
2068         ci.rta_ts = 0;
2069         ci.rta_tsage = 0;
2070         NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2071
2072         return nlmsg_end(skb, nlh);
2073
2074 nla_put_failure:
2075         return nlmsg_cancel(skb, nlh);
2076 }
2077
2078 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2079 {
2080         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2081         int prefix;
2082
2083         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2084                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2085                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2086         } else
2087                 prefix = 0;
2088
2089         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2090                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2091                      prefix, NLM_F_MULTI);
2092 }
2093
2094 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2095 {
2096         struct nlattr *tb[RTA_MAX+1];
2097         struct rt6_info *rt;
2098         struct sk_buff *skb;
2099         struct rtmsg *rtm;
2100         struct flowi fl;
2101         int err, iif = 0;
2102
2103         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2104         if (err < 0)
2105                 goto errout;
2106
2107         err = -EINVAL;
2108         memset(&fl, 0, sizeof(fl));
2109
2110         if (tb[RTA_SRC]) {
2111                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2112                         goto errout;
2113
2114                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2115         }
2116
2117         if (tb[RTA_DST]) {
2118                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2119                         goto errout;
2120
2121                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2122         }
2123
2124         if (tb[RTA_IIF])
2125                 iif = nla_get_u32(tb[RTA_IIF]);
2126
2127         if (tb[RTA_OIF])
2128                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2129
2130         if (iif) {
2131                 struct net_device *dev;
2132                 dev = __dev_get_by_index(iif);
2133                 if (!dev) {
2134                         err = -ENODEV;
2135                         goto errout;
2136                 }
2137         }
2138
2139         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2140         if (skb == NULL) {
2141                 err = -ENOBUFS;
2142                 goto errout;
2143         }
2144
2145         /* Reserve room for dummy headers, this skb can pass
2146            through good chunk of routing engine.
2147          */
2148         skb->mac.raw = skb->data;
2149         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2150
2151         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2152         skb->dst = &rt->u.dst;
2153
2154         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2155                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2156                             nlh->nlmsg_seq, 0, 0);
2157         if (err < 0) {
2158                 kfree_skb(skb);
2159                 goto errout;
2160         }
2161
2162         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2163 errout:
2164         return err;
2165 }
2166
2167 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2168 {
2169         struct sk_buff *skb;
2170         u32 pid = 0, seq = 0;
2171         struct nlmsghdr *nlh = NULL;
2172         int payload = sizeof(struct rtmsg) + 256;
2173         int err = -ENOBUFS;
2174
2175         if (info) {
2176                 pid = info->pid;
2177                 nlh = info->nlh;
2178                 if (nlh)
2179                         seq = nlh->nlmsg_seq;
2180         }
2181
2182         skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2183         if (skb == NULL)
2184                 goto errout;
2185
2186         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2187         if (err < 0) {
2188                 kfree_skb(skb);
2189                 goto errout;
2190         }
2191
2192         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2193 errout:
2194         if (err < 0)
2195                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2196 }
2197
2198 /*
2199  *      /proc
2200  */
2201
2202 #ifdef CONFIG_PROC_FS
2203
2204 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2205
/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* stop emitting once len reaches this */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2214
2215 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2216 {
2217         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2218         int i;
2219
2220         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2221                 arg->skip++;
2222                 return 0;
2223         }
2224
2225         if (arg->len >= arg->length)
2226                 return 0;
2227
2228         for (i=0; i<16; i++) {
2229                 sprintf(arg->buffer + arg->len, "%02x",
2230                         rt->rt6i_dst.addr.s6_addr[i]);
2231                 arg->len += 2;
2232         }
2233         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2234                             rt->rt6i_dst.plen);
2235
2236 #ifdef CONFIG_IPV6_SUBTREES
2237         for (i=0; i<16; i++) {
2238                 sprintf(arg->buffer + arg->len, "%02x",
2239                         rt->rt6i_src.addr.s6_addr[i]);
2240                 arg->len += 2;
2241         }
2242         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2243                             rt->rt6i_src.plen);
2244 #else
2245         sprintf(arg->buffer + arg->len,
2246                 "00000000000000000000000000000000 00 ");
2247         arg->len += 36;
2248 #endif
2249
2250         if (rt->rt6i_nexthop) {
2251                 for (i=0; i<16; i++) {
2252                         sprintf(arg->buffer + arg->len, "%02x",
2253                                 rt->rt6i_nexthop->primary_key[i]);
2254                         arg->len += 2;
2255                 }
2256         } else {
2257                 sprintf(arg->buffer + arg->len,
2258                         "00000000000000000000000000000000");
2259                 arg->len += 32;
2260         }
2261         arg->len += sprintf(arg->buffer + arg->len,
2262                             " %08x %08x %08x %08x %8s\n",
2263                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2264                             rt->u.dst.__use, rt->rt6i_flags, 
2265                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2266         return 0;
2267 }
2268
2269 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2270 {
2271         struct rt6_proc_arg arg = {
2272                 .buffer = buffer,
2273                 .offset = offset,
2274                 .length = length,
2275         };
2276
2277         fib6_clean_all(rt6_info_route, 0, &arg);
2278
2279         *start = buffer;
2280         if (offset)
2281                 *start += offset % RT6_INFO_LEN;
2282
2283         arg.len -= offset % RT6_INFO_LEN;
2284
2285         if (arg.len > length)
2286                 arg.len = length;
2287         if (arg.len < 0)
2288                 arg.len = 0;
2289
2290         return arg.len;
2291 }
2292
2293 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2294 {
2295         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2296                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2297                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2298                       rt6_stats.fib_rt_cache,
2299                       atomic_read(&ip6_dst_ops.entries),
2300                       rt6_stats.fib_discarded_routes);
2301
2302         return 0;
2303 }
2304
2305 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2306 {
2307         return single_open(file, rt6_stats_seq_show, NULL);
2308 }
2309
2310 static struct file_operations rt6_stats_seq_fops = {
2311         .owner   = THIS_MODULE,
2312         .open    = rt6_stats_seq_open,
2313         .read    = seq_read,
2314         .llseek  = seq_lseek,
2315         .release = single_release,
2316 };
2317 #endif  /* CONFIG_PROC_FS */
2318
2319 #ifdef CONFIG_SYSCTL
2320
2321 static int flush_delay;
2322
2323 static
2324 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2325                               void __user *buffer, size_t *lenp, loff_t *ppos)
2326 {
2327         if (write) {
2328                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2329                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2330                 return 0;
2331         } else
2332                 return -EINVAL;
2333 }
2334
2335 ctl_table ipv6_route_table[] = {
2336         {
2337                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2338                 .procname       =       "flush",
2339                 .data           =       &flush_delay,
2340                 .maxlen         =       sizeof(int),
2341                 .mode           =       0200,
2342                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2343         },
2344         {
2345                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2346                 .procname       =       "gc_thresh",
2347                 .data           =       &ip6_dst_ops.gc_thresh,
2348                 .maxlen         =       sizeof(int),
2349                 .mode           =       0644,
2350                 .proc_handler   =       &proc_dointvec,
2351         },
2352         {
2353                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2354                 .procname       =       "max_size",
2355                 .data           =       &ip6_rt_max_size,
2356                 .maxlen         =       sizeof(int),
2357                 .mode           =       0644,
2358                 .proc_handler   =       &proc_dointvec,
2359         },
2360         {
2361                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2362                 .procname       =       "gc_min_interval",
2363                 .data           =       &ip6_rt_gc_min_interval,
2364                 .maxlen         =       sizeof(int),
2365                 .mode           =       0644,
2366                 .proc_handler   =       &proc_dointvec_jiffies,
2367                 .strategy       =       &sysctl_jiffies,
2368         },
2369         {
2370                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2371                 .procname       =       "gc_timeout",
2372                 .data           =       &ip6_rt_gc_timeout,
2373                 .maxlen         =       sizeof(int),
2374                 .mode           =       0644,
2375                 .proc_handler   =       &proc_dointvec_jiffies,
2376                 .strategy       =       &sysctl_jiffies,
2377         },
2378         {
2379                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2380                 .procname       =       "gc_interval",
2381                 .data           =       &ip6_rt_gc_interval,
2382                 .maxlen         =       sizeof(int),
2383                 .mode           =       0644,
2384                 .proc_handler   =       &proc_dointvec_jiffies,
2385                 .strategy       =       &sysctl_jiffies,
2386         },
2387         {
2388                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2389                 .procname       =       "gc_elasticity",
2390                 .data           =       &ip6_rt_gc_elasticity,
2391                 .maxlen         =       sizeof(int),
2392                 .mode           =       0644,
2393                 .proc_handler   =       &proc_dointvec_jiffies,
2394                 .strategy       =       &sysctl_jiffies,
2395         },
2396         {
2397                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2398                 .procname       =       "mtu_expires",
2399                 .data           =       &ip6_rt_mtu_expires,
2400                 .maxlen         =       sizeof(int),
2401                 .mode           =       0644,
2402                 .proc_handler   =       &proc_dointvec_jiffies,
2403                 .strategy       =       &sysctl_jiffies,
2404         },
2405         {
2406                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2407                 .procname       =       "min_adv_mss",
2408                 .data           =       &ip6_rt_min_advmss,
2409                 .maxlen         =       sizeof(int),
2410                 .mode           =       0644,
2411                 .proc_handler   =       &proc_dointvec_jiffies,
2412                 .strategy       =       &sysctl_jiffies,
2413         },
2414         {
2415                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2416                 .procname       =       "gc_min_interval_ms",
2417                 .data           =       &ip6_rt_gc_min_interval,
2418                 .maxlen         =       sizeof(int),
2419                 .mode           =       0644,
2420                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2421                 .strategy       =       &sysctl_ms_jiffies,
2422         },
2423         { .ctl_name = 0 }
2424 };
2425
2426 #endif
2427
2428 void __init ip6_route_init(void)
2429 {
2430         struct proc_dir_entry *p;
2431
2432         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2433                                                      sizeof(struct rt6_info),
2434                                                      0, SLAB_HWCACHE_ALIGN,
2435                                                      NULL, NULL);
2436         if (!ip6_dst_ops.kmem_cachep)
2437                 panic("cannot create ip6_dst_cache");
2438
2439         fib6_init();
2440 #ifdef  CONFIG_PROC_FS
2441         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2442         if (p)
2443                 p->owner = THIS_MODULE;
2444
2445         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2446 #endif
2447 #ifdef CONFIG_XFRM
2448         xfrm6_init();
2449 #endif
2450 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2451         fib6_rules_init();
2452 #endif
2453 }
2454
2455 void ip6_route_cleanup(void)
2456 {
2457 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2458         fib6_rules_cleanup();
2459 #endif
2460 #ifdef CONFIG_PROC_FS
2461         proc_net_remove("ipv6_route");
2462         proc_net_remove("rt6_stats");
2463 #endif
2464 #ifdef CONFIG_XFRM
2465         xfrm6_fini();
2466 #endif
2467         rt6_ifdown(NULL);
2468         fib6_gc_cleanup();
2469         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2470 }