]> git.karo-electronics.de Git - mv-sheeva.git/blob - net/ipv6/route.c
[IPV6] NDISC: Search over all possible rules on receipt of redirect.
[mv-sheeva.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/if_arp.h>
39
40 #ifdef  CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #endif
44
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75 #define CLONE_OFFLINK_ROUTE 0
76
77 #define RT6_SELECT_F_IFACE      0x1
78 #define RT6_SELECT_F_REACHABLE  0x2
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103                                            struct in6_addr *gwaddr, int ifindex,
104                                            unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106                                            struct in6_addr *gwaddr, int ifindex);
107 #endif
108
109 static struct dst_ops ip6_dst_ops = {
110         .family                 =       AF_INET6,
111         .protocol               =       __constant_htons(ETH_P_IPV6),
112         .gc                     =       ip6_dst_gc,
113         .gc_thresh              =       1024,
114         .check                  =       ip6_dst_check,
115         .destroy                =       ip6_dst_destroy,
116         .ifdown                 =       ip6_dst_ifdown,
117         .negative_advice        =       ip6_negative_advice,
118         .link_failure           =       ip6_link_failure,
119         .update_pmtu            =       ip6_rt_update_pmtu,
120         .entry_size             =       sizeof(struct rt6_info),
121 };
122
123 struct rt6_info ip6_null_entry = {
124         .u = {
125                 .dst = {
126                         .__refcnt       = ATOMIC_INIT(1),
127                         .__use          = 1,
128                         .dev            = &loopback_dev,
129                         .obsolete       = -1,
130                         .error          = -ENETUNREACH,
131                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
132                         .input          = ip6_pkt_discard,
133                         .output         = ip6_pkt_discard_out,
134                         .ops            = &ip6_dst_ops,
135                         .path           = (struct dst_entry*)&ip6_null_entry,
136                 }
137         },
138         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
139         .rt6i_metric    = ~(u32) 0,
140         .rt6i_ref       = ATOMIC_INIT(1),
141 };
142
143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
145 struct rt6_info ip6_prohibit_entry = {
146         .u = {
147                 .dst = {
148                         .__refcnt       = ATOMIC_INIT(1),
149                         .__use          = 1,
150                         .dev            = &loopback_dev,
151                         .obsolete       = -1,
152                         .error          = -EACCES,
153                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
154                         .input          = ip6_pkt_discard,
155                         .output         = ip6_pkt_discard_out,
156                         .ops            = &ip6_dst_ops,
157                         .path           = (struct dst_entry*)&ip6_prohibit_entry,
158                 }
159         },
160         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
161         .rt6i_metric    = ~(u32) 0,
162         .rt6i_ref       = ATOMIC_INIT(1),
163 };
164
165 struct rt6_info ip6_blk_hole_entry = {
166         .u = {
167                 .dst = {
168                         .__refcnt       = ATOMIC_INIT(1),
169                         .__use          = 1,
170                         .dev            = &loopback_dev,
171                         .obsolete       = -1,
172                         .error          = -EINVAL,
173                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
174                         .input          = ip6_pkt_discard,
175                         .output         = ip6_pkt_discard_out,
176                         .ops            = &ip6_dst_ops,
177                         .path           = (struct dst_entry*)&ip6_blk_hole_entry,
178                 }
179         },
180         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
181         .rt6i_metric    = ~(u32) 0,
182         .rt6i_ref       = ATOMIC_INIT(1),
183 };
184
185 #endif
186
187 /* allocate dst with ip6_dst_ops */
188 static __inline__ struct rt6_info *ip6_dst_alloc(void)
189 {
190         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191 }
192
193 static void ip6_dst_destroy(struct dst_entry *dst)
194 {
195         struct rt6_info *rt = (struct rt6_info *)dst;
196         struct inet6_dev *idev = rt->rt6i_idev;
197
198         if (idev != NULL) {
199                 rt->rt6i_idev = NULL;
200                 in6_dev_put(idev);
201         }       
202 }
203
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205                            int how)
206 {
207         struct rt6_info *rt = (struct rt6_info *)dst;
208         struct inet6_dev *idev = rt->rt6i_idev;
209
210         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212                 if (loopback_idev != NULL) {
213                         rt->rt6i_idev = loopback_idev;
214                         in6_dev_put(idev);
215                 }
216         }
217 }
218
219 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220 {
221         return (rt->rt6i_flags & RTF_EXPIRES &&
222                 time_after(jiffies, rt->rt6i_expires));
223 }
224
225 static inline int rt6_need_strict(struct in6_addr *daddr)
226 {
227         return (ipv6_addr_type(daddr) &
228                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229 }
230
231 /*
232  *      Route lookup. Any table->tb6_lock is implied.
233  */
234
235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236                                                     int oif,
237                                                     int strict)
238 {
239         struct rt6_info *local = NULL;
240         struct rt6_info *sprt;
241
242         if (oif) {
243                 for (sprt = rt; sprt; sprt = sprt->u.next) {
244                         struct net_device *dev = sprt->rt6i_dev;
245                         if (dev->ifindex == oif)
246                                 return sprt;
247                         if (dev->flags & IFF_LOOPBACK) {
248                                 if (sprt->rt6i_idev == NULL ||
249                                     sprt->rt6i_idev->dev->ifindex != oif) {
250                                         if (strict && oif)
251                                                 continue;
252                                         if (local && (!oif || 
253                                                       local->rt6i_idev->dev->ifindex == oif))
254                                                 continue;
255                                 }
256                                 local = sprt;
257                         }
258                 }
259
260                 if (local)
261                         return local;
262
263                 if (strict)
264                         return &ip6_null_entry;
265         }
266         return rt;
267 }
268
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to its
 * solicited-node multicast address.  A NULL @rt or a route without a
 * next hop is ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; the rate limit here is the "updated" timestamp check.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap unlocked test first */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* re-check under the neighbour lock and claim this probe slot */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
304
305 /*
306  * Default Router Selection (RFC 2461 6.3.6)
307  */
308 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309 {
310         struct net_device *dev = rt->rt6i_dev;
311         if (!oif || dev->ifindex == oif)
312                 return 2;
313         if ((dev->flags & IFF_LOOPBACK) &&
314             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315                 return 1;
316         return 0;
317 }
318
319 static int inline rt6_check_neigh(struct rt6_info *rt)
320 {
321         struct neighbour *neigh = rt->rt6i_nexthop;
322         int m = 0;
323         if (rt->rt6i_flags & RTF_NONEXTHOP ||
324             !(rt->rt6i_flags & RTF_GATEWAY))
325                 m = 1;
326         else if (neigh) {
327                 read_lock_bh(&neigh->lock);
328                 if (neigh->nud_state & NUD_VALID)
329                         m = 2;
330                 read_unlock_bh(&neigh->lock);
331         }
332         return m;
333 }
334
335 static int rt6_score_route(struct rt6_info *rt, int oif,
336                            int strict)
337 {
338         int m, n;
339                 
340         m = rt6_check_dev(rt, oif);
341         if (!m && (strict & RT6_SELECT_F_IFACE))
342                 return -1;
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345 #endif
346         n = rt6_check_neigh(rt);
347         if (n > 1)
348                 m |= 16;
349         else if (!n && strict & RT6_SELECT_F_REACHABLE)
350                 return -1;
351         return m;
352 }
353
354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355                                    int strict)
356 {
357         struct rt6_info *match = NULL, *last = NULL;
358         struct rt6_info *rt, *rt0 = *head;
359         u32 metric;
360         int mpri = -1;
361
362         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363                   __FUNCTION__, head, head ? *head : NULL, oif);
364
365         for (rt = rt0, metric = rt0->rt6i_metric;
366              rt && rt->rt6i_metric == metric && (!last || rt != rt0);
367              rt = rt->u.next) {
368                 int m;
369
370                 if (rt6_check_expired(rt))
371                         continue;
372
373                 last = rt;
374
375                 m = rt6_score_route(rt, oif, strict);
376                 if (m < 0)
377                         continue;
378
379                 if (m > mpri) {
380                         rt6_probe(match);
381                         match = rt;
382                         mpri = m;
383                 } else {
384                         rt6_probe(rt);
385                 }
386         }
387
388         if (!match &&
389             (strict & RT6_SELECT_F_REACHABLE) &&
390             last && last != rt0) {
391                 /* no entries matched; do round-robin */
392                 static DEFINE_SPINLOCK(lock);
393                 spin_lock(&lock);
394                 *head = rt0->u.next;
395                 rt0->u.next = last->u.next;
396                 last->u.next = rt0;
397                 spin_unlock(&lock);
398         }
399
400         RT6_TRACE("%s() => %p, score=%d\n",
401                   __FUNCTION__, match, mpri);
402
403         return (match ? match : &ip6_null_entry);
404 }
405
406 #ifdef CONFIG_IPV6_ROUTE_INFO
407 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
408                   struct in6_addr *gwaddr)
409 {
410         struct route_info *rinfo = (struct route_info *) opt;
411         struct in6_addr prefix_buf, *prefix;
412         unsigned int pref;
413         u32 lifetime;
414         struct rt6_info *rt;
415
416         if (len < sizeof(struct route_info)) {
417                 return -EINVAL;
418         }
419
420         /* Sanity check for prefix_len and length */
421         if (rinfo->length > 3) {
422                 return -EINVAL;
423         } else if (rinfo->prefix_len > 128) {
424                 return -EINVAL;
425         } else if (rinfo->prefix_len > 64) {
426                 if (rinfo->length < 2) {
427                         return -EINVAL;
428                 }
429         } else if (rinfo->prefix_len > 0) {
430                 if (rinfo->length < 1) {
431                         return -EINVAL;
432                 }
433         }
434
435         pref = rinfo->route_pref;
436         if (pref == ICMPV6_ROUTER_PREF_INVALID)
437                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
438
439         lifetime = htonl(rinfo->lifetime);
440         if (lifetime == 0xffffffff) {
441                 /* infinity */
442         } else if (lifetime > 0x7fffffff/HZ) {
443                 /* Avoid arithmetic overflow */
444                 lifetime = 0x7fffffff/HZ - 1;
445         }
446
447         if (rinfo->length == 3)
448                 prefix = (struct in6_addr *)rinfo->prefix;
449         else {
450                 /* this function is safe */
451                 ipv6_addr_prefix(&prefix_buf,
452                                  (struct in6_addr *)rinfo->prefix,
453                                  rinfo->prefix_len);
454                 prefix = &prefix_buf;
455         }
456
457         rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
458
459         if (rt && !lifetime) {
460                 ip6_del_rt(rt);
461                 rt = NULL;
462         }
463
464         if (!rt && lifetime)
465                 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
466                                         pref);
467         else if (rt)
468                 rt->rt6i_flags = RTF_ROUTEINFO |
469                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
470
471         if (rt) {
472                 if (lifetime == 0xffffffff) {
473                         rt->rt6i_flags &= ~RTF_EXPIRES;
474                 } else {
475                         rt->rt6i_expires = jiffies + HZ * lifetime;
476                         rt->rt6i_flags |= RTF_EXPIRES;
477                 }
478                 dst_release(&rt->u.dst);
479         }
480         return 0;
481 }
482 #endif
483
/*
 * BACKTRACK() - climb towards the root after a strict lookup found nothing.
 *
 * Not self-contained: it uses the caller's local variables "rt", "fn" and
 * "flags" and jumps to the caller's labels "restart" and "out".
 *
 * When rt is &ip6_null_entry and RT6_F_STRICT is set, walk up through
 * fn->parent.  At a node flagged RTN_RTINFO jump to "restart"; on reaching
 * a node flagged RTN_TL_ROOT take a reference on rt and jump to "out" --
 * so the "out" label of the caller must NOT take another reference on
 * this path.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single statement
 * and cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
495
496 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497                                              struct flowi *fl, int flags)
498 {
499         struct fib6_node *fn;
500         struct rt6_info *rt;
501
502         read_lock_bh(&table->tb6_lock);
503         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504 restart:
505         rt = fn->leaf;
506         rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507         BACKTRACK();
508         dst_hold(&rt->u.dst);
509 out:
510         read_unlock_bh(&table->tb6_lock);
511
512         rt->u.dst.lastuse = jiffies;
513         rt->u.dst.__use++;
514
515         return rt;
516
517 }
518
519 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520                             int oif, int strict)
521 {
522         struct flowi fl = {
523                 .oif = oif,
524                 .nl_u = {
525                         .ip6_u = {
526                                 .daddr = *daddr,
527                                 /* TODO: saddr */
528                         },
529                 },
530         };
531         struct dst_entry *dst;
532         int flags = strict ? RT6_F_STRICT : 0;
533
534         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535         if (dst->error == 0)
536                 return (struct rt6_info *) dst;
537
538         dst_release(dst);
539
540         return NULL;
541 }
542
543 /* ip6_ins_rt is called with FREE table->tb6_lock.
544    It takes new route entry, the addition fails by any reason the
545    route is freed. In any case, if caller does not hold it, it may
546    be destroyed.
547  */
548
549 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
550 {
551         int err;
552         struct fib6_table *table;
553
554         table = rt->rt6i_table;
555         write_lock_bh(&table->tb6_lock);
556         err = fib6_add(&table->tb6_root, rt, info);
557         write_unlock_bh(&table->tb6_lock);
558
559         return err;
560 }
561
562 int ip6_ins_rt(struct rt6_info *rt)
563 {
564         return __ip6_ins_rt(rt, NULL);
565 }
566
567 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
568                                       struct in6_addr *saddr)
569 {
570         struct rt6_info *rt;
571
572         /*
573          *      Clone the route.
574          */
575
576         rt = ip6_rt_copy(ort);
577
578         if (rt) {
579                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
580                         if (rt->rt6i_dst.plen != 128 &&
581                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
582                                 rt->rt6i_flags |= RTF_ANYCAST;
583                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
584                 }
585
586                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
587                 rt->rt6i_dst.plen = 128;
588                 rt->rt6i_flags |= RTF_CACHE;
589                 rt->u.dst.flags |= DST_HOST;
590
591 #ifdef CONFIG_IPV6_SUBTREES
592                 if (rt->rt6i_src.plen && saddr) {
593                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
594                         rt->rt6i_src.plen = 128;
595                 }
596 #endif
597
598                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
599
600         }
601
602         return rt;
603 }
604
605 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
606 {
607         struct rt6_info *rt = ip6_rt_copy(ort);
608         if (rt) {
609                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
610                 rt->rt6i_dst.plen = 128;
611                 rt->rt6i_flags |= RTF_CACHE;
612                 if (rt->rt6i_flags & RTF_REJECT)
613                         rt->u.dst.error = ort->u.dst.error;
614                 rt->u.dst.flags |= DST_HOST;
615                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
616         }
617         return rt;
618 }
619
620 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
621                                             struct flowi *fl, int flags)
622 {
623         struct fib6_node *fn;
624         struct rt6_info *rt, *nrt;
625         int strict = 0;
626         int attempts = 3;
627         int err;
628         int reachable = RT6_SELECT_F_REACHABLE;
629
630         if (flags & RT6_F_STRICT)
631                 strict = RT6_SELECT_F_IFACE;
632
633 relookup:
634         read_lock_bh(&table->tb6_lock);
635
636 restart_2:
637         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
638
639 restart:
640         rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
641         BACKTRACK();
642         if (rt == &ip6_null_entry ||
643             rt->rt6i_flags & RTF_CACHE)
644                 goto out;
645
646         dst_hold(&rt->u.dst);
647         read_unlock_bh(&table->tb6_lock);
648
649         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
650                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
651         else {
652 #if CLONE_OFFLINK_ROUTE
653                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
654 #else
655                 goto out2;
656 #endif
657         }
658
659         dst_release(&rt->u.dst);
660         rt = nrt ? : &ip6_null_entry;
661
662         dst_hold(&rt->u.dst);
663         if (nrt) {
664                 err = ip6_ins_rt(nrt);
665                 if (!err)
666                         goto out2;
667         }
668
669         if (--attempts <= 0)
670                 goto out2;
671
672         /*
673          * Race condition! In the gap, when table->tb6_lock was
674          * released someone could insert this route.  Relookup.
675          */
676         dst_release(&rt->u.dst);
677         goto relookup;
678
679 out:
680         if (reachable) {
681                 reachable = 0;
682                 goto restart_2;
683         }
684         dst_hold(&rt->u.dst);
685         read_unlock_bh(&table->tb6_lock);
686 out2:
687         rt->u.dst.lastuse = jiffies;
688         rt->u.dst.__use++;
689
690         return rt;
691 }
692
693 void ip6_route_input(struct sk_buff *skb)
694 {
695         struct ipv6hdr *iph = skb->nh.ipv6h;
696         struct flowi fl = {
697                 .iif = skb->dev->ifindex,
698                 .nl_u = {
699                         .ip6_u = {
700                                 .daddr = iph->daddr,
701                                 .saddr = iph->saddr,
702                                 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
703                         },
704                 },
705                 .proto = iph->nexthdr,
706         };
707         int flags = 0;
708
709         if (rt6_need_strict(&iph->daddr))
710                 flags |= RT6_F_STRICT;
711
712         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
713 }
714
715 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
716                                              struct flowi *fl, int flags)
717 {
718         struct fib6_node *fn;
719         struct rt6_info *rt, *nrt;
720         int strict = 0;
721         int attempts = 3;
722         int err;
723         int reachable = RT6_SELECT_F_REACHABLE;
724
725         if (flags & RT6_F_STRICT)
726                 strict = RT6_SELECT_F_IFACE;
727
728 relookup:
729         read_lock_bh(&table->tb6_lock);
730
731 restart_2:
732         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
733
734 restart:
735         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
736         BACKTRACK();
737         if (rt == &ip6_null_entry ||
738             rt->rt6i_flags & RTF_CACHE)
739                 goto out;
740
741         dst_hold(&rt->u.dst);
742         read_unlock_bh(&table->tb6_lock);
743
744         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
745                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
746         else {
747 #if CLONE_OFFLINK_ROUTE
748                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
749 #else
750                 goto out2;
751 #endif
752         }
753
754         dst_release(&rt->u.dst);
755         rt = nrt ? : &ip6_null_entry;
756
757         dst_hold(&rt->u.dst);
758         if (nrt) {
759                 err = ip6_ins_rt(nrt);
760                 if (!err)
761                         goto out2;
762         }
763
764         if (--attempts <= 0)
765                 goto out2;
766
767         /*
768          * Race condition! In the gap, when table->tb6_lock was
769          * released someone could insert this route.  Relookup.
770          */
771         dst_release(&rt->u.dst);
772         goto relookup;
773
774 out:
775         if (reachable) {
776                 reachable = 0;
777                 goto restart_2;
778         }
779         dst_hold(&rt->u.dst);
780         read_unlock_bh(&table->tb6_lock);
781 out2:
782         rt->u.dst.lastuse = jiffies;
783         rt->u.dst.__use++;
784         return rt;
785 }
786
787 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
788 {
789         int flags = 0;
790
791         if (rt6_need_strict(&fl->fl6_dst))
792                 flags |= RT6_F_STRICT;
793
794         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
795 }
796
797
798 /*
799  *      Destination cache support functions
800  */
801
802 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
803 {
804         struct rt6_info *rt;
805
806         rt = (struct rt6_info *) dst;
807
808         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
809                 return dst;
810
811         return NULL;
812 }
813
814 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
815 {
816         struct rt6_info *rt = (struct rt6_info *) dst;
817
818         if (rt) {
819                 if (rt->rt6i_flags & RTF_CACHE)
820                         ip6_del_rt(rt);
821                 else
822                         dst_release(dst);
823         }
824         return NULL;
825 }
826
827 static void ip6_link_failure(struct sk_buff *skb)
828 {
829         struct rt6_info *rt;
830
831         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
832
833         rt = (struct rt6_info *) skb->dst;
834         if (rt) {
835                 if (rt->rt6i_flags&RTF_CACHE) {
836                         dst_set_expires(&rt->u.dst, 0);
837                         rt->rt6i_flags |= RTF_EXPIRES;
838                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
839                         rt->rt6i_node->fn_sernum = -1;
840         }
841 }
842
843 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
844 {
845         struct rt6_info *rt6 = (struct rt6_info*)dst;
846
847         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
848                 rt6->rt6i_flags |= RTF_MODIFIED;
849                 if (mtu < IPV6_MIN_MTU) {
850                         mtu = IPV6_MIN_MTU;
851                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
852                 }
853                 dst->metrics[RTAX_MTU-1] = mtu;
854                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
855         }
856 }
857
858 static int ipv6_get_mtu(struct net_device *dev);
859
860 static inline unsigned int ipv6_advmss(unsigned int mtu)
861 {
862         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
863
864         if (mtu < ip6_rt_min_advmss)
865                 mtu = ip6_rt_min_advmss;
866
867         /*
868          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
869          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
870          * IPV6_MAXPLEN is also valid and means: "any MSS, 
871          * rely only on pmtu discovery"
872          */
873         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
874                 mtu = IPV6_MAXPLEN;
875         return mtu;
876 }
877
878 static struct dst_entry *ndisc_dst_gc_list;
879 static DEFINE_SPINLOCK(ndisc_lock);
880
881 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
882                                   struct neighbour *neigh,
883                                   struct in6_addr *addr,
884                                   int (*output)(struct sk_buff *))
885 {
886         struct rt6_info *rt;
887         struct inet6_dev *idev = in6_dev_get(dev);
888
889         if (unlikely(idev == NULL))
890                 return NULL;
891
892         rt = ip6_dst_alloc();
893         if (unlikely(rt == NULL)) {
894                 in6_dev_put(idev);
895                 goto out;
896         }
897
898         dev_hold(dev);
899         if (neigh)
900                 neigh_hold(neigh);
901         else
902                 neigh = ndisc_get_neigh(dev, addr);
903
904         rt->rt6i_dev      = dev;
905         rt->rt6i_idev     = idev;
906         rt->rt6i_nexthop  = neigh;
907         atomic_set(&rt->u.dst.__refcnt, 1);
908         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
909         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
910         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
911         rt->u.dst.output  = output;
912
913 #if 0   /* there's no chance to use these for ndisc */
914         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
915                                 ? DST_HOST 
916                                 : 0;
917         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
918         rt->rt6i_dst.plen = 128;
919 #endif
920
921         spin_lock_bh(&ndisc_lock);
922         rt->u.dst.next = ndisc_dst_gc_list;
923         ndisc_dst_gc_list = &rt->u.dst;
924         spin_unlock_bh(&ndisc_lock);
925
926         fib6_force_start_gc();
927
928 out:
929         return (struct dst_entry *)rt;
930 }
931
932 int ndisc_dst_gc(int *more)
933 {
934         struct dst_entry *dst, *next, **pprev;
935         int freed;
936
937         next = NULL;
938         freed = 0;
939
940         spin_lock_bh(&ndisc_lock);
941         pprev = &ndisc_dst_gc_list;
942
943         while ((dst = *pprev) != NULL) {
944                 if (!atomic_read(&dst->__refcnt)) {
945                         *pprev = dst->next;
946                         dst_free(dst);
947                         freed++;
948                 } else {
949                         pprev = &dst->next;
950                         (*more)++;
951                 }
952         }
953
954         spin_unlock_bh(&ndisc_lock);
955
956         return freed;
957 }
958
959 static int ip6_dst_gc(void)
960 {
961         static unsigned expire = 30*HZ;
962         static unsigned long last_gc;
963         unsigned long now = jiffies;
964
965         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
966             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
967                 goto out;
968
969         expire++;
970         fib6_run_gc(expire);
971         last_gc = now;
972         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
973                 expire = ip6_rt_gc_timeout>>1;
974
975 out:
976         expire -= expire>>ip6_rt_gc_elasticity;
977         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
978 }
979
980 /* Clean host part of a prefix. Not necessary in radix tree,
981    but results in cleaner routing tables.
982
983    Remove it only when all the things will work!
984  */
985
986 static int ipv6_get_mtu(struct net_device *dev)
987 {
988         int mtu = IPV6_MIN_MTU;
989         struct inet6_dev *idev;
990
991         idev = in6_dev_get(dev);
992         if (idev) {
993                 mtu = idev->cnf.mtu6;
994                 in6_dev_put(idev);
995         }
996         return mtu;
997 }
998
999 int ipv6_get_hoplimit(struct net_device *dev)
1000 {
1001         int hoplimit = ipv6_devconf.hop_limit;
1002         struct inet6_dev *idev;
1003
1004         idev = in6_dev_get(dev);
1005         if (idev) {
1006                 hoplimit = idev->cnf.hop_limit;
1007                 in6_dev_put(idev);
1008         }
1009         return hoplimit;
1010 }
1011
1012 /*
1013  *
1014  */
1015
/*
 * Create a route from the description in @cfg and insert it into the
 * table selected by cfg->fc_table.
 *
 * @cfg: route description.  Note that it is modified: a zero fc_metric
 *       is replaced by IP6_RT_PRIO_USER and an RTPROT_UNSPEC fc_protocol
 *       by RTPROT_BOOT.
 *
 * On the install path the references held on dev and idev are stored in
 * the new route and the result of __ip6_ins_rt() is returned.  On every
 * failure path ("out") those references are dropped, the half-built
 * route is freed and a negative errno is returned.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	/* Prefix lengths cannot exceed the 128 bits of an IPv6 address. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routes are rejected when subtrees are not built in. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		/* err is preset so both lookups below can simply bail out. */
		err = -ENODEV;
		dev = dev_get_by_index(cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Input handler depends on whether the destination is multicast. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	/* Store only the prefix bits of the destination. */
	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
	       rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				/* Swap the references over to the loopback device. */
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Reject routes skip gateway validation and neighbour lookup. */
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must be reachable through a route. */
			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				/* An explicit device must match the gateway's route. */
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* No device given: inherit it from the gateway's route. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/*
			 * Accept only when the route to the gateway is not
			 * itself via a gateway; otherwise err remains
			 * -EHOSTUNREACH and we bail out below.
			 */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			/* Do not leave an ERR_PTR in the route that is about to be freed. */
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		/* Copy caller-supplied metrics; attribute type N maps to metrics[N-1]. */
		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla->nla_type;

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	/*
	 * Fill in defaults for metrics that are still zero.
	 * NOTE(review): a hop limit of -1 presumably means "use the device
	 * default" -- confirm against the users of RTAX_HOPLIMIT.
	 */
	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	/* The dev/idev references are stored in the route from here on. */
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;
	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Failure: drop whatever references were taken and free the route. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free((struct dst_entry *) rt);
	return err;
}
1215
1216 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1217 {
1218         int err;
1219         struct fib6_table *table;
1220
1221         if (rt == &ip6_null_entry)
1222                 return -ENOENT;
1223
1224         table = rt->rt6i_table;
1225         write_lock_bh(&table->tb6_lock);
1226
1227         err = fib6_del(rt, info);
1228         dst_release(&rt->u.dst);
1229
1230         write_unlock_bh(&table->tb6_lock);
1231
1232         return err;
1233 }
1234
1235 int ip6_del_rt(struct rt6_info *rt)
1236 {
1237         return __ip6_del_rt(rt, NULL);
1238 }
1239
1240 static int ip6_route_del(struct fib6_config *cfg)
1241 {
1242         struct fib6_table *table;
1243         struct fib6_node *fn;
1244         struct rt6_info *rt;
1245         int err = -ESRCH;
1246
1247         table = fib6_get_table(cfg->fc_table);
1248         if (table == NULL)
1249                 return err;
1250
1251         read_lock_bh(&table->tb6_lock);
1252
1253         fn = fib6_locate(&table->tb6_root,
1254                          &cfg->fc_dst, cfg->fc_dst_len,
1255                          &cfg->fc_src, cfg->fc_src_len);
1256         
1257         if (fn) {
1258                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1259                         if (cfg->fc_ifindex &&
1260                             (rt->rt6i_dev == NULL ||
1261                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1262                                 continue;
1263                         if (cfg->fc_flags & RTF_GATEWAY &&
1264                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1265                                 continue;
1266                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1267                                 continue;
1268                         dst_hold(&rt->u.dst);
1269                         read_unlock_bh(&table->tb6_lock);
1270
1271                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1272                 }
1273         }
1274         read_unlock_bh(&table->tb6_lock);
1275
1276         return err;
1277 }
1278
1279 /*
1280  *      Handle redirects
1281  */
/*
 * Flow key used for redirect validation: an ordinary flowi extended with
 * the address of the router the redirect came from.  'fl' must stay the
 * first member because __ip6_route_redirect() casts the struct flowi
 * pointer it receives back to this type.
 */
struct ip6rd_flowi {
	struct flowi fl;		/* destination, source and oif of the flow */
	struct in6_addr gateway;	/* router that sent the redirect */
};
1286
1287 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1288                                              struct flowi *fl,
1289                                              int flags)
1290 {
1291         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1292         struct rt6_info *rt;
1293         struct fib6_node *fn;
1294
1295         /*
1296          * Get the "current" route for this destination and
1297          * check if the redirect has come from approriate router.
1298          *
1299          * RFC 2461 specifies that redirects should only be
1300          * accepted if they come from the nexthop to the target.
1301          * Due to the way the routes are chosen, this notion
1302          * is a bit fuzzy and one might need to check all possible
1303          * routes.
1304          */
1305
1306         read_lock_bh(&table->tb6_lock);
1307         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1308 restart:
1309         for (rt = fn->leaf; rt; rt = rt->u.next) {
1310                 /*
1311                  * Current route is on-link; redirect is always invalid.
1312                  *
1313                  * Seems, previous statement is not true. It could
1314                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1315                  * But then router serving it might decide, that we should
1316                  * know truth 8)8) --ANK (980726).
1317                  */
1318                 if (rt6_check_expired(rt))
1319                         continue;
1320                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1321                         continue;
1322                 if (fl->oif != rt->rt6i_dev->ifindex)
1323                         continue;
1324                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1325                         continue;
1326                 break;
1327         }
1328
1329         if (!rt) {
1330                 if (rt6_need_strict(&fl->fl6_dst)) {
1331                         while ((fn = fn->parent) != NULL) {
1332                                 if (fn->fn_flags & RTN_ROOT)
1333                                         break;
1334                                 if (fn->fn_flags & RTN_RTINFO)
1335                                         goto restart;
1336                         }
1337                 }
1338                 rt = &ip6_null_entry;
1339         }
1340         dst_hold(&rt->u.dst);
1341
1342         read_unlock_bh(&table->tb6_lock);
1343
1344         return rt;
1345 };
1346
1347 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1348                                            struct in6_addr *src,
1349                                            struct in6_addr *gateway,
1350                                            struct net_device *dev)
1351 {
1352         struct ip6rd_flowi rdfl = {
1353                 .fl = {
1354                         .oif = dev->ifindex,
1355                         .nl_u = {
1356                                 .ip6_u = {
1357                                         .daddr = *dest,
1358                                         .saddr = *src,
1359                                 },
1360                         },
1361                 },
1362                 .gateway = *gateway,
1363         };
1364         int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
1365
1366         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1367 }
1368
1369 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1370                   struct in6_addr *saddr,
1371                   struct neighbour *neigh, u8 *lladdr, int on_link)
1372 {
1373         struct rt6_info *rt, *nrt = NULL;
1374         struct netevent_redirect netevent;
1375
1376         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1377
1378         if (rt == &ip6_null_entry) {
1379                 if (net_ratelimit())
1380                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1381                                "for redirect target\n");
1382                 goto out;
1383         }
1384
1385         /*
1386          *      We have finally decided to accept it.
1387          */
1388
1389         neigh_update(neigh, lladdr, NUD_STALE, 
1390                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1391                      NEIGH_UPDATE_F_OVERRIDE|
1392                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1393                                      NEIGH_UPDATE_F_ISROUTER))
1394                      );
1395
1396         /*
1397          * Redirect received -> path was valid.
1398          * Look, redirects are sent only in response to data packets,
1399          * so that this nexthop apparently is reachable. --ANK
1400          */
1401         dst_confirm(&rt->u.dst);
1402
1403         /* Duplicate redirect: silently ignore. */
1404         if (neigh == rt->u.dst.neighbour)
1405                 goto out;
1406
1407         nrt = ip6_rt_copy(rt);
1408         if (nrt == NULL)
1409                 goto out;
1410
1411         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1412         if (on_link)
1413                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1414
1415         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1416         nrt->rt6i_dst.plen = 128;
1417         nrt->u.dst.flags |= DST_HOST;
1418
1419         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1420         nrt->rt6i_nexthop = neigh_clone(neigh);
1421         /* Reset pmtu, it may be better */
1422         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1423         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1424
1425         if (ip6_ins_rt(nrt))
1426                 goto out;
1427
1428         netevent.old = &rt->u.dst;
1429         netevent.new = &nrt->u.dst;
1430         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1431
1432         if (rt->rt6i_flags&RTF_CACHE) {
1433                 ip6_del_rt(rt);
1434                 return;
1435         }
1436
1437 out:
1438         dst_release(&rt->u.dst);
1439         return;
1440 }
1441
1442 /*
1443  *      Handle ICMP "packet too big" messages
1444  *      i.e. Path MTU discovery
1445  */
1446
1447 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1448                         struct net_device *dev, u32 pmtu)
1449 {
1450         struct rt6_info *rt, *nrt;
1451         int allfrag = 0;
1452
1453         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1454         if (rt == NULL)
1455                 return;
1456
1457         if (pmtu >= dst_mtu(&rt->u.dst))
1458                 goto out;
1459
1460         if (pmtu < IPV6_MIN_MTU) {
1461                 /*
1462                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1463                  * MTU (1280) and a fragment header should always be included
1464                  * after a node receiving Too Big message reporting PMTU is
1465                  * less than the IPv6 Minimum Link MTU.
1466                  */
1467                 pmtu = IPV6_MIN_MTU;
1468                 allfrag = 1;
1469         }
1470
1471         /* New mtu received -> path was valid.
1472            They are sent only in response to data packets,
1473            so that this nexthop apparently is reachable. --ANK
1474          */
1475         dst_confirm(&rt->u.dst);
1476
1477         /* Host route. If it is static, it would be better
1478            not to override it, but add new one, so that
1479            when cache entry will expire old pmtu
1480            would return automatically.
1481          */
1482         if (rt->rt6i_flags & RTF_CACHE) {
1483                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1484                 if (allfrag)
1485                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1486                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1487                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1488                 goto out;
1489         }
1490
1491         /* Network route.
1492            Two cases are possible:
1493            1. It is connected route. Action: COW
1494            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1495          */
1496         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1497                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1498         else
1499                 nrt = rt6_alloc_clone(rt, daddr);
1500
1501         if (nrt) {
1502                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1503                 if (allfrag)
1504                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1505
1506                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1507                  * happened within 5 mins, the recommended timer is 10 mins.
1508                  * Here this route expiration time is set to ip6_rt_mtu_expires
1509                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1510                  * and detecting PMTU increase will be automatically happened.
1511                  */
1512                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1513                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1514
1515                 ip6_ins_rt(nrt);
1516         }
1517 out:
1518         dst_release(&rt->u.dst);
1519 }
1520
1521 /*
1522  *      Misc support functions
1523  */
1524
1525 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1526 {
1527         struct rt6_info *rt = ip6_dst_alloc();
1528
1529         if (rt) {
1530                 rt->u.dst.input = ort->u.dst.input;
1531                 rt->u.dst.output = ort->u.dst.output;
1532
1533                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1534                 rt->u.dst.dev = ort->u.dst.dev;
1535                 if (rt->u.dst.dev)
1536                         dev_hold(rt->u.dst.dev);
1537                 rt->rt6i_idev = ort->rt6i_idev;
1538                 if (rt->rt6i_idev)
1539                         in6_dev_hold(rt->rt6i_idev);
1540                 rt->u.dst.lastuse = jiffies;
1541                 rt->rt6i_expires = 0;
1542
1543                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1544                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1545                 rt->rt6i_metric = 0;
1546
1547                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1548 #ifdef CONFIG_IPV6_SUBTREES
1549                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1550 #endif
1551                 rt->rt6i_table = ort->rt6i_table;
1552         }
1553         return rt;
1554 }
1555
1556 #ifdef CONFIG_IPV6_ROUTE_INFO
1557 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1558                                            struct in6_addr *gwaddr, int ifindex)
1559 {
1560         struct fib6_node *fn;
1561         struct rt6_info *rt = NULL;
1562         struct fib6_table *table;
1563
1564         table = fib6_get_table(RT6_TABLE_INFO);
1565         if (table == NULL)
1566                 return NULL;
1567
1568         write_lock_bh(&table->tb6_lock);
1569         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1570         if (!fn)
1571                 goto out;
1572
1573         for (rt = fn->leaf; rt; rt = rt->u.next) {
1574                 if (rt->rt6i_dev->ifindex != ifindex)
1575                         continue;
1576                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1577                         continue;
1578                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1579                         continue;
1580                 dst_hold(&rt->u.dst);
1581                 break;
1582         }
1583 out:
1584         write_unlock_bh(&table->tb6_lock);
1585         return rt;
1586 }
1587
1588 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1589                                            struct in6_addr *gwaddr, int ifindex,
1590                                            unsigned pref)
1591 {
1592         struct fib6_config cfg = {
1593                 .fc_table       = RT6_TABLE_INFO,
1594                 .fc_metric      = 1024,
1595                 .fc_ifindex     = ifindex,
1596                 .fc_dst_len     = prefixlen,
1597                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1598                                   RTF_UP | RTF_PREF(pref),
1599         };
1600
1601         ipv6_addr_copy(&cfg.fc_dst, prefix);
1602         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1603
1604         /* We should treat it as a default route if prefix length is 0. */
1605         if (!prefixlen)
1606                 cfg.fc_flags |= RTF_DEFAULT;
1607
1608         ip6_route_add(&cfg);
1609
1610         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1611 }
1612 #endif
1613
1614 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1615 {       
1616         struct rt6_info *rt;
1617         struct fib6_table *table;
1618
1619         table = fib6_get_table(RT6_TABLE_DFLT);
1620         if (table == NULL)
1621                 return NULL;
1622
1623         write_lock_bh(&table->tb6_lock);
1624         for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1625                 if (dev == rt->rt6i_dev &&
1626                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1627                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1628                         break;
1629         }
1630         if (rt)
1631                 dst_hold(&rt->u.dst);
1632         write_unlock_bh(&table->tb6_lock);
1633         return rt;
1634 }
1635
1636 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1637                                      struct net_device *dev,
1638                                      unsigned int pref)
1639 {
1640         struct fib6_config cfg = {
1641                 .fc_table       = RT6_TABLE_DFLT,
1642                 .fc_metric      = 1024,
1643                 .fc_ifindex     = dev->ifindex,
1644                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1645                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1646         };
1647
1648         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1649
1650         ip6_route_add(&cfg);
1651
1652         return rt6_get_dflt_router(gwaddr, dev);
1653 }
1654
1655 void rt6_purge_dflt_routers(void)
1656 {
1657         struct rt6_info *rt;
1658         struct fib6_table *table;
1659
1660         /* NOTE: Keep consistent with rt6_get_dflt_router */
1661         table = fib6_get_table(RT6_TABLE_DFLT);
1662         if (table == NULL)
1663                 return;
1664
1665 restart:
1666         read_lock_bh(&table->tb6_lock);
1667         for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1668                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1669                         dst_hold(&rt->u.dst);
1670                         read_unlock_bh(&table->tb6_lock);
1671                         ip6_del_rt(rt);
1672                         goto restart;
1673                 }
1674         }
1675         read_unlock_bh(&table->tb6_lock);
1676 }
1677
1678 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1679                                  struct fib6_config *cfg)
1680 {
1681         memset(cfg, 0, sizeof(*cfg));
1682
1683         cfg->fc_table = RT6_TABLE_MAIN;
1684         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1685         cfg->fc_metric = rtmsg->rtmsg_metric;
1686         cfg->fc_expires = rtmsg->rtmsg_info;
1687         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1688         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1689         cfg->fc_flags = rtmsg->rtmsg_flags;
1690
1691         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1692         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1693         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1694 }
1695
1696 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1697 {
1698         struct fib6_config cfg;
1699         struct in6_rtmsg rtmsg;
1700         int err;
1701
1702         switch(cmd) {
1703         case SIOCADDRT:         /* Add a route */
1704         case SIOCDELRT:         /* Delete a route */
1705                 if (!capable(CAP_NET_ADMIN))
1706                         return -EPERM;
1707                 err = copy_from_user(&rtmsg, arg,
1708                                      sizeof(struct in6_rtmsg));
1709                 if (err)
1710                         return -EFAULT;
1711
1712                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1713
1714                 rtnl_lock();
1715                 switch (cmd) {
1716                 case SIOCADDRT:
1717                         err = ip6_route_add(&cfg);
1718                         break;
1719                 case SIOCDELRT:
1720                         err = ip6_route_del(&cfg);
1721                         break;
1722                 default:
1723                         err = -EINVAL;
1724                 }
1725                 rtnl_unlock();
1726
1727                 return err;
1728         };
1729
1730         return -EINVAL;
1731 }
1732
1733 /*
1734  *      Drop the packet on the floor
1735  */
1736
1737 static int ip6_pkt_discard(struct sk_buff *skb)
1738 {
1739         int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1740         if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1741                 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1742
1743         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1744         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1745         kfree_skb(skb);
1746         return 0;
1747 }
1748
1749 static int ip6_pkt_discard_out(struct sk_buff *skb)
1750 {
1751         skb->dev = skb->dst->dev;
1752         return ip6_pkt_discard(skb);
1753 }
1754
1755 /*
1756  *      Allocate a dst for local (unicast / anycast) address.
1757  */
1758
1759 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1760                                     const struct in6_addr *addr,
1761                                     int anycast)
1762 {
1763         struct rt6_info *rt = ip6_dst_alloc();
1764
1765         if (rt == NULL)
1766                 return ERR_PTR(-ENOMEM);
1767
1768         dev_hold(&loopback_dev);
1769         in6_dev_hold(idev);
1770
1771         rt->u.dst.flags = DST_HOST;
1772         rt->u.dst.input = ip6_input;
1773         rt->u.dst.output = ip6_output;
1774         rt->rt6i_dev = &loopback_dev;
1775         rt->rt6i_idev = idev;
1776         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1777         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1778         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1779         rt->u.dst.obsolete = -1;
1780
1781         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1782         if (anycast)
1783                 rt->rt6i_flags |= RTF_ANYCAST;
1784         else
1785                 rt->rt6i_flags |= RTF_LOCAL;
1786         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1787         if (rt->rt6i_nexthop == NULL) {
1788                 dst_free((struct dst_entry *) rt);
1789                 return ERR_PTR(-ENOMEM);
1790         }
1791
1792         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1793         rt->rt6i_dst.plen = 128;
1794         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1795
1796         atomic_set(&rt->u.dst.__refcnt, 1);
1797
1798         return rt;
1799 }
1800
1801 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1802 {
1803         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1804             rt != &ip6_null_entry) {
1805                 RT6_TRACE("deleted by ifdown %p\n", rt);
1806                 return -1;
1807         }
1808         return 0;
1809 }
1810
1811 void rt6_ifdown(struct net_device *dev)
1812 {
1813         fib6_clean_all(fib6_ifdown, 0, dev);
1814 }
1815
/* Argument passed through fib6_clean_all() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU of that device */
};
1821
1822 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1823 {
1824         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1825         struct inet6_dev *idev;
1826
1827         /* In IPv6 pmtu discovery is not optional,
1828            so that RTAX_MTU lock cannot disable it.
1829            We still use this lock to block changes
1830            caused by addrconf/ndisc.
1831         */
1832
1833         idev = __in6_dev_get(arg->dev);
1834         if (idev == NULL)
1835                 return 0;
1836
1837         /* For administrative MTU increase, there is no way to discover
1838            IPv6 PMTU increase, so PMTU increase should be updated here.
1839            Since RFC 1981 doesn't include administrative MTU increase
1840            update PMTU increase is a MUST. (i.e. jumbo frame)
1841          */
1842         /*
1843            If new MTU is less than route PMTU, this new MTU will be the
1844            lowest MTU in the path, update the route PMTU to reflect PMTU
1845            decreases; if new MTU is greater than route PMTU, and the
1846            old MTU is the lowest MTU in the path, update the route PMTU
1847            to reflect the increase. In this case if the other nodes' MTU
1848            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1849            PMTU discouvery.
1850          */
1851         if (rt->rt6i_dev == arg->dev &&
1852             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1853             (dst_mtu(&rt->u.dst) > arg->mtu ||
1854              (dst_mtu(&rt->u.dst) < arg->mtu &&
1855               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1856                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1857         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1858         return 0;
1859 }
1860
1861 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1862 {
1863         struct rt6_mtu_change_arg arg = {
1864                 .dev = dev,
1865                 .mtu = mtu,
1866         };
1867
1868         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1869 }
1870
1871 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1872         [RTA_GATEWAY]           = { .minlen = sizeof(struct in6_addr) },
1873         [RTA_OIF]               = { .type = NLA_U32 },
1874         [RTA_IIF]               = { .type = NLA_U32 },
1875         [RTA_PRIORITY]          = { .type = NLA_U32 },
1876         [RTA_METRICS]           = { .type = NLA_NESTED },
1877 };
1878
1879 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1880                               struct fib6_config *cfg)
1881 {
1882         struct rtmsg *rtm;
1883         struct nlattr *tb[RTA_MAX+1];
1884         int err;
1885
1886         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1887         if (err < 0)
1888                 goto errout;
1889
1890         err = -EINVAL;
1891         rtm = nlmsg_data(nlh);
1892         memset(cfg, 0, sizeof(*cfg));
1893
1894         cfg->fc_table = rtm->rtm_table;
1895         cfg->fc_dst_len = rtm->rtm_dst_len;
1896         cfg->fc_src_len = rtm->rtm_src_len;
1897         cfg->fc_flags = RTF_UP;
1898         cfg->fc_protocol = rtm->rtm_protocol;
1899
1900         if (rtm->rtm_type == RTN_UNREACHABLE)
1901                 cfg->fc_flags |= RTF_REJECT;
1902
1903         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1904         cfg->fc_nlinfo.nlh = nlh;
1905
1906         if (tb[RTA_GATEWAY]) {
1907                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1908                 cfg->fc_flags |= RTF_GATEWAY;
1909         }
1910
1911         if (tb[RTA_DST]) {
1912                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1913
1914                 if (nla_len(tb[RTA_DST]) < plen)
1915                         goto errout;
1916
1917                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1918         }
1919
1920         if (tb[RTA_SRC]) {
1921                 int plen = (rtm->rtm_src_len + 7) >> 3;
1922
1923                 if (nla_len(tb[RTA_SRC]) < plen)
1924                         goto errout;
1925
1926                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1927         }
1928
1929         if (tb[RTA_OIF])
1930                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1931
1932         if (tb[RTA_PRIORITY])
1933                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1934
1935         if (tb[RTA_METRICS]) {
1936                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1937                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1938         }
1939
1940         if (tb[RTA_TABLE])
1941                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1942
1943         err = 0;
1944 errout:
1945         return err;
1946 }
1947
1948 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1949 {
1950         struct fib6_config cfg;
1951         int err;
1952
1953         err = rtm_to_fib6_config(skb, nlh, &cfg);
1954         if (err < 0)
1955                 return err;
1956
1957         return ip6_route_del(&cfg);
1958 }
1959
1960 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1961 {
1962         struct fib6_config cfg;
1963         int err;
1964
1965         err = rtm_to_fib6_config(skb, nlh, &cfg);
1966         if (err < 0)
1967                 return err;
1968
1969         return ip6_route_add(&cfg);
1970 }
1971
1972 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1973                          struct in6_addr *dst, struct in6_addr *src,
1974                          int iif, int type, u32 pid, u32 seq,
1975                          int prefix, unsigned int flags)
1976 {
1977         struct rtmsg *rtm;
1978         struct nlmsghdr *nlh;
1979         struct rta_cacheinfo ci;
1980         u32 table;
1981
1982         if (prefix) {   /* user wants prefix routes only */
1983                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1984                         /* success since this is not a prefix route */
1985                         return 1;
1986                 }
1987         }
1988
1989         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1990         if (nlh == NULL)
1991                 return -ENOBUFS;
1992
1993         rtm = nlmsg_data(nlh);
1994         rtm->rtm_family = AF_INET6;
1995         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1996         rtm->rtm_src_len = rt->rt6i_src.plen;
1997         rtm->rtm_tos = 0;
1998         if (rt->rt6i_table)
1999                 table = rt->rt6i_table->tb6_id;
2000         else
2001                 table = RT6_TABLE_UNSPEC;
2002         rtm->rtm_table = table;
2003         NLA_PUT_U32(skb, RTA_TABLE, table);
2004         if (rt->rt6i_flags&RTF_REJECT)
2005                 rtm->rtm_type = RTN_UNREACHABLE;
2006         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2007                 rtm->rtm_type = RTN_LOCAL;
2008         else
2009                 rtm->rtm_type = RTN_UNICAST;
2010         rtm->rtm_flags = 0;
2011         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2012         rtm->rtm_protocol = rt->rt6i_protocol;
2013         if (rt->rt6i_flags&RTF_DYNAMIC)
2014                 rtm->rtm_protocol = RTPROT_REDIRECT;
2015         else if (rt->rt6i_flags & RTF_ADDRCONF)
2016                 rtm->rtm_protocol = RTPROT_KERNEL;
2017         else if (rt->rt6i_flags&RTF_DEFAULT)
2018                 rtm->rtm_protocol = RTPROT_RA;
2019
2020         if (rt->rt6i_flags&RTF_CACHE)
2021                 rtm->rtm_flags |= RTM_F_CLONED;
2022
2023         if (dst) {
2024                 NLA_PUT(skb, RTA_DST, 16, dst);
2025                 rtm->rtm_dst_len = 128;
2026         } else if (rtm->rtm_dst_len)
2027                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2028 #ifdef CONFIG_IPV6_SUBTREES
2029         if (src) {
2030                 NLA_PUT(skb, RTA_SRC, 16, src);
2031                 rtm->rtm_src_len = 128;
2032         } else if (rtm->rtm_src_len)
2033                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2034 #endif
2035         if (iif)
2036                 NLA_PUT_U32(skb, RTA_IIF, iif);
2037         else if (dst) {
2038                 struct in6_addr saddr_buf;
2039                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2040                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2041         }
2042
2043         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2044                 goto nla_put_failure;
2045
2046         if (rt->u.dst.neighbour)
2047                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2048
2049         if (rt->u.dst.dev)
2050                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2051
2052         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2053         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2054         if (rt->rt6i_expires)
2055                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2056         else
2057                 ci.rta_expires = 0;
2058         ci.rta_used = rt->u.dst.__use;
2059         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2060         ci.rta_error = rt->u.dst.error;
2061         ci.rta_id = 0;
2062         ci.rta_ts = 0;
2063         ci.rta_tsage = 0;
2064         NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2065
2066         return nlmsg_end(skb, nlh);
2067
2068 nla_put_failure:
2069         return nlmsg_cancel(skb, nlh);
2070 }
2071
2072 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2073 {
2074         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2075         int prefix;
2076
2077         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2078                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2079                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2080         } else
2081                 prefix = 0;
2082
2083         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2084                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2085                      prefix, NLM_F_MULTI);
2086 }
2087
2088 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2089 {
2090         struct nlattr *tb[RTA_MAX+1];
2091         struct rt6_info *rt;
2092         struct sk_buff *skb;
2093         struct rtmsg *rtm;
2094         struct flowi fl;
2095         int err, iif = 0;
2096
2097         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2098         if (err < 0)
2099                 goto errout;
2100
2101         err = -EINVAL;
2102         memset(&fl, 0, sizeof(fl));
2103
2104         if (tb[RTA_SRC]) {
2105                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2106                         goto errout;
2107
2108                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2109         }
2110
2111         if (tb[RTA_DST]) {
2112                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2113                         goto errout;
2114
2115                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2116         }
2117
2118         if (tb[RTA_IIF])
2119                 iif = nla_get_u32(tb[RTA_IIF]);
2120
2121         if (tb[RTA_OIF])
2122                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2123
2124         if (iif) {
2125                 struct net_device *dev;
2126                 dev = __dev_get_by_index(iif);
2127                 if (!dev) {
2128                         err = -ENODEV;
2129                         goto errout;
2130                 }
2131         }
2132
2133         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2134         if (skb == NULL) {
2135                 err = -ENOBUFS;
2136                 goto errout;
2137         }
2138
2139         /* Reserve room for dummy headers, this skb can pass
2140            through good chunk of routing engine.
2141          */
2142         skb->mac.raw = skb->data;
2143         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2144
2145         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2146         skb->dst = &rt->u.dst;
2147
2148         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2149                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2150                             nlh->nlmsg_seq, 0, 0);
2151         if (err < 0) {
2152                 kfree_skb(skb);
2153                 goto errout;
2154         }
2155
2156         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2157 errout:
2158         return err;
2159 }
2160
2161 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2162 {
2163         struct sk_buff *skb;
2164         u32 pid = 0, seq = 0;
2165         struct nlmsghdr *nlh = NULL;
2166         int payload = sizeof(struct rtmsg) + 256;
2167         int err = -ENOBUFS;
2168
2169         if (info) {
2170                 pid = info->pid;
2171                 nlh = info->nlh;
2172                 if (nlh)
2173                         seq = nlh->nlmsg_seq;
2174         }
2175
2176         skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2177         if (skb == NULL)
2178                 goto errout;
2179
2180         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2181         if (err < 0) {
2182                 kfree_skb(skb);
2183                 goto errout;
2184         }
2185
2186         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2187 errout:
2188         if (err < 0)
2189                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2190 }
2191
2192 /*
2193  *      /proc
2194  */
2195
2196 #ifdef CONFIG_PROC_FS
2197
2198 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2199
2200 struct rt6_proc_arg
2201 {
2202         char *buffer;
2203         int offset;
2204         int length;
2205         int skip;
2206         int len;
2207 };
2208
2209 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2210 {
2211         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2212         int i;
2213
2214         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2215                 arg->skip++;
2216                 return 0;
2217         }
2218
2219         if (arg->len >= arg->length)
2220                 return 0;
2221
2222         for (i=0; i<16; i++) {
2223                 sprintf(arg->buffer + arg->len, "%02x",
2224                         rt->rt6i_dst.addr.s6_addr[i]);
2225                 arg->len += 2;
2226         }
2227         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2228                             rt->rt6i_dst.plen);
2229
2230 #ifdef CONFIG_IPV6_SUBTREES
2231         for (i=0; i<16; i++) {
2232                 sprintf(arg->buffer + arg->len, "%02x",
2233                         rt->rt6i_src.addr.s6_addr[i]);
2234                 arg->len += 2;
2235         }
2236         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2237                             rt->rt6i_src.plen);
2238 #else
2239         sprintf(arg->buffer + arg->len,
2240                 "00000000000000000000000000000000 00 ");
2241         arg->len += 36;
2242 #endif
2243
2244         if (rt->rt6i_nexthop) {
2245                 for (i=0; i<16; i++) {
2246                         sprintf(arg->buffer + arg->len, "%02x",
2247                                 rt->rt6i_nexthop->primary_key[i]);
2248                         arg->len += 2;
2249                 }
2250         } else {
2251                 sprintf(arg->buffer + arg->len,
2252                         "00000000000000000000000000000000");
2253                 arg->len += 32;
2254         }
2255         arg->len += sprintf(arg->buffer + arg->len,
2256                             " %08x %08x %08x %08x %8s\n",
2257                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2258                             rt->u.dst.__use, rt->rt6i_flags, 
2259                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2260         return 0;
2261 }
2262
2263 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2264 {
2265         struct rt6_proc_arg arg = {
2266                 .buffer = buffer,
2267                 .offset = offset,
2268                 .length = length,
2269         };
2270
2271         fib6_clean_all(rt6_info_route, 0, &arg);
2272
2273         *start = buffer;
2274         if (offset)
2275                 *start += offset % RT6_INFO_LEN;
2276
2277         arg.len -= offset % RT6_INFO_LEN;
2278
2279         if (arg.len > length)
2280                 arg.len = length;
2281         if (arg.len < 0)
2282                 arg.len = 0;
2283
2284         return arg.len;
2285 }
2286
2287 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2288 {
2289         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2290                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2291                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2292                       rt6_stats.fib_rt_cache,
2293                       atomic_read(&ip6_dst_ops.entries),
2294                       rt6_stats.fib_discarded_routes);
2295
2296         return 0;
2297 }
2298
/* Open handler: bind the file to the single-record show routine above. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}

/* File operations for the "rt6_stats" proc entry (seq_file based). */
static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
2311 #endif  /* CONFIG_PROC_FS */
2312
2313 #ifdef CONFIG_SYSCTL
2314
2315 static int flush_delay;
2316
2317 static
2318 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2319                               void __user *buffer, size_t *lenp, loff_t *ppos)
2320 {
2321         if (write) {
2322                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2323                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2324                 return 0;
2325         } else
2326                 return -EINVAL;
2327 }
2328
2329 ctl_table ipv6_route_table[] = {
2330         {
2331                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2332                 .procname       =       "flush",
2333                 .data           =       &flush_delay,
2334                 .maxlen         =       sizeof(int),
2335                 .mode           =       0200,
2336                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2337         },
2338         {
2339                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2340                 .procname       =       "gc_thresh",
2341                 .data           =       &ip6_dst_ops.gc_thresh,
2342                 .maxlen         =       sizeof(int),
2343                 .mode           =       0644,
2344                 .proc_handler   =       &proc_dointvec,
2345         },
2346         {
2347                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2348                 .procname       =       "max_size",
2349                 .data           =       &ip6_rt_max_size,
2350                 .maxlen         =       sizeof(int),
2351                 .mode           =       0644,
2352                 .proc_handler   =       &proc_dointvec,
2353         },
2354         {
2355                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2356                 .procname       =       "gc_min_interval",
2357                 .data           =       &ip6_rt_gc_min_interval,
2358                 .maxlen         =       sizeof(int),
2359                 .mode           =       0644,
2360                 .proc_handler   =       &proc_dointvec_jiffies,
2361                 .strategy       =       &sysctl_jiffies,
2362         },
2363         {
2364                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2365                 .procname       =       "gc_timeout",
2366                 .data           =       &ip6_rt_gc_timeout,
2367                 .maxlen         =       sizeof(int),
2368                 .mode           =       0644,
2369                 .proc_handler   =       &proc_dointvec_jiffies,
2370                 .strategy       =       &sysctl_jiffies,
2371         },
2372         {
2373                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2374                 .procname       =       "gc_interval",
2375                 .data           =       &ip6_rt_gc_interval,
2376                 .maxlen         =       sizeof(int),
2377                 .mode           =       0644,
2378                 .proc_handler   =       &proc_dointvec_jiffies,
2379                 .strategy       =       &sysctl_jiffies,
2380         },
2381         {
2382                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2383                 .procname       =       "gc_elasticity",
2384                 .data           =       &ip6_rt_gc_elasticity,
2385                 .maxlen         =       sizeof(int),
2386                 .mode           =       0644,
2387                 .proc_handler   =       &proc_dointvec_jiffies,
2388                 .strategy       =       &sysctl_jiffies,
2389         },
2390         {
2391                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2392                 .procname       =       "mtu_expires",
2393                 .data           =       &ip6_rt_mtu_expires,
2394                 .maxlen         =       sizeof(int),
2395                 .mode           =       0644,
2396                 .proc_handler   =       &proc_dointvec_jiffies,
2397                 .strategy       =       &sysctl_jiffies,
2398         },
2399         {
2400                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2401                 .procname       =       "min_adv_mss",
2402                 .data           =       &ip6_rt_min_advmss,
2403                 .maxlen         =       sizeof(int),
2404                 .mode           =       0644,
2405                 .proc_handler   =       &proc_dointvec_jiffies,
2406                 .strategy       =       &sysctl_jiffies,
2407         },
2408         {
2409                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2410                 .procname       =       "gc_min_interval_ms",
2411                 .data           =       &ip6_rt_gc_min_interval,
2412                 .maxlen         =       sizeof(int),
2413                 .mode           =       0644,
2414                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2415                 .strategy       =       &sysctl_ms_jiffies,
2416         },
2417         { .ctl_name = 0 }
2418 };
2419
2420 #endif
2421
2422 void __init ip6_route_init(void)
2423 {
2424         struct proc_dir_entry *p;
2425
2426         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2427                                                      sizeof(struct rt6_info),
2428                                                      0, SLAB_HWCACHE_ALIGN,
2429                                                      NULL, NULL);
2430         if (!ip6_dst_ops.kmem_cachep)
2431                 panic("cannot create ip6_dst_cache");
2432
2433         fib6_init();
2434 #ifdef  CONFIG_PROC_FS
2435         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2436         if (p)
2437                 p->owner = THIS_MODULE;
2438
2439         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2440 #endif
2441 #ifdef CONFIG_XFRM
2442         xfrm6_init();
2443 #endif
2444 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2445         fib6_rules_init();
2446 #endif
2447 }
2448
/*
 * Tear down what ip6_route_init() set up: unregister the optional rule,
 * proc and xfrm support, drop the routes of all devices (NULL matches
 * every device), stop the FIB garbage collector and destroy the dst
 * slab cache.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	fib6_rules_cleanup();
#endif
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	/* routes must be gone before the cache that backs them is destroyed */
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}