git.karo-electronics.de Git - mv-sheeva.git/blobdiff - net/ipv4/route.c
ipv4: Flush per-ns routing cache more sanely.
[mv-sheeva.git] / net / ipv4 / route.c
index 80997333db0cf0c750acdf4d8b2299788546a913..d8b4f4d0d66e5da7f1859ae36d5dcc3bd69c2554 100644 (file)
@@ -140,6 +140,7 @@ static unsigned long expires_ljiffies;
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int     ipv4_default_advmss(const struct dst_entry *dst);
+static unsigned int     ipv4_default_mtu(const struct dst_entry *dst);
 static void             ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void             ipv4_link_failure(struct sk_buff *skb);
@@ -157,6 +158,7 @@ static struct dst_ops ipv4_dst_ops = {
        .gc =                   rt_garbage_collect,
        .check =                ipv4_dst_check,
        .default_advmss =       ipv4_default_advmss,
+       .default_mtu =          ipv4_default_mtu,
        .destroy =              ipv4_dst_destroy,
        .ifdown =               ipv4_dst_ifdown,
        .negative_advice =      ipv4_negative_advice,
@@ -715,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
  * Can be called by a softirq or a process.
  * In the later case, we want to be reschedule if necessary
  */
-static void rt_do_flush(int process_context)
+static void rt_do_flush(struct net *net, int process_context)
 {
        unsigned int i;
        struct rtable *rth, *next;
-       struct rtable * tail;
 
        for (i = 0; i <= rt_hash_mask; i++) {
+               struct rtable __rcu **pprev;
+               struct rtable *list;
+
                if (process_context && need_resched())
                        cond_resched();
                rth = rcu_dereference_raw(rt_hash_table[i].chain);
@@ -729,50 +733,32 @@ static void rt_do_flush(int process_context)
                        continue;
 
                spin_lock_bh(rt_hash_lock_addr(i));
-#ifdef CONFIG_NET_NS
-               {
-               struct rtable __rcu **prev;
-               struct rtable *p;
 
-               rth = rcu_dereference_protected(rt_hash_table[i].chain,
+               list = NULL;
+               pprev = &rt_hash_table[i].chain;
+               rth = rcu_dereference_protected(*pprev,
                        lockdep_is_held(rt_hash_lock_addr(i)));
 
-               /* defer releasing the head of the list after spin_unlock */
-               for (tail = rth; tail;
-                    tail = rcu_dereference_protected(tail->dst.rt_next,
-                               lockdep_is_held(rt_hash_lock_addr(i))))
-                       if (!rt_is_expired(tail))
-                               break;
-               if (rth != tail)
-                       rt_hash_table[i].chain = tail;
-
-               /* call rt_free on entries after the tail requiring flush */
-               prev = &rt_hash_table[i].chain;
-               for (p = rcu_dereference_protected(*prev,
+               while (rth) {
+                       next = rcu_dereference_protected(rth->dst.rt_next,
                                lockdep_is_held(rt_hash_lock_addr(i)));
-                    p != NULL;
-                    p = next) {
-                       next = rcu_dereference_protected(p->dst.rt_next,
-                               lockdep_is_held(rt_hash_lock_addr(i)));
-                       if (!rt_is_expired(p)) {
-                               prev = &p->dst.rt_next;
+
+                       if (!net ||
+                           net_eq(dev_net(rth->dst.dev), net)) {
+                               rcu_assign_pointer(*pprev, next);
+                               rcu_assign_pointer(rth->dst.rt_next, list);
+                               list = rth;
                        } else {
-                               *prev = next;
-                               rt_free(p);
+                               pprev = &rth->dst.rt_next;
                        }
+                       rth = next;
                }
-               }
-#else
-               rth = rcu_dereference_protected(rt_hash_table[i].chain,
-                       lockdep_is_held(rt_hash_lock_addr(i)));
-               rcu_assign_pointer(rt_hash_table[i].chain, NULL);
-               tail = NULL;
-#endif
+
                spin_unlock_bh(rt_hash_lock_addr(i));
 
-               for (; rth != tail; rth = next) {
-                       next = rcu_dereference_protected(rth->dst.rt_next, 1);
-                       rt_free(rth);
+               for (; list; list = next) {
+                       next = rcu_dereference_protected(list->dst.rt_next, 1);
+                       rt_free(list);
                }
        }
 }
@@ -920,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
 {
        rt_cache_invalidate(net);
        if (delay >= 0)
-               rt_do_flush(!in_softirq());
+               rt_do_flush(net, !in_softirq());
 }
 
 /* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(void)
+void rt_cache_flush_batch(struct net *net)
 {
-       rt_do_flush(!in_softirq());
+       rt_do_flush(net, !in_softirq());
 }
 
 static void rt_emergency_hash_rebuild(struct net *net)
@@ -1812,6 +1798,23 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
        return advmss;
 }
 
+static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+{
+       unsigned int mtu = dst->dev->mtu;
+
+       if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+               const struct rtable *rt = (const struct rtable *) dst;
+
+               if (rt->rt_gateway != rt->rt_dst && mtu > 576)
+                       mtu = 576;
+       }
+
+       if (mtu > IP_MAX_MTU)
+               mtu = IP_MAX_MTU;
+
+       return mtu;
+}
+
 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 {
        struct dst_entry *dst = &rt->dst;
@@ -1822,18 +1825,10 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
                    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
                        rt->rt_gateway = FIB_RES_GW(*res);
                dst_import_metrics(dst, fi->fib_metrics);
-               if (fi->fib_mtu == 0) {
-                       dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
-                       if (dst_metric_locked(dst, RTAX_MTU) &&
-                           rt->rt_gateway != rt->rt_dst &&
-                           dst->dev->mtu > 576)
-                               dst_metric_set(dst, RTAX_MTU, 576);
-               }
 #ifdef CONFIG_NET_CLS_ROUTE
                dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
-       } else
-               dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
+       }
 
        if (dst_mtu(dst) > IP_MAX_MTU)
                dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);