flow: virtualize flow cache entry methods
author     Timo Teräs <timo.teras@iki.fi>
           Wed, 7 Apr 2010 00:30:04 +0000 (00:30 +0000)
committer  David S. Miller <davem@davemloft.net>
           Wed, 7 Apr 2010 10:43:18 +0000 (03:43 -0700)
This allows the cached object to be validated before it is
returned, and allows the object to be destructed properly if the
last reference to it was held in the flow cache. It is also a
preparation for caching bundles in the flow cache.
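
For illustration only, here is a minimal user-space sketch of the
virtualized entry pattern (this is not part of the patch):
flow_cache_object and flow_cache_ops mirror the structures added to
include/net/flow.h below, while toy_policy, the toy_* handlers and
the main() driver are hypothetical stand-ins for struct xfrm_policy
and its xfrm_policy_flo_* methods.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct flow_cache_ops;

struct flow_cache_object {
	const struct flow_cache_ops *ops;
};

struct flow_cache_ops {
	struct flow_cache_object *(*get)(struct flow_cache_object *);
	int (*check)(struct flow_cache_object *);
	void (*delete)(struct flow_cache_object *);
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_policy {			/* hypothetical stand-in */
	int refcnt;
	int dead;
	struct flow_cache_object flo;	/* embedded, like xfrm_policy */
};

static struct flow_cache_object *toy_get(struct flow_cache_object *flo)
{
	struct toy_policy *pol = container_of(flo, struct toy_policy, flo);

	if (pol->dead)
		return NULL;	/* refuse the hit; caller re-resolves */
	pol->refcnt++;
	return flo;
}

static int toy_check(struct flow_cache_object *flo)
{
	return !container_of(flo, struct toy_policy, flo)->dead;
}

static void toy_delete(struct flow_cache_object *flo)
{
	struct toy_policy *pol = container_of(flo, struct toy_policy, flo);

	if (--pol->refcnt == 0)
		free(pol);	/* last reference may be the cache's own */
}

static const struct flow_cache_ops toy_ops = {
	.get	= toy_get,
	.check	= toy_check,
	.delete	= toy_delete,
};

int main(void)
{
	struct toy_policy *pol = calloc(1, sizeof(*pol));

	if (!pol)
		return 1;
	pol->refcnt = 1;		/* the cache's reference */
	pol->flo.ops = &toy_ops;

	if (pol->flo.ops->get(&pol->flo))	/* validated cache hit */
		printf("hit, refcnt=%d\n", pol->refcnt);

	pol->flo.ops->delete(&pol->flo);	/* caller drops its ref */
	pol->flo.ops->delete(&pol->flo);	/* cache drops; freed */
	return 0;
}

Embedding flow_cache_object in the owner means container_of() can
recover the full object, so the cache never needs to know the
object's concrete type.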

In return for virtualizing the methods, we save on:
- not having to regenerate the whole flow cache on policy removal:
  each flow matching a killed policy is refreshed as soon as the
  getter function notices that the policy is dead (see the lookup
  sketch after this list).
- not having to call flow_cache_flush from the policy gc, since the
  flow cache now properly releases its reference to the object when
  an entry is deleted.
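
To show how the cache consumes these methods, here is a condensed
user-space rendition of the hit path of flow_cache_lookup()
(illustration only: hashing, per-cpu state and error-pointer
handling are omitted, and toy_entry, toy_lookup and null_resolver
are hypothetical names):

#include <stddef.h>
#include <stdio.h>

struct flow_cache_ops;

struct flow_cache_object {
	const struct flow_cache_ops *ops;
};

struct flow_cache_ops {
	struct flow_cache_object *(*get)(struct flow_cache_object *);
	int (*check)(struct flow_cache_object *);	/* shrink/flush */
	void (*delete)(struct flow_cache_object *);
};

typedef struct flow_cache_object *
(*resolve_fn)(struct flow_cache_object *oldobj, void *ctx);

struct toy_entry {			/* stand-in for flow_cache_entry */
	unsigned int genid;
	struct flow_cache_object *object;
};

static unsigned int flow_cache_genid = 1;	/* bumped on policy changes */
static int resolved;			/* counts resolver invocations */

static struct flow_cache_object *
toy_lookup(struct toy_entry *fle, resolve_fn resolver, void *ctx)
{
	struct flow_cache_object *flo;

	if (fle->genid == flow_cache_genid) {
		flo = fle->object;
		if (!flo)
			return NULL;		/* cached negative result */
		flo = flo->ops->get(flo);	/* object validates itself */
		if (flo)
			return flo;		/* live hit */
	} else if (fle->object) {
		/* Stale generation: the cache drops its reference here
		 * instead of waiting for a global flow_cache_flush(). */
		fle->object->ops->delete(fle->object);
		fle->object = NULL;
	}

	/* Miss, or a dead-but-current object: hand the old object (or
	 * NULL) to the resolver, which consumes it and returns fresh
	 * references, one for the cache and one for the caller. */
	flo = fle->object;
	fle->object = NULL;
	flo = resolver(flo, ctx);
	fle->genid = flow_cache_genid;
	fle->object = flo;
	return flo;
}

static struct flow_cache_object *
null_resolver(struct flow_cache_object *oldobj, void *ctx)
{
	(void)ctx;
	resolved++;
	if (oldobj)
		oldobj->ops->delete(oldobj);	/* consume the old ref */
	return NULL;			/* resolve to a negative entry */
}

int main(void)
{
	struct toy_entry fle = { 0, NULL };	/* stale: forces a resolve */

	toy_lookup(&fle, null_resolver, NULL);	/* miss: resolver runs */
	toy_lookup(&fle, null_resolver, NULL);	/* negative hit: skipped */
	printf("resolver ran %d time(s)\n", resolved);	/* prints 1 */
	return 0;
}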

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/flow.h
include/net/xfrm.h
net/core/flow.c
net/xfrm/xfrm_policy.c

diff --git a/include/net/flow.h b/include/net/flow.h
index 809970b7dfee71670ac9a8c1bf753923a96a264e..bb08692a20b08841ead94acce78c9ead82293d94 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -86,11 +86,26 @@ struct flowi {
 
 struct net;
 struct sock;
-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
-                             u8 dir, void **objp, atomic_t **obj_refp);
+struct flow_cache_ops;
+
+struct flow_cache_object {
+       const struct flow_cache_ops *ops;
+};
+
+struct flow_cache_ops {
+       struct flow_cache_object *(*get)(struct flow_cache_object *);
+       int (*check)(struct flow_cache_object *);
+       void (*delete)(struct flow_cache_object *);
+};
+
+typedef struct flow_cache_object *(*flow_resolve_t)(
+               struct net *net, struct flowi *key, u16 family,
+               u8 dir, struct flow_cache_object *oldobj, void *ctx);
+
+extern struct flow_cache_object *flow_cache_lookup(
+               struct net *net, struct flowi *key, u16 family,
+               u8 dir, flow_resolve_t resolver, void *ctx);
 
-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
-                              u8 dir, flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d74e080ba6c9c0a0ff14be2019eed6aeef659c68..35396e2dd1dce4cddc247b07af97fca47e9956b7 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -19,6 +19,7 @@
 #include <net/route.h>
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
+#include <net/flow.h>
 
 #include <linux/interrupt.h>
 
@@ -481,6 +482,7 @@ struct xfrm_policy {
        atomic_t                refcnt;
        struct timer_list       timer;
 
+       struct flow_cache_object flo;
        u32                     priority;
        u32                     index;
        struct xfrm_mark        mark;
diff --git a/net/core/flow.c b/net/core/flow.c
index 1d27ca6b421d9d25a39272cc4ee3cece8c2866d0..521df52a77d2be17518ba317d551755afac6fa4b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
 #include <linux/security.h>
 
 struct flow_cache_entry {
-       struct flow_cache_entry *next;
-       u16                     family;
-       u8                      dir;
-       u32                     genid;
-       struct flowi            key;
-       void                    *object;
-       atomic_t                *object_ref;
+       struct flow_cache_entry         *next;
+       u16                             family;
+       u8                              dir;
+       u32                             genid;
+       struct flowi                    key;
+       struct flow_cache_object        *object;
 };
 
 struct flow_cache_percpu {
-       struct flow_cache_entry **      hash_table;
+       struct flow_cache_entry         **hash_table;
        int                             hash_count;
        u32                             hash_rnd;
        int                             hash_rnd_recalc;
@@ -44,7 +43,7 @@ struct flow_cache_percpu {
 };
 
 struct flow_flush_info {
-       struct flow_cache *             cache;
+       struct flow_cache               *cache;
        atomic_t                        cpuleft;
        struct completion               completion;
 };
@@ -52,7 +51,7 @@ struct flow_flush_info {
 struct flow_cache {
        u32                             hash_shift;
        unsigned long                   order;
-       struct flow_cache_percpu *      percpu;
+       struct flow_cache_percpu        *percpu;
        struct notifier_block           hotcpu_notifier;
        int                             low_watermark;
        int                             high_watermark;
@@ -78,12 +77,21 @@ static void flow_cache_new_hashrnd(unsigned long arg)
        add_timer(&fc->rnd_timer);
 }
 
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+       if (atomic_read(&flow_cache_genid) != fle->genid)
+               return 0;
+       if (fle->object && !fle->object->ops->check(fle->object))
+               return 0;
+       return 1;
+}
+
 static void flow_entry_kill(struct flow_cache *fc,
                            struct flow_cache_percpu *fcp,
                            struct flow_cache_entry *fle)
 {
        if (fle->object)
-               atomic_dec(fle->object_ref);
+               fle->object->ops->delete(fle->object);
        kmem_cache_free(flow_cachep, fle);
        fcp->hash_count--;
 }
@@ -96,16 +104,18 @@ static void __flow_cache_shrink(struct flow_cache *fc,
        int i;
 
        for (i = 0; i < flow_cache_hash_size(fc); i++) {
-               int k = 0;
+               int saved = 0;
 
                flp = &fcp->hash_table[i];
-               while ((fle = *flp) != NULL && k < shrink_to) {
-                       k++;
-                       flp = &fle->next;
-               }
                while ((fle = *flp) != NULL) {
-                       *flp = fle->next;
-                       flow_entry_kill(fc, fcp, fle);
+                       if (saved < shrink_to &&
+                           flow_entry_valid(fle)) {
+                               saved++;
+                               flp = &fle->next;
+                       } else {
+                               *flp = fle->next;
+                               flow_entry_kill(fc, fcp, fle);
+                       }
                }
        }
 }
@@ -166,18 +176,21 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
        return 0;
 }
 
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
-                       flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+                 flow_resolve_t resolver, void *ctx)
 {
        struct flow_cache *fc = &flow_cache_global;
        struct flow_cache_percpu *fcp;
        struct flow_cache_entry *fle, **head;
+       struct flow_cache_object *flo;
        unsigned int hash;
 
        local_bh_disable();
        fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
 
        fle = NULL;
+       flo = NULL;
        /* Packet really early in init?  Making flow_cache_init a
         * pre-smp initcall would solve this.  --RR */
        if (!fcp->hash_table)
@@ -185,27 +198,17 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 
        if (fcp->hash_rnd_recalc)
                flow_new_hash_rnd(fc, fcp);
-       hash = flow_hash_code(fc, fcp, key);
 
+       hash = flow_hash_code(fc, fcp, key);
        head = &fcp->hash_table[hash];
        for (fle = *head; fle; fle = fle->next) {
                if (fle->family == family &&
                    fle->dir == dir &&
-                   flow_key_compare(key, &fle->key) == 0) {
-                       if (fle->genid == atomic_read(&flow_cache_genid)) {
-                               void *ret = fle->object;
-
-                               if (ret)
-                                       atomic_inc(fle->object_ref);
-                               local_bh_enable();
-
-                               return ret;
-                       }
+                   flow_key_compare(key, &fle->key) == 0)
                        break;
-               }
        }
 
-       if (!fle) {
+       if (unlikely(!fle)) {
                if (fcp->hash_count > fc->high_watermark)
                        flow_cache_shrink(fc, fcp);
 
@@ -219,33 +222,39 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
                        fle->object = NULL;
                        fcp->hash_count++;
                }
+       } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+               flo = fle->object;
+               if (!flo)
+                       goto ret_object;
+               flo = flo->ops->get(flo);
+               if (flo)
+                       goto ret_object;
+       } else if (fle->object) {
+               flo = fle->object;
+               flo->ops->delete(flo);
+               fle->object = NULL;
        }
 
 nocache:
-       {
-               int err;
-               void *obj;
-               atomic_t *obj_ref;
-
-               err = resolver(net, key, family, dir, &obj, &obj_ref);
-
-               if (fle && !err) {
-                       fle->genid = atomic_read(&flow_cache_genid);
-
-                       if (fle->object)
-                               atomic_dec(fle->object_ref);
-
-                       fle->object = obj;
-                       fle->object_ref = obj_ref;
-                       if (obj)
-                               atomic_inc(fle->object_ref);
-               }
-               local_bh_enable();
-
-               if (err)
-                       obj = ERR_PTR(err);
-               return obj;
+       flo = NULL;
+       if (fle) {
+               flo = fle->object;
+               fle->object = NULL;
+       }
+       flo = resolver(net, key, family, dir, flo, ctx);
+       if (fle) {
+               fle->genid = atomic_read(&flow_cache_genid);
+               if (!IS_ERR(flo))
+                       fle->object = flo;
+               else
+                       fle->genid--;
+       } else {
+               if (flo && !IS_ERR(flo))
+                       flo->ops->delete(flo);
        }
+ret_object:
+       local_bh_enable();
+       return flo;
 }
 
 static void flow_cache_flush_tasklet(unsigned long data)
@@ -261,13 +270,12 @@ static void flow_cache_flush_tasklet(unsigned long data)
 
                fle = fcp->hash_table[i];
                for (; fle; fle = fle->next) {
-                       unsigned genid = atomic_read(&flow_cache_genid);
-
-                       if (!fle->object || fle->genid == genid)
+                       if (flow_entry_valid(fle))
                                continue;
 
+                       if (fle->object)
+                               fle->object->ops->delete(fle->object);
                        fle->object = NULL;
-                       atomic_dec(fle->object_ref);
                }
        }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 82789cf1c63206bed7c652a8a5688d007761a9d5..7722baeb140dc2c279607a5807c370ca1245a75d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -216,6 +216,35 @@ expired:
        xfrm_pol_put(xp);
 }
 
+static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
+{
+       struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+       if (unlikely(pol->walk.dead))
+               flo = NULL;
+       else
+               xfrm_pol_hold(pol);
+
+       return flo;
+}
+
+static int xfrm_policy_flo_check(struct flow_cache_object *flo)
+{
+       struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+       return !pol->walk.dead;
+}
+
+static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
+{
+       xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
+}
+
+static const struct flow_cache_ops xfrm_policy_fc_ops = {
+       .get = xfrm_policy_flo_get,
+       .check = xfrm_policy_flo_check,
+       .delete = xfrm_policy_flo_delete,
+};
 
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
@@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
                atomic_set(&policy->refcnt, 1);
                setup_timer(&policy->timer, xfrm_policy_timer,
                                (unsigned long)policy);
+               policy->flo.ops = &xfrm_policy_fc_ops;
        }
        return policy;
 }
@@ -269,9 +299,6 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
        if (del_timer(&policy->timer))
                atomic_dec(&policy->refcnt);
 
-       if (atomic_read(&policy->refcnt) > 1)
-               flow_cache_flush();
-
        xfrm_pol_put(policy);
 }
 
@@ -661,10 +688,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
        }
        write_unlock_bh(&xfrm_policy_lock);
 
-       if (ret && delete) {
-               atomic_inc(&flow_cache_genid);
+       if (ret && delete)
                xfrm_policy_kill(ret);
-       }
        return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -703,10 +728,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
        }
        write_unlock_bh(&xfrm_policy_lock);
 
-       if (ret && delete) {
-               atomic_inc(&flow_cache_genid);
+       if (ret && delete)
                xfrm_policy_kill(ret);
-       }
        return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
@@ -822,7 +845,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
        }
        if (!cnt)
                err = -ESRCH;
-       atomic_inc(&flow_cache_genid);
 out:
        write_unlock_bh(&xfrm_policy_lock);
        return err;
@@ -976,32 +998,35 @@ fail:
        return ret;
 }
 
-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
-                             u8 dir, void **objp, atomic_t **obj_refp)
+static struct flow_cache_object *
+xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+                  u8 dir, struct flow_cache_object *old_obj, void *ctx)
 {
        struct xfrm_policy *pol;
-       int err = 0;
+
+       if (old_obj)
+               xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
 #ifdef CONFIG_XFRM_SUB_POLICY
        pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
-       if (IS_ERR(pol)) {
-               err = PTR_ERR(pol);
-               pol = NULL;
-       }
-       if (pol || err)
-               goto end;
+       if (IS_ERR(pol))
+               return ERR_CAST(pol);
+       if (pol)
+               goto found;
 #endif
        pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
-       if (IS_ERR(pol)) {
-               err = PTR_ERR(pol);
-               pol = NULL;
-       }
-#ifdef CONFIG_XFRM_SUB_POLICY
-end:
-#endif
-       if ((*objp = (void *) pol) != NULL)
-               *obj_refp = &pol->refcnt;
-       return err;
+       if (IS_ERR(pol))
+               return ERR_CAST(pol);
+       if (pol)
+               goto found;
+       return NULL;
+
+found:
+       /* Resolver returns two references:
+        * one for cache and one for caller of flow_cache_lookup() */
+       xfrm_pol_hold(pol);
+
+       return &pol->flo;
 }
 
 static inline int policy_to_flow_dir(int dir)
@@ -1091,8 +1116,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
        pol = __xfrm_policy_unlink(pol, dir);
        write_unlock_bh(&xfrm_policy_lock);
        if (pol) {
-               if (dir < XFRM_POLICY_MAX)
-                       atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
                return 0;
        }
@@ -1578,18 +1601,24 @@ restart:
        }
 
        if (!policy) {
+               struct flow_cache_object *flo;
+
                /* To accelerate a bit...  */
                if ((dst_orig->flags & DST_NOXFRM) ||
                    !net->xfrm.policy_count[XFRM_POLICY_OUT])
                        goto nopol;
 
-               policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
-                                          dir, xfrm_policy_lookup);
-               err = PTR_ERR(policy);
-               if (IS_ERR(policy)) {
+               flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
+                                       dir, xfrm_policy_lookup, NULL);
+               err = PTR_ERR(flo);
+               if (IS_ERR(flo)) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
                        goto dropdst;
                }
+               if (flo)
+                       policy = container_of(flo, struct xfrm_policy, flo);
+               else
+                       policy = NULL;
        }
 
        if (!policy)
@@ -1939,9 +1968,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                }
        }
 
-       if (!pol)
-               pol = flow_cache_lookup(net, &fl, family, fl_dir,
-                                       xfrm_policy_lookup);
+       if (!pol) {
+               struct flow_cache_object *flo;
+
+               flo = flow_cache_lookup(net, &fl, family, fl_dir,
+                                       xfrm_policy_lookup, NULL);
+               if (IS_ERR_OR_NULL(flo))
+                       pol = ERR_CAST(flo);
+               else
+                       pol = container_of(flo, struct xfrm_policy, flo);
+       }
 
        if (IS_ERR(pol)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);