This allows to validate the cached object before returning it.
It also allows to destruct object properly, if the last reference
was held in flow cache. This is also a prepartion for caching
bundles in the flow cache.
In return for virtualizing the methods, we save on:
- not having to regenerate the whole flow cache on policy removal:
each flow matching a killed policy gets refreshed as the getter
function notices it smartly.
- we do not have to call flow_cache_flush from policy gc, since the
flow cache now properly deletes the object if it had any references
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
struct net;
struct sock;
-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
- u8 dir, void **objp, atomic_t **obj_refp);
+struct flow_cache_ops;
+
+struct flow_cache_object {
+ const struct flow_cache_ops *ops;
+};
+
+struct flow_cache_ops {
+ struct flow_cache_object *(*get)(struct flow_cache_object *);
+ int (*check)(struct flow_cache_object *);
+ void (*delete)(struct flow_cache_object *);
+};
+
+typedef struct flow_cache_object *(*flow_resolve_t)(
+ struct net *net, struct flowi *key, u16 family,
+ u8 dir, struct flow_cache_object *oldobj, void *ctx);
+
+extern struct flow_cache_object *flow_cache_lookup(
+ struct net *net, struct flowi *key, u16 family,
+ u8 dir, flow_resolve_t resolver, void *ctx);
-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
- u8 dir, flow_resolve_t resolver);
extern void flow_cache_flush(void);
extern atomic_t flow_cache_genid;
#include <net/route.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
+#include <net/flow.h>
#include <linux/interrupt.h>
atomic_t refcnt;
struct timer_list timer;
+ struct flow_cache_object flo;
u32 priority;
u32 index;
struct xfrm_mark mark;
#include <linux/security.h>
struct flow_cache_entry {
- struct flow_cache_entry *next;
- u16 family;
- u8 dir;
- u32 genid;
- struct flowi key;
- void *object;
- atomic_t *object_ref;
+ struct flow_cache_entry *next;
+ u16 family;
+ u8 dir;
+ u32 genid;
+ struct flowi key;
+ struct flow_cache_object *object;
};
struct flow_cache_percpu {
- struct flow_cache_entry ** hash_table;
+ struct flow_cache_entry **hash_table;
int hash_count;
u32 hash_rnd;
int hash_rnd_recalc;
};
struct flow_flush_info {
- struct flow_cache * cache;
+ struct flow_cache *cache;
atomic_t cpuleft;
struct completion completion;
};
struct flow_cache {
u32 hash_shift;
unsigned long order;
- struct flow_cache_percpu * percpu;
+ struct flow_cache_percpu *percpu;
struct notifier_block hotcpu_notifier;
int low_watermark;
int high_watermark;
add_timer(&fc->rnd_timer);
}
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+ if (atomic_read(&flow_cache_genid) != fle->genid)
+ return 0;
+ if (fle->object && !fle->object->ops->check(fle->object))
+ return 0;
+ return 1;
+}
+
static void flow_entry_kill(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
struct flow_cache_entry *fle)
{
if (fle->object)
- atomic_dec(fle->object_ref);
+ fle->object->ops->delete(fle->object);
kmem_cache_free(flow_cachep, fle);
fcp->hash_count--;
}
int i;
for (i = 0; i < flow_cache_hash_size(fc); i++) {
- int k = 0;
+ int saved = 0;
flp = &fcp->hash_table[i];
- while ((fle = *flp) != NULL && k < shrink_to) {
- k++;
- flp = &fle->next;
- }
while ((fle = *flp) != NULL) {
- *flp = fle->next;
- flow_entry_kill(fc, fcp, fle);
+ if (saved < shrink_to &&
+ flow_entry_valid(fle)) {
+ saved++;
+ flp = &fle->next;
+ } else {
+ *flp = fle->next;
+ flow_entry_kill(fc, fcp, fle);
+ }
}
}
}
return 0;
}
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
- flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ flow_resolve_t resolver, void *ctx)
{
struct flow_cache *fc = &flow_cache_global;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, **head;
+ struct flow_cache_object *flo;
unsigned int hash;
local_bh_disable();
fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
fle = NULL;
+ flo = NULL;
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
if (!fcp->hash_table)
if (fcp->hash_rnd_recalc)
flow_new_hash_rnd(fc, fcp);
- hash = flow_hash_code(fc, fcp, key);
+ hash = flow_hash_code(fc, fcp, key);
head = &fcp->hash_table[hash];
for (fle = *head; fle; fle = fle->next) {
if (fle->family == family &&
fle->dir == dir &&
- flow_key_compare(key, &fle->key) == 0) {
- if (fle->genid == atomic_read(&flow_cache_genid)) {
- void *ret = fle->object;
-
- if (ret)
- atomic_inc(fle->object_ref);
- local_bh_enable();
-
- return ret;
- }
+ flow_key_compare(key, &fle->key) == 0)
break;
- }
}
- if (!fle) {
+ if (unlikely(!fle)) {
if (fcp->hash_count > fc->high_watermark)
flow_cache_shrink(fc, fcp);
fle->object = NULL;
fcp->hash_count++;
}
+ } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+ flo = fle->object;
+ if (!flo)
+ goto ret_object;
+ flo = flo->ops->get(flo);
+ if (flo)
+ goto ret_object;
+ } else if (fle->object) {
+ flo = fle->object;
+ flo->ops->delete(flo);
+ fle->object = NULL;
}
nocache:
- {
- int err;
- void *obj;
- atomic_t *obj_ref;
-
- err = resolver(net, key, family, dir, &obj, &obj_ref);
-
- if (fle && !err) {
- fle->genid = atomic_read(&flow_cache_genid);
-
- if (fle->object)
- atomic_dec(fle->object_ref);
-
- fle->object = obj;
- fle->object_ref = obj_ref;
- if (obj)
- atomic_inc(fle->object_ref);
- }
- local_bh_enable();
-
- if (err)
- obj = ERR_PTR(err);
- return obj;
+ flo = NULL;
+ if (fle) {
+ flo = fle->object;
+ fle->object = NULL;
+ }
+ flo = resolver(net, key, family, dir, flo, ctx);
+ if (fle) {
+ fle->genid = atomic_read(&flow_cache_genid);
+ if (!IS_ERR(flo))
+ fle->object = flo;
+ else
+ fle->genid--;
+ } else {
+ if (flo && !IS_ERR(flo))
+ flo->ops->delete(flo);
}
+ret_object:
+ local_bh_enable();
+ return flo;
}
static void flow_cache_flush_tasklet(unsigned long data)
fle = fcp->hash_table[i];
for (; fle; fle = fle->next) {
- unsigned genid = atomic_read(&flow_cache_genid);
-
- if (!fle->object || fle->genid == genid)
+ if (flow_entry_valid(fle))
continue;
+ if (fle->object)
+ fle->object->ops->delete(fle->object);
fle->object = NULL;
- atomic_dec(fle->object_ref);
}
}
xfrm_pol_put(xp);
}
+static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
+{
+ struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+ if (unlikely(pol->walk.dead))
+ flo = NULL;
+ else
+ xfrm_pol_hold(pol);
+
+ return flo;
+}
+
+static int xfrm_policy_flo_check(struct flow_cache_object *flo)
+{
+ struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+ return !pol->walk.dead;
+}
+
+static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
+{
+ xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
+}
+
+static const struct flow_cache_ops xfrm_policy_fc_ops = {
+ .get = xfrm_policy_flo_get,
+ .check = xfrm_policy_flo_check,
+ .delete = xfrm_policy_flo_delete,
+};
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
* SPD calls.
atomic_set(&policy->refcnt, 1);
setup_timer(&policy->timer, xfrm_policy_timer,
(unsigned long)policy);
+ policy->flo.ops = &xfrm_policy_fc_ops;
}
return policy;
}
if (del_timer(&policy->timer))
atomic_dec(&policy->refcnt);
- if (atomic_read(&policy->refcnt) > 1)
- flow_cache_flush();
-
xfrm_pol_put(policy);
}
}
write_unlock_bh(&xfrm_policy_lock);
- if (ret && delete) {
- atomic_inc(&flow_cache_genid);
+ if (ret && delete)
xfrm_policy_kill(ret);
- }
return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
}
write_unlock_bh(&xfrm_policy_lock);
- if (ret && delete) {
- atomic_inc(&flow_cache_genid);
+ if (ret && delete)
xfrm_policy_kill(ret);
- }
return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
}
if (!cnt)
err = -ESRCH;
- atomic_inc(&flow_cache_genid);
out:
write_unlock_bh(&xfrm_policy_lock);
return err;
return ret;
}
-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
- u8 dir, void **objp, atomic_t **obj_refp)
+static struct flow_cache_object *
+xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+ u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
struct xfrm_policy *pol;
- int err = 0;
+
+ if (old_obj)
+ xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
#ifdef CONFIG_XFRM_SUB_POLICY
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
- if (IS_ERR(pol)) {
- err = PTR_ERR(pol);
- pol = NULL;
- }
- if (pol || err)
- goto end;
+ if (IS_ERR(pol))
+ return ERR_CAST(pol);
+ if (pol)
+ goto found;
#endif
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
- if (IS_ERR(pol)) {
- err = PTR_ERR(pol);
- pol = NULL;
- }
-#ifdef CONFIG_XFRM_SUB_POLICY
-end:
-#endif
- if ((*objp = (void *) pol) != NULL)
- *obj_refp = &pol->refcnt;
- return err;
+ if (IS_ERR(pol))
+ return ERR_CAST(pol);
+ if (pol)
+ goto found;
+ return NULL;
+
+found:
+ /* Resolver returns two references:
+ * one for cache and one for caller of flow_cache_lookup() */
+ xfrm_pol_hold(pol);
+
+ return &pol->flo;
}
static inline int policy_to_flow_dir(int dir)
pol = __xfrm_policy_unlink(pol, dir);
write_unlock_bh(&xfrm_policy_lock);
if (pol) {
- if (dir < XFRM_POLICY_MAX)
- atomic_inc(&flow_cache_genid);
xfrm_policy_kill(pol);
return 0;
}
}
if (!policy) {
+ struct flow_cache_object *flo;
+
/* To accelerate a bit... */
if ((dst_orig->flags & DST_NOXFRM) ||
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
- policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
- dir, xfrm_policy_lookup);
- err = PTR_ERR(policy);
- if (IS_ERR(policy)) {
+ flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
+ dir, xfrm_policy_lookup, NULL);
+ err = PTR_ERR(flo);
+ if (IS_ERR(flo)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
goto dropdst;
}
+ if (flo)
+ policy = container_of(flo, struct xfrm_policy, flo);
+ else
+ policy = NULL;
}
if (!policy)
}
}
- if (!pol)
- pol = flow_cache_lookup(net, &fl, family, fl_dir,
- xfrm_policy_lookup);
+ if (!pol) {
+ struct flow_cache_object *flo;
+
+ flo = flow_cache_lookup(net, &fl, family, fl_dir,
+ xfrm_policy_lookup, NULL);
+ if (IS_ERR_OR_NULL(flo))
+ pol = ERR_CAST(flo);
+ else
+ pol = container_of(flo, struct xfrm_policy, flo);
+ }
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);