If we have an output route that lacks nexthop exceptions, we can cache
it in the FIB info nexthop.
Such routes will have DST_HOST cleared because such routes refer to a
family of destinations, rather than just one.
The sequence of the handling of exceptions during route lookup is
adjusted to make the logic work properly.
Before we allocate the route, we lookup the exception.
Then we know if we will cache this route or not, and therefore whether
DST_HOST should be set on the allocated route.
Then we use DST_HOST to key off whether we should store the resulting
route, during rt_set_nexthop(), in the FIB nexthop cache.
With help from Eric Dumazet.
Signed-off-by: David S. Miller <davem@davemloft.net>
struct fib_nh_exception {
struct fib_nh_exception __rcu *fnhe_next;
struct fib_nh_exception {
struct fib_nh_exception __rcu *fnhe_next;
__be32 nh_gw;
__be32 nh_saddr;
int nh_saddr_genid;
__be32 nh_gw;
__be32 nh_saddr;
int nh_saddr_genid;
+ struct rtable *nh_rth_output;
struct fnhe_hash_bucket *nh_exceptions;
};
struct fnhe_hash_bucket *nh_exceptions;
};
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
+ if (nexthop_nh->nh_rth_output)
+ dst_release(&nexthop_nh->nh_rth_output->dst);
} endfor_nexthops(fi);
release_net(fi->fib_net);
} endfor_nexthops(fi);
release_net(fi->fib_net);
-static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
- struct fib_info *fi)
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
{
if (fi->fib_metrics != (u32 *) dst_default_metrics) {
rt->fi = fi;
{
if (fi->fib_metrics != (u32 *) dst_default_metrics) {
rt->fi = fi;
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
}
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
}
-static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr)
+static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
struct fib_nh_exception *fnhe;
u32 hval;
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
struct fib_nh_exception *fnhe;
u32 hval;
+ if (!hash)
+ return NULL;
+
hval = fnhe_hashfun(daddr);
hval = fnhe_hashfun(daddr);
for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
- if (daddr != fnhe_daddr)
- continue;
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (fnhe->fnhe_daddr == daddr)
+ return fnhe;
+ }
+ return NULL;
+}
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
- }
- }
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
+static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
+ __be32 daddr)
+{
+ __be32 fnhe_daddr, gw;
+ unsigned long expires;
+ unsigned int seq;
+ u32 pmtu;
+
+restart:
+ seq = read_seqbegin(&fnhe_seqlock);
+ fnhe_daddr = fnhe->fnhe_daddr;
+ gw = fnhe->fnhe_gw;
+ pmtu = fnhe->fnhe_pmtu;
+ expires = fnhe->fnhe_expires;
+ if (read_seqretry(&fnhe_seqlock, seq))
+ goto restart;
+
+ if (daddr != fnhe_daddr)
+ return;
+
+ if (pmtu) {
+ unsigned long diff = expires - jiffies;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = pmtu;
+ dst_set_expires(&rt->dst, diff);
- fnhe->fnhe_stamp = jiffies;
- break;
+ }
+ if (gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = gw;
+ }
+ fnhe->fnhe_stamp = jiffies;
+}
+
+static inline void rt_release_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+ dst_release(dst);
+}
+
+static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+{
+ struct rtable *orig, *prev, **p = &nh->nh_rth_output;
+
+ orig = *p;
+
+ prev = cmpxchg(p, orig, rt);
+ if (prev == orig) {
+ dst_clone(&rt->dst);
+ if (orig)
+ call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu);
-static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
+static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
const struct fib_result *res,
const struct fib_result *res,
+ struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag)
{
if (fi) {
struct fib_info *fi, u16 type, u32 itag)
{
if (fi) {
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(nh->nh_exceptions))
- rt_bind_exception(rt, nh, fl4->daddr);
- rt_init_metrics(rt, fl4, fi);
+ if (unlikely(fnhe))
+ rt_bind_exception(rt, fnhe, daddr);
+ rt_init_metrics(rt, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_ROUTE_CLASSID
- rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+ rt->dst.tclassid = nh->nh_tclassid;
+ if (!(rt->dst.flags & DST_HOST) &&
+ rt_is_output_route(rt))
+ rt_cache_route(nh, rt);
}
#ifdef CONFIG_IP_ROUTE_CLASSID
}
#ifdef CONFIG_IP_ROUTE_CLASSID
}
static struct rtable *rt_dst_alloc(struct net_device *dev,
}
static struct rtable *rt_dst_alloc(struct net_device *dev,
- bool nopolicy, bool noxfrm)
+ bool nopolicy, bool noxfrm, bool will_cache)
{
return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
{
return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
- DST_HOST | DST_NOCACHE |
+ (will_cache ? 0 : DST_HOST) | DST_NOCACHE |
(nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
}
(nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
}
goto e_err;
}
rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
goto e_err;
}
rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
__be32 daddr, __be32 saddr, u32 tos,
struct rtable **result)
{
__be32 daddr, __be32 saddr, u32 tos,
struct rtable **result)
{
+ struct fib_nh_exception *fnhe;
struct rtable *rth;
int err;
struct in_device *out_dev;
struct rtable *rth;
int err;
struct in_device *out_dev;
+ fnhe = NULL;
+ if (res->fi)
+ fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+
rth = rt_dst_alloc(out_dev->dev,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
rth = rt_dst_alloc(out_dev->dev,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(out_dev, NOXFRM));
+ IN_DEV_CONF_GET(out_dev, NOXFRM), false);
if (!rth) {
err = -ENOBUFS;
goto cleanup;
if (!rth) {
err = -ENOBUFS;
goto cleanup;
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
- rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
+ rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
local_input:
rth = rt_dst_alloc(net->loopback_dev,
local_input:
rth = rt_dst_alloc(net->loopback_dev,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
unsigned int flags)
{
struct fib_info *fi = res->fi;
unsigned int flags)
{
struct fib_info *fi = res->fi;
+ struct fib_nh_exception *fnhe;
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
+ fnhe = NULL;
+ if (fi) {
+ fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
+ if (!fnhe) {
+ rth = FIB_RES_NH(*res).nh_rth_output;
+ if (rth &&
+ rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) {
+ dst_use(&rth->dst, jiffies);
+ return rth;
+ }
+ }
+ }
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(in_dev, NOXFRM));
+ IN_DEV_CONF_GET(in_dev, NOXFRM),
+ fi && !fnhe);
if (!rth)
return ERR_PTR(-ENOBUFS);
if (!rth)
return ERR_PTR(-ENOBUFS);
- rt_set_nexthop(rth, fl4, res, fi, type, 0);
+ rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
rth->dst.flags |= DST_NOCACHE;
if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
rth->dst.flags |= DST_NOCACHE;