struct delayed_work dwork;
u32 last_bucket;
bool exiting;
+ long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV of the table */
#define GC_MAX_BUCKETS_DIV 64u
-#define GC_MAX_BUCKETS 8192u
-#define GC_INTERVAL (5 * HZ)
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX (2 * HZ)
+/* maximum conntracks to evict per gc run */
#define GC_MAX_EVICTS 256u
static struct conntrack_gc_work conntrack_gc_work;
static void gc_worker(struct work_struct *work)
{
unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned long next_run = GC_INTERVAL;
- unsigned int ratio, scanned = 0;
struct conntrack_gc_work *gc_work;
+ unsigned int ratio, scanned = 0;
+ unsigned long next_run;
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;
do {
if (gc_work->exiting)
return;
+ /*
+ * Eviction will normally happen from the packet path, and not
+ * from this gc worker.
+ *
+ * This worker is only here to reap expired entries when the system
+ * has gone idle after a busy period.
+ *
+ * The heuristics below are supposed to balance conflicting goals:
+ *
+ * 1. Minimize time until we notice a stale entry
+ * 2. Maximize scan intervals to not waste cycles
+ *
+ * Normally, expired_count will be 0; this increases the next_run time
+ * to prioritize 2) above.
+ *
+ * As soon as a timed-out entry is found, move towards 1) and increase
+ * the scan frequency.
+ * In case we have lots of evictions, the next scan is done immediately.
+ */
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
+ if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+ gc_work->next_gc_run = 0;
next_run = 0;
+ } else if (expired_count) {
+ gc_work->next_gc_run /= 2U;
+ next_run = msecs_to_jiffies(1);
+ } else {
+ if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+ gc_work->next_gc_run += msecs_to_jiffies(1);
+
+ next_run = gc_work->next_gc_run;
+ }
gc_work->last_bucket = i;
- schedule_delayed_work(&gc_work->dwork, next_run);
+ queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->next_gc_run = GC_INTERVAL_MAX;
gc_work->exiting = false;
}
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+ queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
return 0;