netfilter: conntrack: refine gc worker heuristics

author Florian Westphal <fw@strlen.de>

Fri, 4 Nov 2016 15:54:58 +0000 (16:54 +0100)

committer Pablo Neira Ayuso <pablo@netfilter.org>

Tue, 8 Nov 2016 22:53:38 +0000 (23:53 +0100)
author Florian Westphal <fw@strlen.de>
Fri, 4 Nov 2016 15:54:58 +0000 (16:54 +0100)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Tue, 8 Nov 2016 22:53:38 +0000 (23:53 +0100)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index df2f5a3901dfce580e051bf9a52d2e71da0f8e9b..0f87e5d21be7161f6d885fd3ab40f1e752e68a66 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
         struct delayed_work     dwork;
         u32                     last_bucket;
         bool                    exiting;
+       long                    next_gc_run;
  };
  
  static __read_mostly struct kmem_cache *nf_conntrack_cachep;
@@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
  static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
  static __read_mostly bool nf_conntrack_locks_all;
  
+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
  #define GC_MAX_BUCKETS_DIV     64u
-#define GC_MAX_BUCKETS         8192u
-#define GC_INTERVAL            (5 * HZ)
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX                (2 * HZ)
+/* maximum conntracks to evict per gc run */
  #define GC_MAX_EVICTS          256u
  
  static struct conntrack_gc_work conntrack_gc_work;
@@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
  static void gc_worker(struct work_struct *work)
  {
         unsigned int i, goal, buckets = 0, expired_count = 0;
-       unsigned long next_run = GC_INTERVAL;
-       unsigned int ratio, scanned = 0;
         struct conntrack_gc_work *gc_work;
+       unsigned int ratio, scanned = 0;
+       unsigned long next_run;
  
         gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
  
-       goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+       goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
         i = gc_work->last_bucket;
  
         do {
@@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work)
         if (gc_work->exiting)
                 return;
  
+       /*
+        * Eviction will normally happen from the packet path, and not
+        * from this gc worker.
+        *
+        * This worker is only here to reap expired entries when system went
+        * idle after a busy period.
+        *
+        * The heuristics below are supposed to balance conflicting goals:
+        *
+        * 1. Minimize time until we notice a stale entry
+        * 2. Maximize scan intervals to not waste cycles
+        *
+        * Normally, expired_count will be 0, this increases the next_run time
+        * to prioritize 2) above.
+        *
+        * As soon as a timed-out entry is found, move towards 1) and increase
+        * the scan frequency.
+        * In case we have lots of evictions next scan is done immediately.
+        */
         ratio = scanned ? expired_count * 100 / scanned : 0;
-       if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
+       if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+               gc_work->next_gc_run = 0;
                 next_run = 0;
+       } else if (expired_count) {
+               gc_work->next_gc_run /= 2U;
+               next_run = msecs_to_jiffies(1);
+       } else {
+               if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+                       gc_work->next_gc_run += msecs_to_jiffies(1);
+
+               next_run = gc_work->next_gc_run;
+       }
  
         gc_work->last_bucket = i;
-       schedule_delayed_work(&gc_work->dwork, next_run);
+       queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
  }
  
  static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
  {
         INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+       gc_work->next_gc_run = GC_INTERVAL_MAX;
         gc_work->exiting = false;
  }
  
@@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void)
         nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
  
         conntrack_gc_work_init(&conntrack_gc_work);
-       schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+       queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
  
         return 0;
author	Florian Westphal <fw@strlen.de>
	Fri, 4 Nov 2016 15:54:58 +0000 (16:54 +0100)
committer	Pablo Neira Ayuso <pablo@netfilter.org>
	Tue, 8 Nov 2016 22:53:38 +0000 (23:53 +0100)