netfilter: xt_hashlimit: perform garbage collection from process context
authorEric Dumazet <edumazet@google.com>
Thu, 24 Jul 2014 04:36:50 +0000 (06:36 +0200)
committerPablo Neira Ayuso <pablo@netfilter.org>
Thu, 24 Jul 2014 11:07:25 +0000 (13:07 +0200)
xt_hashlimit cannot be used with large hash tables, because garbage
collector is run from a timer. If table is really big, its possible
to hold cpu for more than 500 msec, which is unacceptable.

Switch to a work queue, and use proper scheduling points to remove
latencies spikes.

Later, we also could switch to a smoother garbage collection done
at lookup time, one bucket at a time...

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Patrick McHardy <kaber@trash.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
net/netfilter/xt_hashlimit.c

index a3910fc..47dc683 100644 (file)
@@ -104,7 +104,7 @@ struct xt_hashlimit_htable {
        spinlock_t lock;                /* lock for list_head */
        u_int32_t rnd;                  /* random seed for hash */
        unsigned int count;             /* number entries in table */
-       struct timer_list timer;        /* timer for gc */
+       struct delayed_work gc_work;
 
        /* seq_file stuff */
        struct proc_dir_entry *pde;
@@ -213,7 +213,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
        call_rcu_bh(&ent->rcu, dsthash_free_rcu);
        ht->count--;
 }
-static void htable_gc(unsigned long htlong);
+static void htable_gc(struct work_struct *work);
 
 static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
                         u_int8_t family)
@@ -273,9 +273,9 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
        }
        hinfo->net = net;
 
-       setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
-       hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
-       add_timer(&hinfo->timer);
+       INIT_DEFERRABLE_WORK(&hinfo->gc_work, htable_gc);
+       queue_delayed_work(system_power_efficient_wq, &hinfo->gc_work,
+                          msecs_to_jiffies(hinfo->cfg.gc_interval));
 
        hlist_add_head(&hinfo->node, &hashlimit_net->htables);
 
@@ -300,29 +300,30 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
 {
        unsigned int i;
 
-       /* lock hash table and iterate over it */
-       spin_lock_bh(&ht->lock);
        for (i = 0; i < ht->cfg.size; i++) {
                struct dsthash_ent *dh;
                struct hlist_node *n;
+
+               spin_lock_bh(&ht->lock);
                hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
                        if ((*select)(ht, dh))
                                dsthash_free(ht, dh);
                }
+               spin_unlock_bh(&ht->lock);
+               cond_resched();
        }
-       spin_unlock_bh(&ht->lock);
 }
 
-/* hash table garbage collector, run by timer */
-static void htable_gc(unsigned long htlong)
+static void htable_gc(struct work_struct *work)
 {
-       struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong;
+       struct xt_hashlimit_htable *ht;
+
+       ht = container_of(work, struct xt_hashlimit_htable, gc_work.work);
 
        htable_selective_cleanup(ht, select_gc);
 
-       /* re-add the timer accordingly */
-       ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
-       add_timer(&ht->timer);
+       queue_delayed_work(system_power_efficient_wq,
+                          &ht->gc_work, msecs_to_jiffies(ht->cfg.gc_interval));
 }
 
 static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
@@ -341,7 +342,7 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
 
 static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 {
-       del_timer_sync(&hinfo->timer);
+       cancel_delayed_work_sync(&hinfo->gc_work);
        htable_remove_proc_entry(hinfo);
        htable_selective_cleanup(hinfo, select_all);
        kfree(hinfo->name);