netfilter: x_tables: don't block BH while reading counters
author Eric Dumazet <eric.dumazet@gmail.com>
Mon, 10 Jan 2011 19:11:38 +0000 (20:11 +0100)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Mon, 10 Jan 2011 19:11:38 +0000 (20:11 +0100)
Using "iptables -L" with a lot of rules causes too large a BH latency.
Jesper mentioned ~6 ms and was worried about frame drops.

Switch to a per_cpu seqlock scheme, so that taking a snapshot of
counters doesn't need to block BH (for this cpu, but also other cpus).

This adds two increments of the seqlock sequence per ipt_do_table() call;
it's a reasonable cost for allowing "iptables -L" to not block BH
processing.

Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
include/linux/netfilter/x_tables.h
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv6/netfilter/ip6_tables.c
net/netfilter/x_tables.c

index 742bec0..6712e71 100644 (file)
@@ -472,7 +472,7 @@ extern void xt_free_table_info(struct xt_table_info *info);
  *  necessary for reading the counters.
  */
 struct xt_info_lock {
-       spinlock_t lock;
+       seqlock_t lock;
        unsigned char readers;
 };
 DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
@@ -497,7 +497,7 @@ static inline void xt_info_rdlock_bh(void)
        local_bh_disable();
        lock = &__get_cpu_var(xt_info_locks);
        if (likely(!lock->readers++))
-               spin_lock(&lock->lock);
+               write_seqlock(&lock->lock);
 }
 
 static inline void xt_info_rdunlock_bh(void)
@@ -505,7 +505,7 @@ static inline void xt_info_rdunlock_bh(void)
        struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
 
        if (likely(!--lock->readers))
-               spin_unlock(&lock->lock);
+               write_sequnlock(&lock->lock);
        local_bh_enable();
 }
 
@@ -516,12 +516,12 @@ static inline void xt_info_rdunlock_bh(void)
  */
 static inline void xt_info_wrlock(unsigned int cpu)
 {
-       spin_lock(&per_cpu(xt_info_locks, cpu).lock);
+       write_seqlock(&per_cpu(xt_info_locks, cpu).lock);
 }
 
 static inline void xt_info_wrunlock(unsigned int cpu)
 {
-       spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
+       write_sequnlock(&per_cpu(xt_info_locks, cpu).lock);
 }
 
 /*
index 3fac340..e855fff 100644 (file)
@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
        struct arpt_entry *iter;
        unsigned int cpu;
        unsigned int i;
-       unsigned int curcpu = get_cpu();
-
-       /* Instead of clearing (by a previous call to memset())
-        * the counters and using adds, we set the counters
-        * with data used by 'current' CPU
-        *
-        * Bottom half has to be disabled to prevent deadlock
-        * if new softirq were to run and call ipt_do_table
-        */
-       local_bh_disable();
-       i = 0;
-       xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-               SET_COUNTER(counters[i], iter->counters.bcnt,
-                           iter->counters.pcnt);
-               ++i;
-       }
-       local_bh_enable();
-       /* Processing counters from other cpus, we can let bottom half enabled,
-        * (preemption is disabled)
-        */
 
        for_each_possible_cpu(cpu) {
-               if (cpu == curcpu)
-                       continue;
+               seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
                i = 0;
-               local_bh_disable();
-               xt_info_wrlock(cpu);
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
-                       ADD_COUNTER(counters[i], iter->counters.bcnt,
-                                   iter->counters.pcnt);
+                       u64 bcnt, pcnt;
+                       unsigned int start;
+
+                       do {
+                               start = read_seqbegin(lock);
+                               bcnt = iter->counters.bcnt;
+                               pcnt = iter->counters.pcnt;
+                       } while (read_seqretry(lock, start));
+
+                       ADD_COUNTER(counters[i], bcnt, pcnt);
                        ++i;
                }
-               xt_info_wrunlock(cpu);
-               local_bh_enable();
        }
-       put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
         * about).
         */
        countersize = sizeof(struct xt_counters) * private->number;
-       counters = vmalloc(countersize);
+       counters = vzalloc(countersize);
 
        if (counters == NULL)
                return ERR_PTR(-ENOMEM);
@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
        struct arpt_entry *iter;
 
        ret = 0;
-       counters = vmalloc(num_counters * sizeof(struct xt_counters));
+       counters = vzalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
index a846d63..652efea 100644 (file)
@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
        struct ipt_entry *iter;
        unsigned int cpu;
        unsigned int i;
-       unsigned int curcpu = get_cpu();
-
-       /* Instead of clearing (by a previous call to memset())
-        * the counters and using adds, we set the counters
-        * with data used by 'current' CPU.
-        *
-        * Bottom half has to be disabled to prevent deadlock
-        * if new softirq were to run and call ipt_do_table
-        */
-       local_bh_disable();
-       i = 0;
-       xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-               SET_COUNTER(counters[i], iter->counters.bcnt,
-                           iter->counters.pcnt);
-               ++i;
-       }
-       local_bh_enable();
-       /* Processing counters from other cpus, we can let bottom half enabled,
-        * (preemption is disabled)
-        */
 
        for_each_possible_cpu(cpu) {
-               if (cpu == curcpu)
-                       continue;
+               seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
                i = 0;
-               local_bh_disable();
-               xt_info_wrlock(cpu);
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
-                       ADD_COUNTER(counters[i], iter->counters.bcnt,
-                                   iter->counters.pcnt);
+                       u64 bcnt, pcnt;
+                       unsigned int start;
+
+                       do {
+                               start = read_seqbegin(lock);
+                               bcnt = iter->counters.bcnt;
+                               pcnt = iter->counters.pcnt;
+                       } while (read_seqretry(lock, start));
+
+                       ADD_COUNTER(counters[i], bcnt, pcnt);
                        ++i; /* macro does multi eval of i */
                }
-               xt_info_wrunlock(cpu);
-               local_bh_enable();
        }
-       put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
           (other than comefrom, which userspace doesn't care
           about). */
        countersize = sizeof(struct xt_counters) * private->number;
-       counters = vmalloc(countersize);
+       counters = vzalloc(countersize);
 
        if (counters == NULL)
                return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
        struct ipt_entry *iter;
 
        ret = 0;
-       counters = vmalloc(num_counters * sizeof(struct xt_counters));
+       counters = vzalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
index 4555823..7d227c6 100644 (file)
@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
        struct ip6t_entry *iter;
        unsigned int cpu;
        unsigned int i;
-       unsigned int curcpu = get_cpu();
-
-       /* Instead of clearing (by a previous call to memset())
-        * the counters and using adds, we set the counters
-        * with data used by 'current' CPU
-        *
-        * Bottom half has to be disabled to prevent deadlock
-        * if new softirq were to run and call ipt_do_table
-        */
-       local_bh_disable();
-       i = 0;
-       xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-               SET_COUNTER(counters[i], iter->counters.bcnt,
-                           iter->counters.pcnt);
-               ++i;
-       }
-       local_bh_enable();
-       /* Processing counters from other cpus, we can let bottom half enabled,
-        * (preemption is disabled)
-        */
 
        for_each_possible_cpu(cpu) {
-               if (cpu == curcpu)
-                       continue;
+               seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
                i = 0;
-               local_bh_disable();
-               xt_info_wrlock(cpu);
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
-                       ADD_COUNTER(counters[i], iter->counters.bcnt,
-                                   iter->counters.pcnt);
+                       u64 bcnt, pcnt;
+                       unsigned int start;
+
+                       do {
+                               start = read_seqbegin(lock);
+                               bcnt = iter->counters.bcnt;
+                               pcnt = iter->counters.pcnt;
+                       } while (read_seqretry(lock, start));
+
+                       ADD_COUNTER(counters[i], bcnt, pcnt);
                        ++i;
                }
-               xt_info_wrunlock(cpu);
-               local_bh_enable();
        }
-       put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
           (other than comefrom, which userspace doesn't care
           about). */
        countersize = sizeof(struct xt_counters) * private->number;
-       counters = vmalloc(countersize);
+       counters = vzalloc(countersize);
 
        if (counters == NULL)
                return ERR_PTR(-ENOMEM);
@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
        struct ip6t_entry *iter;
 
        ret = 0;
-       counters = vmalloc(num_counters * sizeof(struct xt_counters));
+       counters = vzalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
index 8046350..c942376 100644 (file)
@@ -1325,7 +1325,8 @@ static int __init xt_init(void)
 
        for_each_possible_cpu(i) {
                struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
-               spin_lock_init(&lock->lock);
+
+               seqlock_init(&lock->lock);
                lock->readers = 0;
        }