Merge branch 'master' of /repos/git/net-next-2.6
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27a5ea6..1909311 100644
@@ -43,6 +43,7 @@
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -65,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
-static unsigned int nf_conntrack_hash_rnd __read_mostly;
+unsigned int nf_conntrack_hash_rnd __read_mostly;
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
 {
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
 static void death_by_timeout(unsigned long ul_conntrack)
 {
        struct nf_conn *ct = (void *)ul_conntrack;
+       struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp && tstamp->stop == 0)
+               tstamp->stop = ktime_to_ns(ktime_get_real());
 
        if (!test_bit(IPS_DYING_BIT, &ct->status) &&
            unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
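
The timestamp extension introduced by this series records when a flow ends: the destroy path stamps stop exactly once, only while it is still zero, so a later teardown path cannot overwrite an earlier stamp. A minimal sketch of that pattern, factored into a standalone helper purely for illustration (the helper name is made up):

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_timestamp.h>

/* Illustrative only: stamp the flow's stop time exactly once. */
static void ct_stamp_stop_once(struct nf_conn *ct)
{
	struct nf_conn_tstamp *tstamp = nf_conn_tstamp_find(ct);

	/* The extension may be absent (timestamping compiled out or the
	 * sysctl disabled), so the NULL check is mandatory. */
	if (tstamp && tstamp->stop == 0)
		tstamp->stop = ktime_to_ns(ktime_get_real());
}
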
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
        struct nf_conn_help *help;
+       struct nf_conn_tstamp *tstamp;
        struct hlist_nulls_node *n;
        enum ip_conntrack_info ctinfo;
        struct net *net;
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        ct->timeout.expires += jiffies;
        add_timer(&ct->timeout);
        atomic_inc(&ct->ct_general.use);
-       set_bit(IPS_CONFIRMED_BIT, &ct->status);
+       ct->status |= IPS_CONFIRMED;
+
+       /* set conntrack timestamp, if enabled. */
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp) {
+               if (skb->tstamp.tv64 == 0)
+                       __net_timestamp((struct sk_buff *)skb);
 
+               tstamp->start = ktime_to_ns(skb->tstamp);
+       }
        /* Since the lookup is lockless, hash insertion must be done after
         * starting the timer and setting the CONFIRMED bit. The RCU barriers
         * guarantee that no other CPU can find the conntrack before the above
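
Two separate changes sit in this hunk. The CONFIRMED bit is now set with a plain OR instead of the atomic set_bit(): as the comment right below notes, the entry has not been inserted into the hash yet, so no other CPU can observe it and atomicity buys nothing. And the new start stamp is taken from skb->tstamp, time-stamping the skb first if nothing else (a tap, for instance) has already done so. With both stamps kept in nanoseconds, a consumer could derive a flow duration along these lines (hypothetical helper, not part of this patch):

/* Hypothetical: duration of a flow from its timestamp extension.  A stop
 * value of 0 means the flow is still alive, so fall back to "now". */
static inline u64 ct_flow_duration_ns(const struct nf_conn_tstamp *tstamp)
{
	u64 stop = tstamp->stop ? tstamp->stop
				: (u64)ktime_to_ns(ktime_get_real());

	return stop - tstamp->start;
}
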
@@ -596,6 +611,21 @@ static noinline int early_drop(struct net *net, unsigned int hash)
        return dropped;
 }
 
+void init_nf_conntrack_hash_rnd(void)
+{
+       unsigned int rand;
+
+       /*
+        * Why not initialize nf_conntrack_hash_rnd in an init() function?
+        * Because there isn't enough entropy at boot time, so we
+        * initialize it as late as possible.
+        */
+       do {
+               get_random_bytes(&rand, sizeof(rand));
+       } while (!rand);
+       cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+}
+
 static struct nf_conn *
 __nf_conntrack_alloc(struct net *net, u16 zone,
                     const struct nf_conntrack_tuple *orig,
@@ -605,18 +635,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
        struct nf_conn *ct;
 
        if (unlikely(!nf_conntrack_hash_rnd)) {
-               unsigned int rand;
-
-               /*
-                * Why not initialize nf_conntrack_rnd in a "init()" function ?
-                * Because there isn't enough entropy when system initializing,
-                * and we initialize it as late as possible.
-                */
-               do {
-                       get_random_bytes(&rand, sizeof(rand));
-               } while (!rand);
-               cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
-
+               init_nf_conntrack_hash_rnd();
                /* recompute the hash as nf_conntrack_hash_rnd is initialized */
                hash = hash_conntrack_raw(orig, zone);
        }
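
These two hunks are a pure refactor: the lazy seeding of nf_conntrack_hash_rnd moves into init_nf_conntrack_hash_rnd(), and the variable loses its static qualifier so other conntrack code can reuse the same seed (its declaration presumably lives in a shared header that this diff does not touch). Seeding is deferred because there is little entropy early in boot; cmpxchg(&nf_conntrack_hash_rnd, 0, rand) lets racing first callers converge on a single non-zero value, after which the caller recomputes its hash with the valid seed. A sketch of how a second, hypothetical user of the seed would follow the same pattern:

#include <linux/jhash.h>

/* Hypothetical additional user of the shared hash seed. */
static unsigned int other_table_hash(const struct nf_conntrack_tuple *tuple)
{
	if (unlikely(!nf_conntrack_hash_rnd))
		init_nf_conntrack_hash_rnd();

	/* Any keyed hash works once the seed is known to be non-zero. */
	return jhash(tuple, sizeof(*tuple), nf_conntrack_hash_rnd);
}
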
@@ -651,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
         * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
         */
        memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
-              sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+              offsetof(struct nf_conn, proto) -
+              offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
        spin_lock_init(&ct->lock);
        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
        ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
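
The allocator used to zero everything from the end of the tuplehash array to the end of struct nf_conn; it now stops at proto, the last member, presumably because that per-protocol storage is set up by the L4 protocol handlers anyway and does not need to be cleared twice. The offsetof() arithmetic is the usual way to wipe a contiguous range of members; a self-contained illustration with a made-up struct:

#include <linux/stddef.h>
#include <linux/string.h>

struct demo {
	int a;		/* preserved */
	int b;		/* first member to be zeroed */
	long c;
	int d;		/* last member to be zeroed */
	int z;		/* preserved, like nf_conn's trailing 'proto' union */
};

/* Zero the members from 'b' up to, but not including, 'z'. */
static void clear_middle(struct demo *p)
{
	memset((char *)p + offsetof(struct demo, b), 0,
	       offsetof(struct demo, z) - offsetof(struct demo, b));
}
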
@@ -741,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        }
 
        nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+       nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 
        ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
        nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
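
Freshly created conntracks now get the timestamp extension attached right next to the accounting extension. nf_ct_tstamp_ext_add() is not defined in this file; going by the existing nf_ct_acct_ext_add() it is most likely a small inline of roughly this shape (the sysctl field and extension id are my assumption from the companion timestamp patch, not something this hunk shows):

static inline
struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	struct net *net = nf_ct_net(ct);

	/* Assumed per-netns switch, analogous to sysctl_acct. */
	if (!net->ct.sysctl_tstamp)
		return NULL;

	return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
#else
	return NULL;
#endif
}
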
@@ -1181,6 +1202,11 @@ struct __nf_ct_flush_report {
 static int kill_report(struct nf_conn *i, void *data)
 {
        struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+       struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(i);
+       if (tstamp && tstamp->stop == 0)
+               tstamp->stop = ktime_to_ns(ktime_get_real());
 
        /* If we fail to deliver the event, death_by_timeout() will retry */
        if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1197,9 +1223,9 @@ static int kill_all(struct nf_conn *i, void *data)
        return 1;
 }
 
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
 {
-       if (vmalloced)
+       if (is_vmalloc_addr(hash))
                vfree(hash);
        else
                free_pages((unsigned long)hash,
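
nf_ct_free_hashtable() drops its vmalloced argument because is_vmalloc_addr() can tell from the pointer alone whether the table came from vmalloc or from the page allocator, so callers no longer need to carry that flag. The pattern in isolation (illustrative helper, names made up):

#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Free a table no matter which allocator produced it. */
static void free_table(void *tbl, size_t bytes)
{
	if (is_vmalloc_addr(tbl))
		vfree(tbl);
	else
		free_pages((unsigned long)tbl, get_order(bytes));
}
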
@@ -1266,8 +1292,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
                goto i_see_dead_people;
        }
 
-       nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-                            net->ct.htable_size);
+       nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
        nf_conntrack_ecache_fini(net);
        nf_conntrack_acct_fini(net);
        nf_conntrack_expect_fini(net);
@@ -1296,21 +1321,18 @@ void nf_conntrack_cleanup(struct net *net)
        }
 }
 
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 {
        struct hlist_nulls_head *hash;
        unsigned int nr_slots, i;
        size_t sz;
 
-       *vmalloced = 0;
-
        BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
        nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
        sz = nr_slots * sizeof(struct hlist_nulls_head);
        hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
                                        get_order(sz));
        if (!hash) {
-               *vmalloced = 1;
                printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
                hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                                 PAGE_KERNEL);
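
The allocation side loses its *vmalloced out-parameter for the same reason: try physically contiguous pages first, fall back to vmalloc, and let is_vmalloc_addr() recover that choice at free time. A stripped-down sketch of the fallback (not the exact function, which also rounds the slot count to a page multiple and initializes the nulls lists):

#include <linux/gfp.h>
#include <linux/vmalloc.h>

/* Illustrative counterpart to the free helper sketched earlier. */
static void *alloc_table(size_t bytes)
{
	void *tbl = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
					     __GFP_ZERO, get_order(bytes));

	if (!tbl)
		tbl = __vmalloc(bytes, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
				PAGE_KERNEL);
	return tbl;
}
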
@@ -1326,7 +1348,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
-       int i, bucket, vmalloced, old_vmalloced;
+       int i, bucket;
        unsigned int hashsize, old_size;
        struct hlist_nulls_head *hash, *old_hash;
        struct nf_conntrack_tuple_hash *h;
@@ -1343,7 +1365,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
        if (!hashsize)
                return -EINVAL;
 
-       hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+       hash = nf_ct_alloc_hashtable(&hashsize, 1);
        if (!hash)
                return -ENOMEM;
 
@@ -1365,15 +1387,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
                }
        }
        old_size = init_net.ct.htable_size;
-       old_vmalloced = init_net.ct.hash_vmalloc;
        old_hash = init_net.ct.hash;
 
        init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-       init_net.ct.hash_vmalloc = vmalloced;
        init_net.ct.hash = hash;
        spin_unlock_bh(&nf_conntrack_lock);
 
-       nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+       nf_ct_free_hashtable(old_hash, old_size);
        return 0;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1486,8 +1506,7 @@ static int nf_conntrack_init_net(struct net *net)
        }
 
        net->ct.htable_size = nf_conntrack_htable_size;
-       net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
-                                            &net->ct.hash_vmalloc, 1);
+       net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
        if (!net->ct.hash) {
                ret = -ENOMEM;
                printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1499,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net)
        ret = nf_conntrack_acct_init(net);
        if (ret < 0)
                goto err_acct;
+       ret = nf_conntrack_tstamp_init(net);
+       if (ret < 0)
+               goto err_tstamp;
        ret = nf_conntrack_ecache_init(net);
        if (ret < 0)
                goto err_ecache;
@@ -1506,12 +1528,13 @@ static int nf_conntrack_init_net(struct net *net)
        return 0;
 
 err_ecache:
+       nf_conntrack_tstamp_fini(net);
+err_tstamp:
        nf_conntrack_acct_fini(net);
 err_acct:
        nf_conntrack_expect_fini(net);
 err_expect:
-       nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-                            net->ct.htable_size);
+       nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 err_hash:
        kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
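
Per-net setup now brings up the timestamp subsystem between accounting and ecache, and the error labels unwind in exactly the reverse order of initialization, which is why err_tstamp slots in between err_ecache and err_acct. The general shape of this goto-unwind idiom, with hypothetical per-step functions:

/* Hypothetical setup/teardown steps, shown only to illustrate the idiom. */
int step_a_init(struct net *net);
void step_a_fini(struct net *net);
int step_b_init(struct net *net);
void step_b_fini(struct net *net);
int step_c_init(struct net *net);

static int subsys_init_net(struct net *net)
{
	int ret;

	ret = step_a_init(net);
	if (ret < 0)
		goto err_a;
	ret = step_b_init(net);
	if (ret < 0)
		goto err_b;
	ret = step_c_init(net);
	if (ret < 0)
		goto err_c;
	return 0;

	/* Each label undoes everything the earlier steps set up. */
err_c:
	step_b_fini(net);
err_b:
	step_a_fini(net);
err_a:
	return ret;
}
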