1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
13 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
14 * - new API and handling of conntrack/nat helpers
15 * - now capable of multiple expectations for one master
16 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
17 * - add usage/reference counts to ip_conntrack_expect
18 * - export ip_conntrack[_expect]_{find_get,put} functions
19 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * 	- generalize L3 protocol dependent part.
21 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
22 * 	- add support for various sizes of conntrack structures.
24 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
27 #include <linux/config.h>
28 #include <linux/types.h>
29 #include <linux/netfilter.h>
30 #include <linux/module.h>
31 #include <linux/skbuff.h>
32 #include <linux/proc_fs.h>
33 #include <linux/vmalloc.h>
34 #include <linux/stddef.h>
35 #include <linux/slab.h>
36 #include <linux/random.h>
37 #include <linux/jhash.h>
38 #include <linux/err.h>
39 #include <linux/percpu.h>
40 #include <linux/moduleparam.h>
41 #include <linux/notifier.h>
42 #include <linux/kernel.h>
43 #include <linux/netdevice.h>
44 #include <linux/socket.h>
46 /* This rwlock protects the main hash table, protocol/helper/expected
47 registrations, conntrack timers*/
48 #define ASSERT_READ_LOCK(x)
49 #define ASSERT_WRITE_LOCK(x)
51 #include <net/netfilter/nf_conntrack.h>
52 #include <net/netfilter/nf_conntrack_l3proto.h>
53 #include <net/netfilter/nf_conntrack_protocol.h>
54 #include <net/netfilter/nf_conntrack_helper.h>
55 #include <net/netfilter/nf_conntrack_core.h>
56 #include <linux/netfilter_ipv4/listhelp.h>
58 #define NF_CONNTRACK_VERSION "0.4.1"
63 #define DEBUGP(format, args...)
66 DEFINE_RWLOCK(nf_conntrack_lock);
68 /* nf_conntrack_standalone needs this */
69 atomic_t nf_conntrack_count = ATOMIC_INIT(0);
71 void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
72 LIST_HEAD(nf_conntrack_expect_list);
73 struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
74 struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
75 static LIST_HEAD(helpers);
76 unsigned int nf_conntrack_htable_size = 0;
78 struct list_head *nf_conntrack_hash;
79 static kmem_cache_t *nf_conntrack_expect_cachep;
80 struct nf_conn nf_conntrack_untracked;
81 unsigned int nf_ct_log_invalid;
82 static LIST_HEAD(unconfirmed);
83 static int nf_conntrack_vmalloc;
85 static unsigned int nf_conntrack_next_id = 1;
86 static unsigned int nf_conntrack_expect_next_id = 1;
87 #ifdef CONFIG_NF_CONNTRACK_EVENTS
88 struct notifier_block *nf_conntrack_chain;
89 struct notifier_block *nf_conntrack_expect_chain;
91 DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
93 /* deliver cached events and clear cache entry - must be called with locally
94 * disabled softirqs */
96 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
98 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
99 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
101 notifier_call_chain(&nf_conntrack_chain, ecache->events,
105 nf_ct_put(ecache->ct);
109 /* Deliver all cached events for a particular conntrack. This is called
110 * by code prior to async packet handling for freeing the skb */
111 void nf_ct_deliver_cached_events(const struct nf_conn *ct)
113 struct nf_conntrack_ecache *ecache;
116 ecache = &__get_cpu_var(nf_conntrack_ecache);
117 if (ecache->ct == ct)
118 __nf_ct_deliver_cached_events(ecache);
122 /* Deliver cached events for old pending events, if current conntrack != old */
123 void __nf_ct_event_cache_init(struct nf_conn *ct)
125 struct nf_conntrack_ecache *ecache;
127 /* take care of delivering potentially old events */
128 ecache = &__get_cpu_var(nf_conntrack_ecache);
129 BUG_ON(ecache->ct == ct);
131 __nf_ct_deliver_cached_events(ecache);
132 /* initialize for this conntrack/packet */
134 nf_conntrack_get(&ct->ct_general);
137 /* flush the event cache - touches other CPU's data and must not be called
138 * while packets are still passing through the code */
139 static void nf_ct_event_cache_flush(void)
141 struct nf_conntrack_ecache *ecache;
145 ecache = &per_cpu(nf_conntrack_ecache, cpu);
147 nf_ct_put(ecache->ct);
151 static inline void nf_ct_event_cache_flush(void) {}
152 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
154 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
155 EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
158 * This scheme offers various size of "struct nf_conn" dependent on
159 * features(helper, nat, ...)
162 #define NF_CT_FEATURES_NAMELEN 256
164 /* name of slab cache. printed in /proc/slabinfo */
167 /* size of slab cache */
170 /* slab cache pointer */
171 kmem_cache_t *cachep;
173 /* allocated slab cache + modules which uses this slab cache */
177 int (*init_conntrack)(struct nf_conn *, u_int32_t);
179 } nf_ct_cache[NF_CT_F_NUM];
181 /* protect members of nf_ct_cache except of "use" */
182 DEFINE_RWLOCK(nf_ct_cache_lock);
184 /* This avoids calling kmem_cache_create() with same name simultaneously */
185 DECLARE_MUTEX(nf_ct_cache_mutex);
187 extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
188 struct nf_conntrack_protocol *
189 __nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
191 if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
192 return &nf_conntrack_generic_protocol;
194 return nf_ct_protos[l3proto][protocol];
197 /* this is guaranteed to always return a valid protocol helper, since
198 * it falls back to generic_protocol */
199 struct nf_conntrack_protocol *
200 nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
202 struct nf_conntrack_protocol *p;
205 p = __nf_ct_proto_find(l3proto, protocol);
207 if (!try_module_get(p->me))
208 p = &nf_conntrack_generic_protocol;
215 void nf_ct_proto_put(struct nf_conntrack_protocol *p)
220 struct nf_conntrack_l3proto *
221 nf_ct_l3proto_find_get(u_int16_t l3proto)
223 struct nf_conntrack_l3proto *p;
226 p = __nf_ct_l3proto_find(l3proto);
228 if (!try_module_get(p->me))
229 p = &nf_conntrack_generic_l3proto;
236 void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
241 static int nf_conntrack_hash_rnd_initted;
242 static unsigned int nf_conntrack_hash_rnd;
244 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
245 unsigned int size, unsigned int rnd)
248 a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
249 ((tuple->src.l3num) << 16) | tuple->dst.protonum);
250 b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
251 (tuple->src.u.all << 16) | tuple->dst.u.all);
253 return jhash_2words(a, b, rnd) % size;
256 static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
258 return __hash_conntrack(tuple, nf_conntrack_htable_size,
259 nf_conntrack_hash_rnd);
262 /* Initialize "struct nf_conn" which has spaces for helper */
264 init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features)
267 conntrack->help = (union nf_conntrack_help *)
268 (((unsigned long)conntrack->data
269 + (__alignof__(union nf_conntrack_help) - 1))
270 & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1))));
274 int nf_conntrack_register_cache(u_int32_t features, const char *name,
276 int (*init)(struct nf_conn *, u_int32_t))
280 kmem_cache_t *cachep;
282 DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
283 features, name, size);
285 if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
286 DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
291 down(&nf_ct_cache_mutex);
293 write_lock_bh(&nf_ct_cache_lock);
294 /* e.g: multiple helpers are loaded */
295 if (nf_ct_cache[features].use > 0) {
296 DEBUGP("nf_conntrack_register_cache: already resisterd.\n");
297 if ((!strncmp(nf_ct_cache[features].name, name,
298 NF_CT_FEATURES_NAMELEN))
299 && nf_ct_cache[features].size == size
300 && nf_ct_cache[features].init_conntrack == init) {
301 DEBUGP("nf_conntrack_register_cache: reusing.\n");
302 nf_ct_cache[features].use++;
307 write_unlock_bh(&nf_ct_cache_lock);
308 up(&nf_ct_cache_mutex);
311 write_unlock_bh(&nf_ct_cache_lock);
314 * The memory space for name of slab cache must be alive until
315 * cache is destroyed.
317 cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
318 if (cache_name == NULL) {
319 DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
324 if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
325 >= NF_CT_FEATURES_NAMELEN) {
326 printk("nf_conntrack_register_cache: name too long\n");
331 cachep = kmem_cache_create(cache_name, size, 0, 0,
334 printk("nf_conntrack_register_cache: Can't create slab cache "
335 "for the features = 0x%x\n", features);
340 write_lock_bh(&nf_ct_cache_lock);
341 nf_ct_cache[features].use = 1;
342 nf_ct_cache[features].size = size;
343 nf_ct_cache[features].init_conntrack = init;
344 nf_ct_cache[features].cachep = cachep;
345 nf_ct_cache[features].name = cache_name;
346 write_unlock_bh(&nf_ct_cache_lock);
353 up(&nf_ct_cache_mutex);
357 /* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */
358 void nf_conntrack_unregister_cache(u_int32_t features)
360 kmem_cache_t *cachep;
364 * This assures that kmem_cache_create() isn't called before destroying
367 DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
368 down(&nf_ct_cache_mutex);
370 write_lock_bh(&nf_ct_cache_lock);
371 if (--nf_ct_cache[features].use > 0) {
372 write_unlock_bh(&nf_ct_cache_lock);
373 up(&nf_ct_cache_mutex);
376 cachep = nf_ct_cache[features].cachep;
377 name = nf_ct_cache[features].name;
378 nf_ct_cache[features].cachep = NULL;
379 nf_ct_cache[features].name = NULL;
380 nf_ct_cache[features].init_conntrack = NULL;
381 nf_ct_cache[features].size = 0;
382 write_unlock_bh(&nf_ct_cache_lock);
386 kmem_cache_destroy(cachep);
389 up(&nf_ct_cache_mutex);
393 nf_ct_get_tuple(const struct sk_buff *skb,
395 unsigned int dataoff,
398 struct nf_conntrack_tuple *tuple,
399 const struct nf_conntrack_l3proto *l3proto,
400 const struct nf_conntrack_protocol *protocol)
402 NF_CT_TUPLE_U_BLANK(tuple);
404 tuple->src.l3num = l3num;
405 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
408 tuple->dst.protonum = protonum;
409 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
411 return protocol->pkt_to_tuple(skb, dataoff, tuple);
415 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
416 const struct nf_conntrack_tuple *orig,
417 const struct nf_conntrack_l3proto *l3proto,
418 const struct nf_conntrack_protocol *protocol)
420 NF_CT_TUPLE_U_BLANK(inverse);
422 inverse->src.l3num = orig->src.l3num;
423 if (l3proto->invert_tuple(inverse, orig) == 0)
426 inverse->dst.dir = !orig->dst.dir;
428 inverse->dst.protonum = orig->dst.protonum;
429 return protocol->invert_tuple(inverse, orig);
432 /* nf_conntrack_expect helper functions */
433 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
435 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
436 NF_CT_ASSERT(!timer_pending(&exp->timeout));
437 list_del(&exp->list);
438 NF_CT_STAT_INC(expect_delete);
439 exp->master->expecting--;
440 nf_conntrack_expect_put(exp);
443 static void expectation_timed_out(unsigned long ul_expect)
445 struct nf_conntrack_expect *exp = (void *)ul_expect;
447 write_lock_bh(&nf_conntrack_lock);
448 nf_ct_unlink_expect(exp);
449 write_unlock_bh(&nf_conntrack_lock);
450 nf_conntrack_expect_put(exp);
453 struct nf_conntrack_expect *
454 __nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
456 struct nf_conntrack_expect *i;
458 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
459 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
467 /* Just find a expectation corresponding to a tuple. */
468 struct nf_conntrack_expect *
469 nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
471 struct nf_conntrack_expect *i;
473 read_lock_bh(&nf_conntrack_lock);
474 i = __nf_conntrack_expect_find(tuple);
475 read_unlock_bh(&nf_conntrack_lock);
480 /* If an expectation for this connection is found, it gets delete from
481 * global list then returned. */
482 static struct nf_conntrack_expect *
483 find_expectation(const struct nf_conntrack_tuple *tuple)
485 struct nf_conntrack_expect *i;
487 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
488 /* If master is not in hash table yet (ie. packet hasn't left
489 this machine yet), how can other end know about expected?
490 Hence these are not the droids you are looking for (if
491 master ct never got confirmed, we'd hold a reference to it
492 and weird things would happen to future packets). */
493 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
494 && nf_ct_is_confirmed(i->master)) {
495 if (i->flags & NF_CT_EXPECT_PERMANENT) {
498 } else if (del_timer(&i->timeout)) {
499 nf_ct_unlink_expect(i);
507 /* delete all expectations for this conntrack */
508 void nf_ct_remove_expectations(struct nf_conn *ct)
510 struct nf_conntrack_expect *i, *tmp;
512 /* Optimization: most connection never expect any others. */
513 if (ct->expecting == 0)
516 list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
517 if (i->master == ct && del_timer(&i->timeout)) {
518 nf_ct_unlink_expect(i);
519 nf_conntrack_expect_put(i);
525 clean_from_lists(struct nf_conn *ct)
529 DEBUGP("clean_from_lists(%p)\n", ct);
530 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
532 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
533 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
534 LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
535 LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
537 /* Destroy all pending expectations */
538 nf_ct_remove_expectations(ct);
542 destroy_conntrack(struct nf_conntrack *nfct)
544 struct nf_conn *ct = (struct nf_conn *)nfct;
545 struct nf_conntrack_l3proto *l3proto;
546 struct nf_conntrack_protocol *proto;
548 DEBUGP("destroy_conntrack(%p)\n", ct);
549 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
550 NF_CT_ASSERT(!timer_pending(&ct->timeout));
552 nf_conntrack_event(IPCT_DESTROY, ct);
553 set_bit(IPS_DYING_BIT, &ct->status);
555 /* To make sure we don't get any weird locking issues here:
556 * destroy_conntrack() MUST NOT be called with a write lock
557 * to nf_conntrack_lock!!! -HW */
558 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
559 if (l3proto && l3proto->destroy)
560 l3proto->destroy(ct);
562 proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
563 if (proto && proto->destroy)
566 if (nf_conntrack_destroyed)
567 nf_conntrack_destroyed(ct);
569 write_lock_bh(&nf_conntrack_lock);
570 /* Expectations will have been removed in clean_from_lists,
571 * except TFTP can create an expectation on the first packet,
572 * before connection is in the list, so we need to clean here,
574 nf_ct_remove_expectations(ct);
576 /* We overload first tuple to link into unconfirmed list. */
577 if (!nf_ct_is_confirmed(ct)) {
578 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
579 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
582 NF_CT_STAT_INC(delete);
583 write_unlock_bh(&nf_conntrack_lock);
586 nf_ct_put(ct->master);
588 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
589 nf_conntrack_free(ct);
592 static void death_by_timeout(unsigned long ul_conntrack)
594 struct nf_conn *ct = (void *)ul_conntrack;
596 write_lock_bh(&nf_conntrack_lock);
597 /* Inside lock so preempt is disabled on module removal path.
598 * Otherwise we can get spurious warnings. */
599 NF_CT_STAT_INC(delete_list);
600 clean_from_lists(ct);
601 write_unlock_bh(&nf_conntrack_lock);
606 conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
607 const struct nf_conntrack_tuple *tuple,
608 const struct nf_conn *ignored_conntrack)
610 ASSERT_READ_LOCK(&nf_conntrack_lock);
611 return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
612 && nf_ct_tuple_equal(tuple, &i->tuple);
615 struct nf_conntrack_tuple_hash *
616 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
617 const struct nf_conn *ignored_conntrack)
619 struct nf_conntrack_tuple_hash *h;
620 unsigned int hash = hash_conntrack(tuple);
622 ASSERT_READ_LOCK(&nf_conntrack_lock);
623 list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
624 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
625 NF_CT_STAT_INC(found);
628 NF_CT_STAT_INC(searched);
634 /* Find a connection corresponding to a tuple. */
635 struct nf_conntrack_tuple_hash *
636 nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
637 const struct nf_conn *ignored_conntrack)
639 struct nf_conntrack_tuple_hash *h;
641 read_lock_bh(&nf_conntrack_lock);
642 h = __nf_conntrack_find(tuple, ignored_conntrack);
644 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
645 read_unlock_bh(&nf_conntrack_lock);
650 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
652 unsigned int repl_hash)
654 ct->id = ++nf_conntrack_next_id;
655 list_prepend(&nf_conntrack_hash[hash],
656 &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
657 list_prepend(&nf_conntrack_hash[repl_hash],
658 &ct->tuplehash[IP_CT_DIR_REPLY].list);
661 void nf_conntrack_hash_insert(struct nf_conn *ct)
663 unsigned int hash, repl_hash;
665 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
666 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
668 write_lock_bh(&nf_conntrack_lock);
669 __nf_conntrack_hash_insert(ct, hash, repl_hash);
670 write_unlock_bh(&nf_conntrack_lock);
673 /* Confirm a connection given skb; places it in hash table */
675 __nf_conntrack_confirm(struct sk_buff **pskb)
677 unsigned int hash, repl_hash;
679 enum ip_conntrack_info ctinfo;
681 ct = nf_ct_get(*pskb, &ctinfo);
683 /* ipt_REJECT uses nf_conntrack_attach to attach related
684 ICMP/TCP RST packets in other direction. Actual packet
685 which created connection will be IP_CT_NEW or for an
686 expected connection, IP_CT_RELATED. */
687 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
690 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
691 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
693 /* We're not in hash table, and we refuse to set up related
694 connections for unconfirmed conns. But packet copies and
695 REJECT will give spurious warnings here. */
696 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
698 /* No external references means noone else could have
700 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
701 DEBUGP("Confirming conntrack %p\n", ct);
703 write_lock_bh(&nf_conntrack_lock);
705 /* See if there's one in the list already, including reverse:
706 NAT could have grabbed it without realizing, since we're
707 not in the hash. If there is, we lost race. */
708 if (!LIST_FIND(&nf_conntrack_hash[hash],
710 struct nf_conntrack_tuple_hash *,
711 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
712 && !LIST_FIND(&nf_conntrack_hash[repl_hash],
714 struct nf_conntrack_tuple_hash *,
715 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
716 /* Remove from unconfirmed list */
717 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
719 __nf_conntrack_hash_insert(ct, hash, repl_hash);
720 /* Timer relative to confirmation time, not original
721 setting time, otherwise we'd get timer wrap in
722 weird delay cases. */
723 ct->timeout.expires += jiffies;
724 add_timer(&ct->timeout);
725 atomic_inc(&ct->ct_general.use);
726 set_bit(IPS_CONFIRMED_BIT, &ct->status);
727 NF_CT_STAT_INC(insert);
728 write_unlock_bh(&nf_conntrack_lock);
730 nf_conntrack_event_cache(IPCT_HELPER, *pskb);
731 #ifdef CONFIG_NF_NAT_NEEDED
732 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
733 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
734 nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
736 nf_conntrack_event_cache(master_ct(ct) ?
737 IPCT_RELATED : IPCT_NEW, *pskb);
741 NF_CT_STAT_INC(insert_failed);
742 write_unlock_bh(&nf_conntrack_lock);
746 /* Returns true if a connection correspondings to the tuple (required
749 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
750 const struct nf_conn *ignored_conntrack)
752 struct nf_conntrack_tuple_hash *h;
754 read_lock_bh(&nf_conntrack_lock);
755 h = __nf_conntrack_find(tuple, ignored_conntrack);
756 read_unlock_bh(&nf_conntrack_lock);
761 /* There's a small race here where we may free a just-assured
762 connection. Too bad: we're in trouble anyway. */
763 static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
765 return !(test_bit(IPS_ASSURED_BIT,
766 &nf_ct_tuplehash_to_ctrack(i)->status));
769 static int early_drop(struct list_head *chain)
771 /* Traverse backwards: gives us oldest, which is roughly LRU */
772 struct nf_conntrack_tuple_hash *h;
773 struct nf_conn *ct = NULL;
776 read_lock_bh(&nf_conntrack_lock);
777 h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
779 ct = nf_ct_tuplehash_to_ctrack(h);
780 atomic_inc(&ct->ct_general.use);
782 read_unlock_bh(&nf_conntrack_lock);
787 if (del_timer(&ct->timeout)) {
788 death_by_timeout((unsigned long)ct);
790 NF_CT_STAT_INC(early_drop);
796 static inline int helper_cmp(const struct nf_conntrack_helper *i,
797 const struct nf_conntrack_tuple *rtuple)
799 return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
802 static struct nf_conntrack_helper *
803 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
805 return LIST_FIND(&helpers, helper_cmp,
806 struct nf_conntrack_helper *,
810 struct nf_conntrack_helper *
811 nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple)
813 struct nf_conntrack_helper *helper;
815 /* need nf_conntrack_lock to assure that helper exists until
816 * try_module_get() is called */
817 read_lock_bh(&nf_conntrack_lock);
819 helper = __nf_ct_helper_find(tuple);
821 /* need to increase module usage count to assure helper will
822 * not go away while the caller is e.g. busy putting a
823 * conntrack in the hash that uses the helper */
824 if (!try_module_get(helper->me))
828 read_unlock_bh(&nf_conntrack_lock);
833 void nf_ct_helper_put(struct nf_conntrack_helper *helper)
835 module_put(helper->me);
838 static struct nf_conn *
839 __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
840 const struct nf_conntrack_tuple *repl,
841 const struct nf_conntrack_l3proto *l3proto)
843 struct nf_conn *conntrack = NULL;
844 u_int32_t features = 0;
846 if (!nf_conntrack_hash_rnd_initted) {
847 get_random_bytes(&nf_conntrack_hash_rnd, 4);
848 nf_conntrack_hash_rnd_initted = 1;
852 && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
853 unsigned int hash = hash_conntrack(orig);
854 /* Try dropping from this hash chain. */
855 if (!early_drop(&nf_conntrack_hash[hash])) {
858 "nf_conntrack: table full, dropping"
860 return ERR_PTR(-ENOMEM);
864 /* find features needed by this conntrack. */
865 features = l3proto->get_features(orig);
866 read_lock_bh(&nf_conntrack_lock);
867 if (__nf_ct_helper_find(repl) != NULL)
868 features |= NF_CT_F_HELP;
869 read_unlock_bh(&nf_conntrack_lock);
871 DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
873 read_lock_bh(&nf_ct_cache_lock);
875 if (!nf_ct_cache[features].use) {
876 DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
881 conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
882 if (conntrack == NULL) {
883 DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
887 memset(conntrack, 0, nf_ct_cache[features].size);
888 conntrack->features = features;
889 if (nf_ct_cache[features].init_conntrack &&
890 nf_ct_cache[features].init_conntrack(conntrack, features) < 0) {
891 DEBUGP("nf_conntrack_alloc: failed to init\n");
892 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
897 atomic_set(&conntrack->ct_general.use, 1);
898 conntrack->ct_general.destroy = destroy_conntrack;
899 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
900 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
901 /* Don't set timer yet: wait for confirmation */
902 init_timer(&conntrack->timeout);
903 conntrack->timeout.data = (unsigned long)conntrack;
904 conntrack->timeout.function = death_by_timeout;
906 atomic_inc(&nf_conntrack_count);
908 read_unlock_bh(&nf_ct_cache_lock);
912 struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
913 const struct nf_conntrack_tuple *repl)
915 struct nf_conntrack_l3proto *l3proto;
917 l3proto = __nf_ct_l3proto_find(orig->src.l3num);
918 return __nf_conntrack_alloc(orig, repl, l3proto);
921 void nf_conntrack_free(struct nf_conn *conntrack)
923 u_int32_t features = conntrack->features;
924 NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
925 DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
927 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
928 atomic_dec(&nf_conntrack_count);
931 /* Allocate a new conntrack: we return -ENOMEM if classification
932 failed due to stress. Otherwise it really is unclassifiable. */
933 static struct nf_conntrack_tuple_hash *
934 init_conntrack(const struct nf_conntrack_tuple *tuple,
935 struct nf_conntrack_l3proto *l3proto,
936 struct nf_conntrack_protocol *protocol,
938 unsigned int dataoff)
940 struct nf_conn *conntrack;
941 struct nf_conntrack_tuple repl_tuple;
942 struct nf_conntrack_expect *exp;
944 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
945 DEBUGP("Can't invert tuple.\n");
949 conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
950 if (conntrack == NULL || IS_ERR(conntrack)) {
951 DEBUGP("Can't allocate conntrack.\n");
952 return (struct nf_conntrack_tuple_hash *)conntrack;
955 if (!protocol->new(conntrack, skb, dataoff)) {
956 nf_conntrack_free(conntrack);
957 DEBUGP("init conntrack: can't track with proto module\n");
961 write_lock_bh(&nf_conntrack_lock);
962 exp = find_expectation(tuple);
965 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
967 /* Welcome, Mr. Bond. We've been expecting you... */
968 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
969 conntrack->master = exp->master;
970 #ifdef CONFIG_NF_CONNTRACK_MARK
971 conntrack->mark = exp->master->mark;
973 nf_conntrack_get(&conntrack->master->ct_general);
974 NF_CT_STAT_INC(expect_new);
976 conntrack->helper = __nf_ct_helper_find(&repl_tuple);
981 /* Overload tuple linked list to put us in unconfirmed list. */
982 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
984 write_unlock_bh(&nf_conntrack_lock);
988 exp->expectfn(conntrack, exp);
989 nf_conntrack_expect_put(exp);
992 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
995 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
996 static inline struct nf_conn *
997 resolve_normal_ct(struct sk_buff *skb,
998 unsigned int dataoff,
1001 struct nf_conntrack_l3proto *l3proto,
1002 struct nf_conntrack_protocol *proto,
1004 enum ip_conntrack_info *ctinfo)
1006 struct nf_conntrack_tuple tuple;
1007 struct nf_conntrack_tuple_hash *h;
1010 if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
1011 dataoff, l3num, protonum, &tuple, l3proto,
1013 DEBUGP("resolve_normal_ct: Can't get tuple\n");
1017 /* look for tuple match */
1018 h = nf_conntrack_find_get(&tuple, NULL);
1020 h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
1026 ct = nf_ct_tuplehash_to_ctrack(h);
1028 /* It exists; we have (non-exclusive) reference. */
1029 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1030 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
1031 /* Please set reply bit if this packet OK */
1034 /* Once we've had two way comms, always ESTABLISHED. */
1035 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1036 DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
1037 *ctinfo = IP_CT_ESTABLISHED;
1038 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1039 DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
1040 *ctinfo = IP_CT_RELATED;
1042 DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
1043 *ctinfo = IP_CT_NEW;
1047 skb->nfct = &ct->ct_general;
1048 skb->nfctinfo = *ctinfo;
1053 nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
1056 enum ip_conntrack_info ctinfo;
1057 struct nf_conntrack_l3proto *l3proto;
1058 struct nf_conntrack_protocol *proto;
1059 unsigned int dataoff;
1064 /* Previously seen (loopback or untracked)? Ignore. */
1065 if ((*pskb)->nfct) {
1066 NF_CT_STAT_INC(ignore);
1070 l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
1071 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
1072 DEBUGP("not prepared to track yet or error occured\n");
1076 proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
1078 /* It may be an special packet, error, unclean...
1079 * inverse of the return code tells to the netfilter
1080 * core what to do with the packet. */
1081 if (proto->error != NULL &&
1082 (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
1083 NF_CT_STAT_INC(error);
1084 NF_CT_STAT_INC(invalid);
1088 ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
1089 &set_reply, &ctinfo);
1091 /* Not valid part of a connection */
1092 NF_CT_STAT_INC(invalid);
1097 /* Too stressed to deal. */
1098 NF_CT_STAT_INC(drop);
1102 NF_CT_ASSERT((*pskb)->nfct);
1104 ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
1106 /* Invalid: inverse of the return code tells
1107 * the netfilter core what to do */
1108 DEBUGP("nf_conntrack_in: Can't track with proto module\n");
1109 nf_conntrack_put((*pskb)->nfct);
1110 (*pskb)->nfct = NULL;
1111 NF_CT_STAT_INC(invalid);
1115 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1116 nf_conntrack_event_cache(IPCT_STATUS, *pskb);
1121 int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1122 const struct nf_conntrack_tuple *orig)
1124 return nf_ct_invert_tuple(inverse, orig,
1125 __nf_ct_l3proto_find(orig->src.l3num),
1126 __nf_ct_proto_find(orig->src.l3num,
1127 orig->dst.protonum));
1130 /* Would two expected things clash? */
1131 static inline int expect_clash(const struct nf_conntrack_expect *a,
1132 const struct nf_conntrack_expect *b)
1134 /* Part covered by intersection of masks must be unequal,
1135 otherwise they clash */
1136 struct nf_conntrack_tuple intersect_mask;
1139 intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
1140 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
1141 intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
1142 intersect_mask.dst.protonum = a->mask.dst.protonum
1143 & b->mask.dst.protonum;
1145 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1146 intersect_mask.src.u3.all[count] =
1147 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
1150 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1151 intersect_mask.dst.u3.all[count] =
1152 a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
1155 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
1158 static inline int expect_matches(const struct nf_conntrack_expect *a,
1159 const struct nf_conntrack_expect *b)
1161 return a->master == b->master
1162 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
1163 && nf_ct_tuple_equal(&a->mask, &b->mask);
1166 /* Generally a bad idea to call this: could have matched already. */
/* Remove an expectation equal to *exp from the global list.  Only an
 * entry whose timer we can still cancel is unlinked (del_timer()
 * failing means the timeout handler is already tearing it down). */
1167 void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
1169 struct nf_conntrack_expect *i;
1171 write_lock_bh(&nf_conntrack_lock);
1172 /* choose the oldest expectation to evict */
1173 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1174 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
1175 nf_ct_unlink_expect(i);
1176 write_unlock_bh(&nf_conntrack_lock);
/* Drop the list's reference taken at insert time. */
1177 nf_conntrack_expect_put(i);
1181 write_unlock_bh(&nf_conntrack_lock);
1184 /* We don't increase the master conntrack refcount for non-fulfilled
1185 * conntracks. During the conntrack destruction, the expectations are
1186 * always killed before the conntrack itself */
/* Allocate a new expectation from the slab cache with an initial
 * refcount of 1 (the caller's reference).  GFP_ATOMIC: callers run
 * in packet-processing context.  Returns NULL on OOM (error path on
 * lines not shown here). */
1187 struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
1189 struct nf_conntrack_expect *new;
1191 new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
1193 DEBUGP("expect_related: OOM allocating expect\n");
1197 atomic_set(&new->use, 1);
/* Drop one reference to an expectation; frees it back to the slab
 * cache when the last reference goes away. */
1201 void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
1203 if (atomic_dec_and_test(&exp->use))
1204 kmem_cache_free(nf_conntrack_expect_cachep, exp);
/* Link an expectation into the global list and arm its timeout timer.
 * Caller must hold nf_conntrack_lock for writing. */
1207 static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
/* Reference held by the global expectation list. */
1209 atomic_inc(&exp->use);
1210 exp->master->expecting++;
1211 list_add(&exp->list, &nf_conntrack_expect_list);
/* Timeout comes from the master's helper (seconds -> jiffies). */
1213 init_timer(&exp->timeout);
1214 exp->timeout.data = (unsigned long)exp;
1215 exp->timeout.function = expectation_timed_out;
1216 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
1217 add_timer(&exp->timeout);
/* Unique id for ctnetlink dumps; NOTE(review): a second reference is
 * taken here as well — presumably for id-based lookups, confirm
 * against nf_conntrack_expect_find_get() users. */
1219 exp->id = ++nf_conntrack_expect_next_id;
1220 atomic_inc(&exp->use);
1221 NF_CT_STAT_INC(expect_create);
1224 /* Race with expectations being used means we could have none to find; OK. */
/* Kill this master's oldest pending expectation (list is newest-first,
 * so the reverse walk finds the oldest).  Only evicts an entry whose
 * timer can still be cancelled.  Caller holds nf_conntrack_lock. */
1225 static void evict_oldest_expect(struct nf_conn *master)
1227 struct nf_conntrack_expect *i;
1229 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1230 if (i->master == master) {
1231 if (del_timer(&i->timeout)) {
1232 nf_ct_unlink_expect(i);
1233 nf_conntrack_expect_put(i);
/* Re-arm an existing expectation's timeout.  If the timer could not be
 * deleted it is already firing (expectation is dying) and we must not
 * touch it; the success/failure returns are on lines not shown here. */
1240 static inline int refresh_timer(struct nf_conntrack_expect *i)
1242 if (!del_timer(&i->timeout))
1245 i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
1246 add_timer(&i->timeout);
/* Register a new expectation for expect->master.  An identical
 * existing expectation just gets its timer refreshed; a clashing
 * (overlapping) one fails the call.  If the master is at its helper's
 * max_expected limit, its oldest expectation is evicted first.
 * Error/return paths are on lines not shown here. */
1250 int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1252 struct nf_conntrack_expect *i;
1253 struct nf_conn *master = expect->master;
/* BUG FIX: the debug statement referenced the nonexistent symbol
 * `related_to` (copy/paste from ip_conntrack); it broke the build
 * whenever DEBUGP was compiled in.  Use `expect`. */
1256 DEBUGP("nf_conntrack_expect_related %p\n", expect);
1257 DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
1258 DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask);
1260 write_lock_bh(&nf_conntrack_lock);
1261 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
1262 if (expect_matches(i, expect)) {
1263 /* Refresh timer: if it's dying, ignore.. */
1264 if (refresh_timer(i)) {
1268 } else if (expect_clash(i, expect)) {
1273 /* Will be over limit? */
1274 if (master->helper->max_expected &&
1275 master->expecting >= master->helper->max_expected)
1276 evict_oldest_expect(master);
1278 nf_conntrack_expect_insert(expect);
1279 nf_conntrack_expect_event(IPEXP_NEW, expect);
1282 write_unlock_bh(&nf_conntrack_lock);
1286 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1287 implicitly racy: see __nf_conntrack_confirm */
1288 void nf_conntrack_alter_reply(struct nf_conn *conntrack,
1289 const struct nf_conntrack_tuple *newreply)
1291 write_lock_bh(&nf_conntrack_lock);
1292 /* Should be unconfirmed, so not in hash table yet */
1293 NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack));
1295 DEBUGP("Altering reply tuple of %p to ", conntrack);
1296 NF_CT_DUMP_TUPLE(newreply);
1298 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
/* Only re-pick a helper when nothing depends on the current one:
 * neither an expected (child) connection nor pending expectations. */
1299 if (!conntrack->master && conntrack->expecting == 0)
1300 conntrack->helper = __nf_ct_helper_find(newreply);
1301 write_unlock_bh(&nf_conntrack_lock);
/* Register a conntrack helper: make sure the larger helper-capable
 * conntrack slab cache exists, then prepend the helper to the global
 * list.  A helper must have a nonzero expectation timeout.  Return
 * paths are on lines not shown here. */
1304 int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
1307 BUG_ON(me->timeout == 0);
1309 ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
1310 sizeof(struct nf_conn)
1311 + sizeof(union nf_conntrack_help)
1312 + __alignof__(union nf_conntrack_help),
1313 init_conntrack_for_helper);
/* BUG FIX: error message said "reigster"; corrected spelling so the
 * log line is greppable against this function's name. */
1315 printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
1318 write_lock_bh(&nf_conntrack_lock);
1319 list_prepend(&helpers, me);
1320 write_unlock_bh(&nf_conntrack_lock);
/* Look a helper up by name in the global helpers list; linear scan.
 * Caller must hold nf_conntrack_lock (no locking here — "__" prefix). */
1325 struct nf_conntrack_helper *
1326 __nf_conntrack_helper_find_byname(const char *name)
1328 struct nf_conntrack_helper *h;
1330 list_for_each_entry(h, &helpers, list) {
1331 if (!strcmp(h->name, name))
/* Per-conntrack callback for helper unregistration: if this conntrack
 * uses helper `me`, emit an IPCT_HELPER event and detach the helper. */
1338 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
1339 const struct nf_conntrack_helper *me)
1341 if (nf_ct_tuplehash_to_ctrack(i)->helper == me) {
1342 nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i));
1343 nf_ct_tuplehash_to_ctrack(i)->helper = NULL;
/* Unregister a helper: remove it from the helpers list, kill all
 * expectations whose master used it, and NULL out the helper pointer
 * in every live conntrack (hash table and unconfirmed list). */
1348 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
1351 struct nf_conntrack_expect *exp, *tmp;
1353 /* Need write lock here, to delete helper. */
1354 write_lock_bh(&nf_conntrack_lock);
1355 LIST_DELETE(&helpers, me);
1357 /* Get rid of expectations */
1358 list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
/* del_timer() success means the timeout handler hasn't run; we own
 * the teardown and drop the list's reference. */
1359 if (exp->master->helper == me && del_timer(&exp->timeout)) {
1360 nf_ct_unlink_expect(exp);
1361 nf_conntrack_expect_put(exp);
1365 /* Get rid of expecteds, set helpers to NULL. */
1366 LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
1367 for (i = 0; i < nf_conntrack_htable_size; i++)
1368 LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
1369 struct nf_conntrack_tuple_hash *, me);
1370 write_unlock_bh(&nf_conntrack_lock);
1372 /* Someone could be still looking at the helper in a bh. */
1376 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1377 void __nf_ct_refresh_acct(struct nf_conn *ct,
1378 enum ip_conntrack_info ctinfo,
1379 const struct sk_buff *skb,
1380 unsigned long extra_jiffies,
1385 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1388 write_lock_bh(&nf_conntrack_lock);
1390 /* If not in hash table, timer will not be active yet */
1391 if (!nf_ct_is_confirmed(ct)) {
/* Unconfirmed: timer isn't armed, so just stash the interval;
 * __nf_conntrack_confirm adds jiffies when it arms the timer. */
1392 ct->timeout.expires = extra_jiffies;
1393 event = IPCT_REFRESH;
1395 /* Need del_timer for race avoidance (may already be dying). */
1396 if (del_timer(&ct->timeout)) {
1397 ct->timeout.expires = jiffies + extra_jiffies;
1398 add_timer(&ct->timeout);
1399 event = IPCT_REFRESH;
1403 #ifdef CONFIG_NF_CT_ACCT
/* Per-direction packet/byte accounting; byte count covers the L3
 * header onwards (skb->len minus the pre-network-header bytes). */
1405 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1406 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1407 skb->len - (unsigned int)(skb->nh.raw - skb->data);
/* Warn listeners before a 32-bit counter wraps (top bit set). */
1408 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1409 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1410 event |= IPCT_COUNTER_FILLING;
1414 write_unlock_bh(&nf_conntrack_lock);
1416 /* must be unlocked when calling event cache */
1418 nf_conntrack_event_cache(event, skb);
1421 #if defined(CONFIG_NF_CT_NETLINK) || \
1422 defined(CONFIG_NF_CT_NETLINK_MODULE)
1424 #include <linux/netfilter/nfnetlink.h>
1425 #include <linux/netfilter/nfnetlink_conntrack.h>
1427 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
1428 * in ip_conntrack_core, since we don't want the protocols to autoload
1429 * or depend on ctnetlink */
/* Dump a port-based tuple's src/dst ports as netlink attributes.
 * NFA_PUT jumps to the nfattr_failure label (on lines not shown) when
 * the skb runs out of room. */
1430 int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
1431 const struct nf_conntrack_tuple *tuple)
1433 NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
1434 &tuple->src.u.tcp.port);
1435 NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
1436 &tuple->dst.u.tcp.port);
/* Minimum payload sizes for the port attributes, checked by
 * nfattr_bad_size() before the attribute data is dereferenced. */
1443 static const size_t cta_min_proto[CTA_PROTO_MAX] = {
1444 [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
1445 [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t)
/* Parse src/dst port attributes back into a tuple.  Both attributes
 * must be present and correctly sized; error returns are on lines not
 * shown here. */
1448 int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
1449 struct nf_conntrack_tuple *t)
1451 if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
1454 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
1458 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
1460 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
1466 /* Used by ipt_REJECT and ip6t_REJECT. */
/* Attach skb's conntrack to a locally generated reply packet (nskb),
 * flipping the direction: the reply travels opposite to the packet
 * that triggered it. */
1467 void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1470 enum ip_conntrack_info ctinfo;
1472 /* This ICMP is in reverse direction to the packet which caused it */
1473 ct = nf_ct_get(skb, &ctinfo);
1474 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1475 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1477 ctinfo = IP_CT_RELATED;
1479 /* Attach to new skbuff, and increment count */
1480 nskb->nfct = &ct->ct_general;
1481 nskb->nfctinfo = ctinfo;
1482 nf_conntrack_get(nskb->nfct);
/* Adapter: converts a tuple-hash entry to its conntrack and invokes
 * the caller-supplied iterator (used via LIST_FIND_W below). */
1486 do_iter(const struct nf_conntrack_tuple_hash *i,
1487 int (*iter)(struct nf_conn *i, void *data),
1490 return iter(nf_ct_tuplehash_to_ctrack(i), data);
1493 /* Bring out ya dead! */
/* Walk the hash (resuming at *bucket) and then the unconfirmed list
 * for the next conntrack that `iter` selects.  On a hit, a reference
 * is taken before the lock is dropped so the caller can safely kill
 * it.  Returns NULL when nothing matched. */
1494 static struct nf_conntrack_tuple_hash *
1495 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
1496 void *data, unsigned int *bucket)
1498 struct nf_conntrack_tuple_hash *h = NULL;
1500 write_lock_bh(&nf_conntrack_lock);
1501 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1502 h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
1503 struct nf_conntrack_tuple_hash *, iter, data);
1508 h = LIST_FIND_W(&unconfirmed, do_iter,
1509 struct nf_conntrack_tuple_hash *, iter, data);
1511 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
1512 write_unlock_bh(&nf_conntrack_lock);
/* Kill every conntrack for which `iter` returns true: repeatedly fetch
 * the next match (with a reference held) and fire its death handler if
 * its timer can still be cancelled. */
1518 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
1520 struct nf_conntrack_tuple_hash *h;
1521 unsigned int bucket = 0;
1523 while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
1524 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
1525 /* Time to push up daises... */
1526 if (del_timer(&ct->timeout))
1527 death_by_timeout((unsigned long)ct);
1528 /* ... else the timer will get him soon. */
/* Iterator that selects every conntrack (used by nf_conntrack_flush). */
1534 static int kill_all(struct nf_conn *i, void *data)
/* Free a conntrack hash table; the page-allocator branch is shown
 * here, the vmalloc'd branch is on lines not shown. */
1539 static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
1544 free_pages((unsigned long)hash,
1545 get_order(sizeof(struct list_head) * size));
/* Drop every entry from the conntrack table (module cleanup path).
 * IDIOM FIX: `()` declares an old-style unprototyped function in C;
 * `(void)` is the correct empty prototype and is ABI-identical here. */
1548 void nf_conntrack_flush(void)
1550 nf_ct_iterate_cleanup(kill_all, NULL);
1553 /* Mishearing the voices in his head, our hero wonders how he's
1554 supposed to kill the mall. */
/* Module-exit teardown: detach the REJECT hook, flush the event
 * cache, kill all conntracks (looping until the count really reaches
 * zero, since packets in flight may still hold references), wait out
 * the fake "untracked" entry, then destroy caches, the hash table and
 * the per-family protocol tables. */
1555 void nf_conntrack_cleanup(void)
1559 ip_ct_attach = NULL;
1561 /* This makes sure all current packets have passed through
1562 netfilter framework. Roll on, two-stage module
1566 nf_ct_event_cache_flush();
1568 nf_conntrack_flush();
1569 if (atomic_read(&nf_conntrack_count) != 0) {
1571 goto i_see_dead_people;
1573 /* wait until all references to nf_conntrack_untracked are dropped */
1574 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
/* Release each registered conntrack slab cache variant. */
1577 for (i = 0; i < NF_CT_F_NUM; i++) {
1578 if (nf_ct_cache[i].use == 0)
1581 NF_CT_ASSERT(nf_ct_cache[i].use == 1);
1582 nf_ct_cache[i].use = 1;
1583 nf_conntrack_unregister_cache(i);
1585 kmem_cache_destroy(nf_conntrack_expect_cachep);
1586 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1587 nf_conntrack_htable_size);
1589 /* free l3proto protocol tables */
1590 for (i = 0; i < PF_MAX; i++)
1591 if (nf_ct_protos[i]) {
1592 kfree(nf_ct_protos[i]);
1593 nf_ct_protos[i] = NULL;
/* Allocate a hash table of `size` list heads: try the page allocator
 * first, fall back to vmalloc (recording which via *vmalloced so the
 * matching free path is used).  All buckets are initialised empty. */
1597 static struct list_head *alloc_hashtable(int size, int *vmalloced)
1599 struct list_head *hash;
1603 hash = (void*)__get_free_pages(GFP_KERNEL,
1604 get_order(sizeof(struct list_head)
1608 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1609 hash = vmalloc(sizeof(struct list_head) * size);
1613 for (i = 0; i < size; i++)
1614 INIT_LIST_HEAD(&hash[i]);
/* module_param write handler for `hashsize`: resize the conntrack
 * hash at runtime by allocating a new table, rehashing every entry
 * under the write lock, then freeing the old table. */
1619 int set_hashsize(const char *val, struct kernel_param *kp)
1621 int i, bucket, hashsize, vmalloced;
1622 int old_vmalloced, old_size;
1624 struct list_head *hash, *old_hash;
1625 struct nf_conntrack_tuple_hash *h;
1627 /* On boot, we can set this without any fancy locking. */
1628 if (!nf_conntrack_htable_size)
1629 return param_set_uint(val, kp);
1631 hashsize = simple_strtol(val, NULL, 0);
1635 hash = alloc_hashtable(hashsize, &vmalloced);
1639 /* We have to rehash for the new table anyway, so we also can
1640 * use a new random seed */
1641 get_random_bytes(&rnd, 4);
1643 write_lock_bh(&nf_conntrack_lock);
1644 for (i = 0; i < nf_conntrack_htable_size; i++) {
1645 while (!list_empty(&nf_conntrack_hash[i])) {
1646 h = list_entry(nf_conntrack_hash[i].next,
1647 struct nf_conntrack_tuple_hash, list);
/* Re-bucket under the new size and the fresh random seed. */
1649 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1650 list_add_tail(&h->list, &hash[bucket]);
/* Swap the table/size/seed in one critical section. */
1653 old_size = nf_conntrack_htable_size;
1654 old_vmalloced = nf_conntrack_vmalloc;
1655 old_hash = nf_conntrack_hash;
1657 nf_conntrack_htable_size = hashsize;
1658 nf_conntrack_vmalloc = vmalloced;
1659 nf_conntrack_hash = hash;
1660 nf_conntrack_hash_rnd = rnd;
1661 write_unlock_bh(&nf_conntrack_lock);
1663 free_conntrack_hash(old_hash, old_vmalloced, old_size);
/* Expose `hashsize` as a writable (0600) module parameter; writes go
 * through set_hashsize() above to rehash the table live. */
1667 module_param_call(hashsize, set_hashsize, param_get_uint,
1668 &nf_conntrack_htable_size, 0600);
1670 int __init nf_conntrack_init(void)
1675 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1676 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1677 if (!nf_conntrack_htable_size) {
1678 nf_conntrack_htable_size
1679 = (((num_physpages << PAGE_SHIFT) / 16384)
1680 / sizeof(struct list_head));
1681 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1682 nf_conntrack_htable_size = 8192;
1683 if (nf_conntrack_htable_size < 16)
1684 nf_conntrack_htable_size = 16;
1686 nf_conntrack_max = 8 * nf_conntrack_htable_size;
1688 printk("nf_conntrack version %s (%u buckets, %d max)\n",
1689 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1692 nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
1693 &nf_conntrack_vmalloc);
1694 if (!nf_conntrack_hash) {
1695 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1699 ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
1700 sizeof(struct nf_conn), NULL);
1702 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1706 nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
1707 sizeof(struct nf_conntrack_expect),
1709 if (!nf_conntrack_expect_cachep) {
1710 printk(KERN_ERR "Unable to create nf_expect slab cache\n");
1711 goto err_free_conntrack_slab;
1714 /* Don't NEED lock here, but good form anyway. */
1715 write_lock_bh(&nf_conntrack_lock);
1716 for (i = 0; i < PF_MAX; i++)
1717 nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
1718 write_unlock_bh(&nf_conntrack_lock);
1720 /* For use by REJECT target */
1721 ip_ct_attach = __nf_conntrack_attach;
1723 /* Set up fake conntrack:
1724 - to never be deleted, not in any hashes */
1725 atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
1726 /* - and look it like as a confirmed connection */
1727 set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
1731 err_free_conntrack_slab:
1732 nf_conntrack_unregister_cache(NF_CT_F_BASIC);
1734 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1735 nf_conntrack_htable_size);