/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *      - new API and handling of conntrack/nat helpers
 *      - now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *      - add usage/reference counts to ip_conntrack_expect
 *      - export ip_conntrack[_expect]_{find_get,put} functions
 * */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>

/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define IP_CONNTRACK_VERSION    "2.1"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DECLARE_RWLOCK(ip_conntrack_lock);

/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;
static kmem_cache_t *ip_conntrack_expect_cachep;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;

DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

void
ip_conntrack_put(struct ip_conntrack *ct)
{
        IP_NF_ASSERT(ct);
        nf_conntrack_put(&ct->ct_general);
}

static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;

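/* Hash a tuple into the conntrack table.  ip_conntrack_hash_rnd (seeded
 * lazily in init_conntrack()) is folded into the jhash so that remote
 * hosts cannot predict which bucket a connection lands in and
 * deliberately overload a single hash chain. */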
static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
#if 0
        dump_tuple(tuple);
#endif
        return (jhash_3words(tuple->src.ip,
                             (tuple->dst.ip ^ tuple->dst.protonum),
                             (tuple->src.u.all | (tuple->dst.u.all << 16)),
                             ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
}

int
ip_ct_get_tuple(const struct iphdr *iph,
                const struct sk_buff *skb,
                unsigned int dataoff,
                struct ip_conntrack_tuple *tuple,
                const struct ip_conntrack_protocol *protocol)
{
        /* Never happens */
        if (iph->frag_off & htons(IP_OFFSET)) {
                printk("ip_conntrack_core: Frag of proto %u.\n",
                       iph->protocol);
                return 0;
        }

        tuple->src.ip = iph->saddr;
        tuple->dst.ip = iph->daddr;
        tuple->dst.protonum = iph->protocol;
        tuple->dst.dir = IP_CT_DIR_ORIGINAL;

        return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig,
                   const struct ip_conntrack_protocol *protocol)
{
        inverse->src.ip = orig->dst.ip;
        inverse->dst.ip = orig->src.ip;
        inverse->dst.protonum = orig->dst.protonum;
        inverse->dst.dir = !orig->dst.dir;

        return protocol->invert_tuple(inverse, orig);
}


/* ip_conntrack_expect helper functions */
static void destroy_expect(struct ip_conntrack_expect *exp)
{
        ip_conntrack_put(exp->master);
        IP_NF_ASSERT(!timer_pending(&exp->timeout));
        kmem_cache_free(ip_conntrack_expect_cachep, exp);
        CONNTRACK_STAT_INC(expect_delete);
}

static void unlink_expect(struct ip_conntrack_expect *exp)
{
        MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
        list_del(&exp->list);
        /* Logically in destroy_expect, but we hold the lock here. */
        exp->master->expecting--;
}

static void expectation_timed_out(unsigned long ul_expect)
{
        struct ip_conntrack_expect *exp = (void *)ul_expect;

        WRITE_LOCK(&ip_conntrack_lock);
        unlink_expect(exp);
        WRITE_UNLOCK(&ip_conntrack_lock);
        destroy_expect(exp);
}

/* If an expectation for this connection is found, it gets deleted from
 * the global list and then returned. */
static struct ip_conntrack_expect *
find_expectation(const struct ip_conntrack_tuple *tuple)
{
        struct ip_conntrack_expect *i;

        list_for_each_entry(i, &ip_conntrack_expect_list, list) {
                /* If master is not in hash table yet (ie. packet hasn't left
                   this machine yet), how can other end know about expected?
                   Hence these are not the droids you are looking for (if
                   master ct never got confirmed, we'd hold a reference to it
                   and weird things would happen to future packets). */
                if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
                    && is_confirmed(i->master)
                    && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        return i;
                }
        }
        return NULL;
}

/* Delete all expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct)
{
        struct ip_conntrack_expect *i, *tmp;

        /* Optimization: most connections never expect any others. */
        if (ct->expecting == 0)
                return;

        list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
                if (i->master == ct && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        destroy_expect(i);
                }
        }
}

static void
clean_from_lists(struct ip_conntrack *ct)
{
        unsigned int ho, hr;

        DEBUGP("clean_from_lists(%p)\n", ct);
        MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);

        ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
        LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
        LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

        /* Destroy all pending expectations */
        remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
        struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
        struct ip_conntrack_protocol *proto;

        DEBUGP("destroy_conntrack(%p)\n", ct);
        IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
        IP_NF_ASSERT(!timer_pending(&ct->timeout));

        /* To make sure we don't get any weird locking issues here:
         * destroy_conntrack() MUST NOT be called with a write lock
         * to ip_conntrack_lock!!! -HW */
        proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
        if (proto && proto->destroy)
                proto->destroy(ct);

        if (ip_conntrack_destroyed)
                ip_conntrack_destroyed(ct);

        WRITE_LOCK(&ip_conntrack_lock);
        /* Expectations will have been removed in clean_from_lists,
         * except TFTP can create an expectation on the first packet,
         * before connection is in the list, so we need to clean here,
         * too. */
        remove_expectations(ct);

        /* We overload first tuple to link into unconfirmed list. */
        if (!is_confirmed(ct)) {
                BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
        }

        CONNTRACK_STAT_INC(delete);
        WRITE_UNLOCK(&ip_conntrack_lock);

        if (ct->master)
                ip_conntrack_put(ct->master);

        DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
        kmem_cache_free(ip_conntrack_cachep, ct);
        atomic_dec(&ip_conntrack_count);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
        struct ip_conntrack *ct = (void *)ul_conntrack;

        WRITE_LOCK(&ip_conntrack_lock);
        /* Inside lock so preempt is disabled on module removal path.
         * Otherwise we can get spurious warnings. */
        CONNTRACK_STAT_INC(delete_list);
        clean_from_lists(ct);
        WRITE_UNLOCK(&ip_conntrack_lock);
        ip_conntrack_put(ct);
}

static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
                    const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
        return tuplehash_to_ctrack(i) != ignored_conntrack
                && ip_ct_tuple_equal(tuple, &i->tuple);
}

static struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;
        unsigned int hash = hash_conntrack(tuple);

        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
        list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
                if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
                        CONNTRACK_STAT_INC(found);
                        return h;
                }
                CONNTRACK_STAT_INC(searched);
        }

        return NULL;
}

/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
                      const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        READ_LOCK(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        if (h)
                atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
        READ_UNLOCK(&ip_conntrack_lock);

        return h;
}
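
/* On success ip_conntrack_find_get() has already bumped the conntrack's
 * reference count, so every successful lookup must be paired with an
 * ip_conntrack_put().  A minimal caller sketch (illustration only, not
 * code from this file):
 *
 *	h = ip_conntrack_find_get(&tuple, NULL);
 *	if (h) {
 *		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
 *		... use ct ...
 *		ip_conntrack_put(ct);
 *	}
 */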

/* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
        unsigned int hash, repl_hash;
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;

        ct = ip_conntrack_get(*pskb, &ctinfo);

        /* ipt_REJECT uses ip_conntrack_attach to attach related
           ICMP/TCP RST packets in other direction.  Actual packet
           which created connection will be IP_CT_NEW or for an
           expected connection, IP_CT_RELATED. */
        if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
                return NF_ACCEPT;

        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

        /* We're not in hash table, and we refuse to set up related
           connections for unconfirmed conns.  But packet copies and
           REJECT will give spurious warnings here. */
        /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

        /* No external references means no one else could have
           confirmed us. */
        IP_NF_ASSERT(!is_confirmed(ct));
        DEBUGP("Confirming conntrack %p\n", ct);

        WRITE_LOCK(&ip_conntrack_lock);

        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost race. */
        if (!LIST_FIND(&ip_conntrack_hash[hash],
                       conntrack_tuple_cmp,
                       struct ip_conntrack_tuple_hash *,
                       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
            && !LIST_FIND(&ip_conntrack_hash[repl_hash],
                          conntrack_tuple_cmp,
                          struct ip_conntrack_tuple_hash *,
                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
                /* Remove from unconfirmed list */
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

                list_prepend(&ip_conntrack_hash[hash],
                             &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
                list_prepend(&ip_conntrack_hash[repl_hash],
                             &ct->tuplehash[IP_CT_DIR_REPLY]);
                /* Timer relative to confirmation time, not original
                   setting time, otherwise we'd get timer wrap in
                   weird delay cases. */
                ct->timeout.expires += jiffies;
                add_timer(&ct->timeout);
                atomic_inc(&ct->ct_general.use);
                set_bit(IPS_CONFIRMED_BIT, &ct->status);
                CONNTRACK_STAT_INC(insert);
                WRITE_UNLOCK(&ip_conntrack_lock);
                return NF_ACCEPT;
        }

        CONNTRACK_STAT_INC(insert_failed);
        WRITE_UNLOCK(&ip_conntrack_lock);

        return NF_DROP;
}

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
                         const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        READ_LOCK(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        READ_UNLOCK(&ip_conntrack_lock);

        return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
        return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
}

static int early_drop(struct list_head *chain)
{
        /* Traverse backwards: gives us oldest, which is roughly LRU */
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack *ct = NULL;
        int dropped = 0;

        READ_LOCK(&ip_conntrack_lock);
        h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
        if (h) {
                ct = tuplehash_to_ctrack(h);
                atomic_inc(&ct->ct_general.use);
        }
        READ_UNLOCK(&ip_conntrack_lock);

        if (!ct)
                return dropped;

        if (del_timer(&ct->timeout)) {
                death_by_timeout((unsigned long)ct);
                dropped = 1;
                CONNTRACK_STAT_INC(early_drop);
        }
        ip_conntrack_put(ct);
        return dropped;
}

static inline int helper_cmp(const struct ip_conntrack_helper *i,
                             const struct ip_conntrack_tuple *rtuple)
{
        return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
{
        return LIST_FIND(&helpers, helper_cmp,
                         struct ip_conntrack_helper *,
                         tuple);
}

/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
init_conntrack(const struct ip_conntrack_tuple *tuple,
               struct ip_conntrack_protocol *protocol,
               struct sk_buff *skb)
{
        struct ip_conntrack *conntrack;
        struct ip_conntrack_tuple repl_tuple;
        size_t hash;
        struct ip_conntrack_expect *exp;

        if (!ip_conntrack_hash_rnd_initted) {
                get_random_bytes(&ip_conntrack_hash_rnd, 4);
                ip_conntrack_hash_rnd_initted = 1;
        }

        hash = hash_conntrack(tuple);

        if (ip_conntrack_max
            && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
                /* Try dropping from this hash chain. */
                if (!early_drop(&ip_conntrack_hash[hash])) {
                        if (net_ratelimit())
                                printk(KERN_WARNING
                                       "ip_conntrack: table full, dropping"
                                       " packet.\n");
                        return ERR_PTR(-ENOMEM);
                }
        }

        if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
                DEBUGP("Can't invert tuple.\n");
                return NULL;
        }

        conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
        if (!conntrack) {
                DEBUGP("Can't allocate conntrack.\n");
                return ERR_PTR(-ENOMEM);
        }

        memset(conntrack, 0, sizeof(*conntrack));
        atomic_set(&conntrack->ct_general.use, 1);
        conntrack->ct_general.destroy = destroy_conntrack;
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
        if (!protocol->new(conntrack, skb)) {
                kmem_cache_free(ip_conntrack_cachep, conntrack);
                return NULL;
        }
        /* Don't set timer yet: wait for confirmation */
        init_timer(&conntrack->timeout);
        conntrack->timeout.data = (unsigned long)conntrack;
        conntrack->timeout.function = death_by_timeout;

        WRITE_LOCK(&ip_conntrack_lock);
        exp = find_expectation(tuple);

        if (exp) {
                DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
                        conntrack, exp);
                /* Welcome, Mr. Bond.  We've been expecting you... */
                __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
                conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
                conntrack->mark = exp->master->mark;
#endif
                nf_conntrack_get(&conntrack->master->ct_general);
                CONNTRACK_STAT_INC(expect_new);
        } else {
                conntrack->helper = ip_ct_find_helper(&repl_tuple);

                CONNTRACK_STAT_INC(new);
        }

        /* Overload tuple linked list to put us in unconfirmed list. */
        list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

        atomic_inc(&ip_conntrack_count);
        WRITE_UNLOCK(&ip_conntrack_lock);

        if (exp) {
                if (exp->expectfn)
                        exp->expectfn(conntrack, exp);
                destroy_expect(exp);
        }

        return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
                  struct ip_conntrack_protocol *proto,
                  int *set_reply,
                  unsigned int hooknum,
                  enum ip_conntrack_info *ctinfo)
{
        struct ip_conntrack_tuple tuple;
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack *ct;

        IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

        if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
                             &tuple, proto))
                return NULL;

        /* look for tuple match */
        h = ip_conntrack_find_get(&tuple, NULL);
        if (!h) {
                h = init_conntrack(&tuple, proto, skb);
                if (!h)
                        return NULL;
                if (IS_ERR(h))
                        return (void *)h;
        }
        ct = tuplehash_to_ctrack(h);

        /* It exists; we have (non-exclusive) reference. */
        if (DIRECTION(h) == IP_CT_DIR_REPLY) {
                *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
                /* Please set reply bit if this packet OK */
                *set_reply = 1;
        } else {
                /* Once we've had two way comms, always ESTABLISHED. */
                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                        DEBUGP("ip_conntrack_in: normal packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_ESTABLISHED;
                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
                        DEBUGP("ip_conntrack_in: related packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_RELATED;
                } else {
                        DEBUGP("ip_conntrack_in: new packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_NEW;
                }
                *set_reply = 0;
        }
        skb->nfct = &ct->ct_general;
        skb->nfctinfo = *ctinfo;
        return ct;
}

/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
                             struct sk_buff **pskb,
                             const struct net_device *in,
                             const struct net_device *out,
                             int (*okfn)(struct sk_buff *))
{
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;
        struct ip_conntrack_protocol *proto;
        int set_reply;
        int ret;

        /* Previously seen (loopback or untracked)?  Ignore. */
        if ((*pskb)->nfct) {
                CONNTRACK_STAT_INC(ignore);
                return NF_ACCEPT;
        }

        /* Never happens */
        if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
                if (net_ratelimit()) {
                        printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
                               (*pskb)->nh.iph->protocol, hooknum);
                }
                return NF_DROP;
        }

        /* FIXME: Do this right please. --RR */
        (*pskb)->nfcache |= NFC_UNKNOWN;

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
        /* Ignore broadcast: no `connection'. */
        if ((*pskb)->pkt_type == PACKET_BROADCAST) {
                printk("Broadcast packet!\n");
                return NF_ACCEPT;
        } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
                   == htonl(0x000000FF)) {
                printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
                       NIPQUAD((*pskb)->nh.iph->saddr),
                       NIPQUAD((*pskb)->nh.iph->daddr),
                       (*pskb)->sk, (*pskb)->pkt_type);
        }
#endif

        proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);

        /* It may be a special packet, error, unclean...
         * the inverse of the return code tells the netfilter
         * core what to do with the packet. */
        if (proto->error != NULL
            && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
                CONNTRACK_STAT_INC(error);
                CONNTRACK_STAT_INC(invalid);
                return -ret;
        }

        if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum, &ctinfo))) {
                /* Not valid part of a connection */
                CONNTRACK_STAT_INC(invalid);
                return NF_ACCEPT;
        }

        if (IS_ERR(ct)) {
                /* Too stressed to deal. */
                CONNTRACK_STAT_INC(drop);
                return NF_DROP;
        }

        IP_NF_ASSERT((*pskb)->nfct);

        ret = proto->packet(ct, *pskb, ctinfo);
        if (ret < 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                nf_conntrack_put((*pskb)->nfct);
                (*pskb)->nfct = NULL;
                CONNTRACK_STAT_INC(invalid);
                return -ret;
        }

        if (set_reply)
                set_bit(IPS_SEEN_REPLY_BIT, &ct->status);

        return ret;
}

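/* Invert a tuple, looking up the protocol handler from the tuple's own
   protocol number rather than taking it as an argument. */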
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig)
{
        return ip_ct_invert_tuple(inverse, orig,
                                  ip_ct_find_proto(orig->dst.protonum));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *a,
                               const struct ip_conntrack_expect *b)
{
        /* Part covered by intersection of masks must be unequal,
           otherwise they clash */
        struct ip_conntrack_tuple intersect_mask
                = { { a->mask.src.ip & b->mask.src.ip,
                      { a->mask.src.u.all & b->mask.src.u.all } },
                    { a->mask.dst.ip & b->mask.dst.ip,
                      { a->mask.dst.u.all & b->mask.dst.u.all },
                      a->mask.dst.protonum & b->mask.dst.protonum } };

        return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
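
/* For example, two expectations that predict a connection to the same
 * destination address and port but leave the source fully wildcarded
 * (zeroed mask) agree everywhere their masks intersect, so they would
 * clash under the test above. */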

static inline int expect_matches(const struct ip_conntrack_expect *a,
                                 const struct ip_conntrack_expect *b)
{
        return a->master == b->master
                && ip_ct_tuple_equal(&a->tuple, &b->tuple)
                && ip_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
        struct ip_conntrack_expect *i;

        WRITE_LOCK(&ip_conntrack_lock);
        /* choose the oldest expectation to evict */
        list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
                if (expect_matches(i, exp) && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        WRITE_UNLOCK(&ip_conntrack_lock);
                        destroy_expect(i);
                        return;
                }
        }
        WRITE_UNLOCK(&ip_conntrack_lock);
}

struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
{
        struct ip_conntrack_expect *new;

        new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
        if (!new) {
                DEBUGP("expect_related: OOM allocating expect\n");
                return NULL;
        }
        new->master = NULL;
        return new;
}

void ip_conntrack_expect_free(struct ip_conntrack_expect *expect)
{
        kmem_cache_free(ip_conntrack_expect_cachep, expect);
}

static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
        atomic_inc(&exp->master->ct_general.use);
        exp->master->expecting++;
        list_add(&exp->list, &ip_conntrack_expect_list);

        if (exp->master->helper->timeout) {
                init_timer(&exp->timeout);
                exp->timeout.data = (unsigned long)exp;
                exp->timeout.function = expectation_timed_out;
                exp->timeout.expires
                        = jiffies + exp->master->helper->timeout * HZ;
                add_timer(&exp->timeout);
        } else
                exp->timeout.function = NULL;

        CONNTRACK_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
        struct ip_conntrack_expect *i;

        list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
                if (i->master == master) {
                        if (del_timer(&i->timeout)) {
                                unlink_expect(i);
                                destroy_expect(i);
                        }
                        break;
                }
        }
}

static inline int refresh_timer(struct ip_conntrack_expect *i)
{
        if (!del_timer(&i->timeout))
                return 0;

        i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
        add_timer(&i->timeout);
        return 1;
}

int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
        struct ip_conntrack_expect *i;
        int ret;

        DEBUGP("ip_conntrack_expect_related %p\n", expect);
        DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
        DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

        WRITE_LOCK(&ip_conntrack_lock);
        list_for_each_entry(i, &ip_conntrack_expect_list, list) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore.. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                /* We don't need the one they've given us. */
                                ip_conntrack_expect_free(expect);
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }

        /* Will be over limit? */
        if (expect->master->helper->max_expected &&
            expect->master->expecting >= expect->master->helper->max_expected)
                evict_oldest_expect(expect->master);

        ip_conntrack_expect_insert(expect);
        ret = 0;
out:
        WRITE_UNLOCK(&ip_conntrack_lock);
        return ret;
}
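
/* A typical caller is a conntrack helper predicting a secondary
 * ("data") connection.  Hedged sketch only; the tuple and mask values
 * are placeholders, not a real protocol:
 *
 *	struct ip_conntrack_expect *exp = ip_conntrack_expect_alloc();
 *	if (exp == NULL)
 *		return NF_DROP;
 *	exp->master = ct;
 *	exp->expectfn = NULL;
 *	exp->tuple = ...;   // tuple the expected connection will match
 *	exp->mask = ...;    // which tuple fields must match exactly
 *	if (ip_conntrack_expect_related(exp) != 0)
 *		ip_conntrack_expect_free(exp);
 */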

/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
                              const struct ip_conntrack_tuple *newreply)
{
        WRITE_LOCK(&ip_conntrack_lock);
        /* Should be unconfirmed, so not in hash table yet */
        IP_NF_ASSERT(!is_confirmed(conntrack));

        DEBUGP("Altering reply tuple of %p to ", conntrack);
        DUMP_TUPLE(newreply);

        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
        if (!conntrack->master && conntrack->expecting == 0)
                conntrack->helper = ip_ct_find_helper(newreply);
        WRITE_UNLOCK(&ip_conntrack_lock);
}

int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
        BUG_ON(me->timeout == 0);
        WRITE_LOCK(&ip_conntrack_lock);
        list_prepend(&helpers, me);
        WRITE_UNLOCK(&ip_conntrack_lock);

        return 0;
}

static inline int unhelp(struct ip_conntrack_tuple_hash *i,
                         const struct ip_conntrack_helper *me)
{
        if (tuplehash_to_ctrack(i)->helper == me)
                tuplehash_to_ctrack(i)->helper = NULL;
        return 0;
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
        unsigned int i;
        struct ip_conntrack_expect *exp, *tmp;

        /* Need write lock here, to delete helper. */
        WRITE_LOCK(&ip_conntrack_lock);
        LIST_DELETE(&helpers, me);

        /* Get rid of expectations */
        list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
                if (exp->master->helper == me && del_timer(&exp->timeout)) {
                        unlink_expect(exp);
                        destroy_expect(exp);
                }
        }
        /* Get rid of expecteds, set helpers to NULL. */
        LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
        for (i = 0; i < ip_conntrack_htable_size; i++)
                LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
                            struct ip_conntrack_tuple_hash *, me);
        WRITE_UNLOCK(&ip_conntrack_lock);

        /* Someone could be still looking at the helper in a bh. */
        synchronize_net();
}

static inline void ct_add_counters(struct ip_conntrack *ct,
                                   enum ip_conntrack_info ctinfo,
                                   const struct sk_buff *skb)
{
#ifdef CONFIG_IP_NF_CT_ACCT
        if (skb) {
                ct->counters[CTINFO2DIR(ctinfo)].packets++;
                ct->counters[CTINFO2DIR(ctinfo)].bytes +=
                                        ntohs(skb->nh.iph->tot_len);
        }
#endif
}

/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
void ip_ct_refresh_acct(struct ip_conntrack *ct,
                        enum ip_conntrack_info ctinfo,
                        const struct sk_buff *skb,
                        unsigned long extra_jiffies)
{
        IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

        /* If not in hash table, timer will not be active yet */
        if (!is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
                ct_add_counters(ct, ctinfo, skb);
        } else {
                WRITE_LOCK(&ip_conntrack_lock);
                /* Need del_timer for race avoidance (may already be dying). */
                if (del_timer(&ct->timeout)) {
                        ct->timeout.expires = jiffies + extra_jiffies;
                        add_timer(&ct->timeout);
                }
                ct_add_counters(ct, ctinfo, skb);
                WRITE_UNLOCK(&ip_conntrack_lock);
        }
}
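
/* The per-protocol trackers (TCP, UDP, ICMP) call ip_ct_refresh_acct()
 * for every packet they accept, passing their protocol's idle timeout;
 * this is what keeps an active connection's timer from firing. */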

/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
#ifdef CONFIG_NETFILTER_DEBUG
        unsigned int olddebug = skb->nf_debug;
#endif

        skb_orphan(skb);

        local_bh_disable();
        skb = ip_defrag(skb, user);
        local_bh_enable();

        if (skb) {
                ip_send_check(skb->nh.iph);
                skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
                /* Packet path as if nothing had happened. */
                skb->nf_debug = olddebug;
#endif
        }

        return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;

        /* This ICMP is in reverse direction to the packet which caused it */
        ct = ip_conntrack_get(skb, &ctinfo);

        if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
                ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
        else
                ctinfo = IP_CT_RELATED;

        /* Attach to new skbuff, and increment count */
        nskb->nfct = &ct->ct_general;
        nskb->nfctinfo = ctinfo;
        nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct ip_conntrack_tuple_hash *i,
        int (*iter)(struct ip_conntrack *i, void *data),
        void *data)
{
        return iter(tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
                void *data, unsigned int *bucket)
{
        struct ip_conntrack_tuple_hash *h = NULL;

        WRITE_LOCK(&ip_conntrack_lock);
        for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
                h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
                                struct ip_conntrack_tuple_hash *, iter, data);
                if (h)
                        break;
        }
        if (!h)
                h = LIST_FIND_W(&unconfirmed, do_iter,
                                struct ip_conntrack_tuple_hash *, iter, data);
        if (h)
                atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
        WRITE_UNLOCK(&ip_conntrack_lock);

        return h;
}

void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
        struct ip_conntrack_tuple_hash *h;
        unsigned int bucket = 0;

        while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
                struct ip_conntrack *ct = tuplehash_to_ctrack(h);
                /* Time to push up daisies... */
                if (del_timer(&ct->timeout))
                        death_by_timeout((unsigned long)ct);
                /* ... else the timer will get him soon. */

                ip_conntrack_put(ct);
        }
}

/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack_tuple tuple;

        IP_CT_TUPLE_U_BLANK(&tuple);
        tuple.src.ip = inet->rcv_saddr;
        tuple.src.u.tcp.port = inet->sport;
        tuple.dst.ip = inet->daddr;
        tuple.dst.u.tcp.port = inet->dport;
        tuple.dst.protonum = IPPROTO_TCP;

        /* We only do TCP at the moment: is there a better way? */
        if (strcmp(sk->sk_prot->name, "TCP")) {
                DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
                return -ENOPROTOOPT;
        }

        if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
                DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
                       *len, sizeof(struct sockaddr_in));
                return -EINVAL;
        }

        h = ip_conntrack_find_get(&tuple, NULL);
        if (h) {
                struct sockaddr_in sin;
                struct ip_conntrack *ct = tuplehash_to_ctrack(h);

                sin.sin_family = AF_INET;
                sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                        .tuple.dst.u.tcp.port;
                sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                        .tuple.dst.ip;

                DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
                       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
                ip_conntrack_put(ct);
                if (copy_to_user(user, &sin, sizeof(sin)) != 0)
                        return -EFAULT;
                else
                        return 0;
        }
        DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
               NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
               NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
        return -ENOENT;
}

static struct nf_sockopt_ops so_getorigdst = {
        .pf             = PF_INET,
        .get_optmin     = SO_ORIGINAL_DST,
        .get_optmax     = SO_ORIGINAL_DST+1,
        .get            = &getorigdst,
};
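
/* From userspace, a transparent proxy retrieves the pre-NAT destination
 * of a redirected TCP connection through this sockopt.  Hedged sketch
 * of the caller side (connect_to() is a hypothetical helper):
 *
 *	struct sockaddr_in orig;
 *	socklen_t len = sizeof(orig);
 *
 *	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &orig, &len) == 0)
 *		connect_to(orig.sin_addr, orig.sin_port);
 */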

static int kill_all(struct ip_conntrack *i, void *data)
{
        return 1;
}

static void free_conntrack_hash(void)
{
        if (ip_conntrack_vmalloc)
                vfree(ip_conntrack_hash);
        else
                free_pages((unsigned long)ip_conntrack_hash,
                           get_order(sizeof(struct list_head)
                                     * ip_conntrack_htable_size));
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
        ip_ct_attach = NULL;
        /* This makes sure all current packets have passed through
           netfilter framework.  Roll on, two-stage module
           delete... */
        synchronize_net();

 i_see_dead_people:
        ip_ct_iterate_cleanup(kill_all, NULL);
        if (atomic_read(&ip_conntrack_count) != 0) {
                schedule();
                goto i_see_dead_people;
        }

        kmem_cache_destroy(ip_conntrack_cachep);
        kmem_cache_destroy(ip_conntrack_expect_cachep);
        free_conntrack_hash();
        nf_unregister_sockopt(&so_getorigdst);
}

static int hashsize;
module_param(hashsize, int, 0400);

int __init ip_conntrack_init(void)
{
        unsigned int i;
        int ret;

        /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
         * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
        if (hashsize) {
                ip_conntrack_htable_size = hashsize;
        } else {
                ip_conntrack_htable_size
                        = (((num_physpages << PAGE_SHIFT) / 16384)
                           / sizeof(struct list_head));
                if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        ip_conntrack_htable_size = 8192;
                if (ip_conntrack_htable_size < 16)
                        ip_conntrack_htable_size = 16;
        }
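        /* Worked example of the default sizing on i386 (4KB pages,
         * 8-byte struct list_head): a 32MB machine gives
         * 32MB / 16384 / 8 = 256 buckets; above 1GB the table is
         * clamped to 8192 buckets. */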
        ip_conntrack_max = 8 * ip_conntrack_htable_size;

        printk("ip_conntrack version %s (%u buckets, %d max)"
               " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
               ip_conntrack_htable_size, ip_conntrack_max,
               sizeof(struct ip_conntrack));

        ret = nf_register_sockopt(&so_getorigdst);
        if (ret != 0) {
                printk(KERN_ERR "Unable to register netfilter socket option\n");
                return ret;
        }

        /* AK: the hash table is twice as large as needed because it
           uses list_head.  It would be much nicer to the caches to use
           a single-pointer list head here. */
        ip_conntrack_vmalloc = 0;
        ip_conntrack_hash
                = (void *)__get_free_pages(GFP_KERNEL,
                                           get_order(sizeof(struct list_head)
                                                     * ip_conntrack_htable_size));
        if (!ip_conntrack_hash) {
                ip_conntrack_vmalloc = 1;
                printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
                ip_conntrack_hash = vmalloc(sizeof(struct list_head)
                                            * ip_conntrack_htable_size);
        }
        if (!ip_conntrack_hash) {
                printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
                goto err_unreg_sockopt;
        }

        ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
                                                sizeof(struct ip_conntrack), 0,
                                                0, NULL, NULL);
        if (!ip_conntrack_cachep) {
                printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
                goto err_free_hash;
        }

        ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
                                        sizeof(struct ip_conntrack_expect),
                                        0, 0, NULL, NULL);
        if (!ip_conntrack_expect_cachep) {
                printk(KERN_ERR "Unable to create ip_expect slab cache\n");
                goto err_free_conntrack_slab;
        }

        /* Don't NEED lock here, but good form anyway. */
        WRITE_LOCK(&ip_conntrack_lock);
        for (i = 0; i < MAX_IP_CT_PROTO; i++)
                ip_ct_protos[i] = &ip_conntrack_generic_protocol;
        /* Sew in builtin protocols. */
        ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
        ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
        ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
        WRITE_UNLOCK(&ip_conntrack_lock);

        for (i = 0; i < ip_conntrack_htable_size; i++)
                INIT_LIST_HEAD(&ip_conntrack_hash[i]);

        /* For use by ipt_REJECT */
        ip_ct_attach = ip_conntrack_attach;

        /* Set up fake conntrack:
            - to never be deleted, not in any hashes */
        atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
        /*  - and make it look like a confirmed connection */
        set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

        return ret;

err_free_conntrack_slab:
        kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
        free_conntrack_hash();
err_unreg_sockopt:
        nf_unregister_sockopt(&so_getorigdst);

        return -ENOMEM;
}