net/netfilter/nf_conntrack_expect.c
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
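/* Must be called with nf_conntrack_lock held and with the expectation's
 * timeout timer already stopped (cf. the assertions below); callers such
 * as nf_ct_expectation_timed_out() take the lock themselves. */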
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
                                u32 pid, int report)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);

        NF_CT_ASSERT(master_help);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        net->ct.expect_count--;

        hlist_del(&exp->lnode);
        master_help->expecting[exp->class]--;

        nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_conntrack_hash_rnd))
                init_nf_conntrack_hash_rnd();

        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}
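/* Note on the return expression above: "((u64)hash * nf_ct_expect_hsize) >> 32"
 * maps the 32-bit jhash value into [0, nf_ct_expect_hsize) by
 * multiply-and-shift, avoiding the division that an explicit
 * "hash % nf_ct_expect_hsize" would cost. */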

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
                    const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
                    nf_ct_zone(i->master) == zone)
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
                      const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(net, zone, tuple);
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
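/* On success the expectation is returned with its use count incremented
 * (atomic_inc_not_zero() above), so callers must release it again with
 * nf_ct_expect_put(). */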

/* If an expectation for this connection is found, it is deleted from the
 * global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
                       const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
                    nf_ct_zone(i->master) == zone) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If master is not in hash table yet (ie. packet hasn't left
           this machine yet), how can other end know about expected?
           Hence these are not the droids you are looking for (if
           master ct never got confirmed, we'd hold a reference to it
           and weird things would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}
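/* Either way the caller ends up owning a reference: an explicit one via
 * atomic_inc() for PERMANENT expectations, or (in the del_timer() branch)
 * the reference that was held by the now-stopped timer, since
 * nf_ct_unlink_expect() only drops the hash table's reference. */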

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* Part covered by intersection of masks must be unequal,
           otherwise they clash */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class &&
                nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
                nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
                nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       u_int8_t family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
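/* A minimal usage sketch of the alloc/init/related/put cycle, modelled on
 * in-tree helpers such as nf_conntrack_ftp ("port" stands in for whatever
 * value the helper parsed out of the packet payload):
 *
 *        struct nf_conntrack_expect *exp;
 *        __be16 port;    \/\* parsed from the payload by the helper \*\/
 *
 *        exp = nf_ct_expect_alloc(ct);
 *        if (exp == NULL)
 *                return NF_DROP;
 *        nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
 *                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
 *                          IPPROTO_TCP, NULL, &port);
 *        if (nf_ct_expect_related(exp) != 0)
 *                ret = NF_DROP;
 *        nf_ct_expect_put(exp);
 */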

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct nf_conntrack_helper *helper;
        struct net *net = nf_ct_exp_net(exp);
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

        /* two references: one for hash insert, one for the timer */
        atomic_add(2, &exp->use);

        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;

        hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
        net->ct.expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        helper = rcu_dereference_protected(master_help->helper,
                                           lockdep_is_held(&nf_conntrack_lock));
        if (helper) {
                exp->timeout.expires = jiffies +
                        helper->expect_policy[exp->class].timeout * HZ;
        }
        add_timer(&exp->timeout);

        NF_CT_STAT_INC(net, expect_create);
        return 0;
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;
        struct hlist_node *n;

        hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);
        const struct nf_conntrack_expect_policy *p;

        if (!del_timer(&i->timeout))
                return 0;

        p = &rcu_dereference_protected(
                master_help->helper,
                lockdep_is_held(&nf_conntrack_lock)
                )->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
}

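/* Return convention: 1 means no conflict was found and the caller should
 * go on to insert the expectation, 0 means an identical expectation
 * already existed and its timer was simply refreshed (nothing to insert),
 * and a negative value is an error.  nf_ct_expect_related_report() below
 * depends on this three-way split. */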
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_helper *helper;
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *n;
        unsigned int h;
        int ret = 1;

        if (!master_help) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will be over limit? */
        helper = rcu_dereference_protected(master_help->helper,
                                           lockdep_is_held(&nf_conntrack_lock));
        if (helper) {
                p = &helper->expect_policy[expect->class];
                if (p->max_expected &&
                    master_help->expecting[expect->class] >= p->max_expected) {
                        evict_oldest_expect(master, expect);
                        if (master_help->expecting[expect->class]
                                                >= p->max_expected) {
                                ret = -EMFILE;
                                goto out;
                        }
                }
        }

        if (net->ct.expect_count >= nf_ct_expect_max) {
                net_warn_ratelimited("nf_conntrack: expectation table full\n");
                ret = -EMFILE;
        }
out:
        return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
                                u32 pid, int report)
{
        int ret;

        spin_lock_bh(&nf_conntrack_lock);
        ret = __nf_ct_expect_check(expect);
        if (ret <= 0)
                goto out;

        ret = nf_ct_expect_insert(expect);
        if (ret < 0)
                goto out;
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
        return ret;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
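/* Most in-tree helpers call this through nf_ct_expect_related(), which in
 * this tree is a thin inline wrapper, roughly:
 *
 *        return nf_ct_expect_related_report(expect, 0, 0);
 */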

#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
        struct seq_net_private p;
        unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(hlist_next_rcu(head));
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct nf_conntrack_helper *helper;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE) {
                seq_printf(s, "%sINACTIVE", delim);
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_USERSPACE)
                seq_printf(s, "%sUSERSPACE", delim);

        helper = rcu_dereference(nfct_help(expect->master)->helper);
        if (helper) {
                seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
                if (helper->expect_policy[expect->class].name)
                        seq_printf(s, "/%s",
                                   helper->expect_policy[expect->class].name);
        }

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next = exp_seq_next,
        .stop = exp_seq_stop,
        .show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &exp_seq_ops,
                            sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
        struct proc_dir_entry *proc;

        proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
        return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
        proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
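/* The parameter is read-only at runtime (mode 0400); if it is not set at
 * boot/module-load time (e.g. "nf_conntrack.expect_hashsize=..." on the
 * kernel command line), nf_conntrack_expect_init() below derives it from
 * the conntrack hash table size. */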

int nf_conntrack_expect_init(struct net *net)
{
        int err = -ENOMEM;

        if (net_eq(net, &init_net)) {
                if (!nf_ct_expect_hsize) {
                        nf_ct_expect_hsize = net->ct.htable_size / 256;
                        if (!nf_ct_expect_hsize)
                                nf_ct_expect_hsize = 1;
                }
                nf_ct_expect_max = nf_ct_expect_hsize * 4;
        }

        net->ct.expect_count = 0;
        net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
        if (net->ct.expect_hash == NULL)
                goto err1;

        if (net_eq(net, &init_net)) {
                nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL);
                if (!nf_ct_expect_cachep)
                        goto err2;
        }

        err = exp_proc_init(net);
        if (err < 0)
                goto err3;

        return 0;

err3:
        if (net_eq(net, &init_net))
                kmem_cache_destroy(nf_ct_expect_cachep);
err2:
        nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
        return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
        exp_proc_remove(net);
        if (net_eq(net, &init_net)) {
                rcu_barrier(); /* Wait for call_rcu() before destroy */
                kmem_cache_destroy(nf_ct_expect_cachep);
        }
        nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}