Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[pandora-kernel.git] / net / netfilter / nf_conntrack_expect.c
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22
23 #include <net/netfilter/nf_conntrack.h>
24 #include <net/netfilter/nf_conntrack_core.h>
25 #include <net/netfilter/nf_conntrack_expect.h>
26 #include <net/netfilter/nf_conntrack_helper.h>
27 #include <net/netfilter/nf_conntrack_tuple.h>
28
29 LIST_HEAD(nf_conntrack_expect_list);
30 EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);
31
32 struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
33 static unsigned int nf_conntrack_expect_next_id;
34
35 /* nf_conntrack_expect helper functions */
36 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
37 {
38         struct nf_conn_help *master_help = nfct_help(exp->master);
39
40         NF_CT_ASSERT(master_help);
41         NF_CT_ASSERT(!timer_pending(&exp->timeout));
42
43         list_del(&exp->list);
44         NF_CT_STAT_INC(expect_delete);
45         master_help->expecting--;
46         nf_conntrack_expect_put(exp);
47 }
48 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
49
50 static void expectation_timed_out(unsigned long ul_expect)
51 {
52         struct nf_conntrack_expect *exp = (void *)ul_expect;
53
54         write_lock_bh(&nf_conntrack_lock);
55         nf_ct_unlink_expect(exp);
56         write_unlock_bh(&nf_conntrack_lock);
57         nf_conntrack_expect_put(exp);
58 }
59
60 struct nf_conntrack_expect *
61 __nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
62 {
63         struct nf_conntrack_expect *i;
64
65         list_for_each_entry(i, &nf_conntrack_expect_list, list) {
66                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
67                         return i;
68         }
69         return NULL;
70 }
71 EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);
72
73 /* Just find a expectation corresponding to a tuple. */
74 struct nf_conntrack_expect *
75 nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
76 {
77         struct nf_conntrack_expect *i;
78
79         read_lock_bh(&nf_conntrack_lock);
80         i = __nf_conntrack_expect_find(tuple);
81         if (i)
82                 atomic_inc(&i->use);
83         read_unlock_bh(&nf_conntrack_lock);
84
85         return i;
86 }
87 EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
88
89 /* If an expectation for this connection is found, it gets delete from
90  * global list then returned. */
91 struct nf_conntrack_expect *
92 find_expectation(const struct nf_conntrack_tuple *tuple)
93 {
94         struct nf_conntrack_expect *exp;
95
96         exp = __nf_conntrack_expect_find(tuple);
97         if (!exp)
98                 return NULL;
99
100         /* If master is not in hash table yet (ie. packet hasn't left
101            this machine yet), how can other end know about expected?
102            Hence these are not the droids you are looking for (if
103            master ct never got confirmed, we'd hold a reference to it
104            and weird things would happen to future packets). */
105         if (!nf_ct_is_confirmed(exp->master))
106                 return NULL;
107
108         if (exp->flags & NF_CT_EXPECT_PERMANENT) {
109                 atomic_inc(&exp->use);
110                 return exp;
111         } else if (del_timer(&exp->timeout)) {
112                 nf_ct_unlink_expect(exp);
113                 return exp;
114         }
115
116         return NULL;
117 }
118
119 /* delete all expectations for this conntrack */
120 void nf_ct_remove_expectations(struct nf_conn *ct)
121 {
122         struct nf_conntrack_expect *i, *tmp;
123         struct nf_conn_help *help = nfct_help(ct);
124
125         /* Optimization: most connection never expect any others. */
126         if (!help || help->expecting == 0)
127                 return;
128
129         list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
130                 if (i->master == ct && del_timer(&i->timeout)) {
131                         nf_ct_unlink_expect(i);
132                         nf_conntrack_expect_put(i);
133                 }
134         }
135 }
136 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
137
138 /* Would two expected things clash? */
139 static inline int expect_clash(const struct nf_conntrack_expect *a,
140                                const struct nf_conntrack_expect *b)
141 {
142         /* Part covered by intersection of masks must be unequal,
143            otherwise they clash */
144         struct nf_conntrack_tuple intersect_mask;
145         int count;
146
147         intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
148         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
149         intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
150         intersect_mask.dst.protonum = a->mask.dst.protonum
151                                         & b->mask.dst.protonum;
152
153         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
154                 intersect_mask.src.u3.all[count] =
155                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
156         }
157
158         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
159                 intersect_mask.dst.u3.all[count] =
160                         a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
161         }
162
163         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
164 }
165
166 static inline int expect_matches(const struct nf_conntrack_expect *a,
167                                  const struct nf_conntrack_expect *b)
168 {
169         return a->master == b->master
170                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
171                 && nf_ct_tuple_equal(&a->mask, &b->mask);
172 }
173
174 /* Generally a bad idea to call this: could have matched already. */
175 void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
176 {
177         struct nf_conntrack_expect *i;
178
179         write_lock_bh(&nf_conntrack_lock);
180         /* choose the the oldest expectation to evict */
181         list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
182                 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
183                         nf_ct_unlink_expect(i);
184                         write_unlock_bh(&nf_conntrack_lock);
185                         nf_conntrack_expect_put(i);
186                         return;
187                 }
188         }
189         write_unlock_bh(&nf_conntrack_lock);
190 }
191 EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);
192
193 /* We don't increase the master conntrack refcount for non-fulfilled
194  * conntracks. During the conntrack destruction, the expectations are
195  * always killed before the conntrack itself */
196 struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
197 {
198         struct nf_conntrack_expect *new;
199
200         new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
201         if (!new)
202                 return NULL;
203
204         new->master = me;
205         atomic_set(&new->use, 1);
206         return new;
207 }
208 EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);
209
210 void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
211                               union nf_conntrack_address *saddr,
212                               union nf_conntrack_address *daddr,
213                               u_int8_t proto, __be16 *src, __be16 *dst)
214 {
215         int len;
216
217         if (family == AF_INET)
218                 len = 4;
219         else
220                 len = 16;
221
222         exp->flags = 0;
223         exp->expectfn = NULL;
224         exp->helper = NULL;
225         exp->tuple.src.l3num = family;
226         exp->tuple.dst.protonum = proto;
227         exp->mask.src.l3num = 0xFFFF;
228         exp->mask.dst.protonum = 0xFF;
229
230         if (saddr) {
231                 memcpy(&exp->tuple.src.u3, saddr, len);
232                 if (sizeof(exp->tuple.src.u3) > len)
233                         /* address needs to be cleared for nf_ct_tuple_equal */
234                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
235                                sizeof(exp->tuple.src.u3) - len);
236                 memset(&exp->mask.src.u3, 0xFF, len);
237                 if (sizeof(exp->mask.src.u3) > len)
238                         memset((void *)&exp->mask.src.u3 + len, 0x00,
239                                sizeof(exp->mask.src.u3) - len);
240         } else {
241                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
242                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
243         }
244
245         if (daddr) {
246                 memcpy(&exp->tuple.dst.u3, daddr, len);
247                 if (sizeof(exp->tuple.dst.u3) > len)
248                         /* address needs to be cleared for nf_ct_tuple_equal */
249                         memset((void *)&exp->tuple.dst.u3 + len, 0x00,
250                                sizeof(exp->tuple.dst.u3) - len);
251                 memset(&exp->mask.dst.u3, 0xFF, len);
252                 if (sizeof(exp->mask.dst.u3) > len)
253                         memset((void *)&exp->mask.dst.u3 + len, 0x00,
254                                sizeof(exp->mask.dst.u3) - len);
255         } else {
256                 memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
257                 memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
258         }
259
260         if (src) {
261                 exp->tuple.src.u.all = (__force u16)*src;
262                 exp->mask.src.u.all = 0xFFFF;
263         } else {
264                 exp->tuple.src.u.all = 0;
265                 exp->mask.src.u.all = 0;
266         }
267
268         if (dst) {
269                 exp->tuple.dst.u.all = (__force u16)*dst;
270                 exp->mask.dst.u.all = 0xFFFF;
271         } else {
272                 exp->tuple.dst.u.all = 0;
273                 exp->mask.dst.u.all = 0;
274         }
275 }
276 EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);
277
278 void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
279 {
280         if (atomic_dec_and_test(&exp->use))
281                 kmem_cache_free(nf_conntrack_expect_cachep, exp);
282 }
283 EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
284
285 static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
286 {
287         struct nf_conn_help *master_help = nfct_help(exp->master);
288
289         atomic_inc(&exp->use);
290         master_help->expecting++;
291         list_add(&exp->list, &nf_conntrack_expect_list);
292
293         init_timer(&exp->timeout);
294         exp->timeout.data = (unsigned long)exp;
295         exp->timeout.function = expectation_timed_out;
296         exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
297         add_timer(&exp->timeout);
298
299         exp->id = ++nf_conntrack_expect_next_id;
300         atomic_inc(&exp->use);
301         NF_CT_STAT_INC(expect_create);
302 }
303
304 /* Race with expectations being used means we could have none to find; OK. */
305 static void evict_oldest_expect(struct nf_conn *master)
306 {
307         struct nf_conntrack_expect *i;
308
309         list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
310                 if (i->master == master) {
311                         if (del_timer(&i->timeout)) {
312                                 nf_ct_unlink_expect(i);
313                                 nf_conntrack_expect_put(i);
314                         }
315                         break;
316                 }
317         }
318 }
319
320 static inline int refresh_timer(struct nf_conntrack_expect *i)
321 {
322         struct nf_conn_help *master_help = nfct_help(i->master);
323
324         if (!del_timer(&i->timeout))
325                 return 0;
326
327         i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
328         add_timer(&i->timeout);
329         return 1;
330 }
331
332 int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
333 {
334         struct nf_conntrack_expect *i;
335         struct nf_conn *master = expect->master;
336         struct nf_conn_help *master_help = nfct_help(master);
337         int ret;
338
339         NF_CT_ASSERT(master_help);
340
341         write_lock_bh(&nf_conntrack_lock);
342         list_for_each_entry(i, &nf_conntrack_expect_list, list) {
343                 if (expect_matches(i, expect)) {
344                         /* Refresh timer: if it's dying, ignore.. */
345                         if (refresh_timer(i)) {
346                                 ret = 0;
347                                 goto out;
348                         }
349                 } else if (expect_clash(i, expect)) {
350                         ret = -EBUSY;
351                         goto out;
352                 }
353         }
354         /* Will be over limit? */
355         if (master_help->helper->max_expected &&
356             master_help->expecting >= master_help->helper->max_expected)
357                 evict_oldest_expect(master);
358
359         nf_conntrack_expect_insert(expect);
360         nf_conntrack_expect_event(IPEXP_NEW, expect);
361         ret = 0;
362 out:
363         write_unlock_bh(&nf_conntrack_lock);
364         return ret;
365 }
366 EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);
367
368 #ifdef CONFIG_PROC_FS
369 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
370 {
371         struct list_head *e = &nf_conntrack_expect_list;
372         loff_t i;
373
374         /* strange seq_file api calls stop even if we fail,
375          * thus we need to grab lock since stop unlocks */
376         read_lock_bh(&nf_conntrack_lock);
377
378         if (list_empty(e))
379                 return NULL;
380
381         for (i = 0; i <= *pos; i++) {
382                 e = e->next;
383                 if (e == &nf_conntrack_expect_list)
384                         return NULL;
385         }
386         return e;
387 }
388
389 static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
390 {
391         struct list_head *e = v;
392
393         ++*pos;
394         e = e->next;
395
396         if (e == &nf_conntrack_expect_list)
397                 return NULL;
398
399         return e;
400 }
401
402 static void exp_seq_stop(struct seq_file *s, void *v)
403 {
404         read_unlock_bh(&nf_conntrack_lock);
405 }
406
407 static int exp_seq_show(struct seq_file *s, void *v)
408 {
409         struct nf_conntrack_expect *expect = v;
410
411         if (expect->timeout.function)
412                 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
413                            ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
414         else
415                 seq_printf(s, "- ");
416         seq_printf(s, "l3proto = %u proto=%u ",
417                    expect->tuple.src.l3num,
418                    expect->tuple.dst.protonum);
419         print_tuple(s, &expect->tuple,
420                     __nf_ct_l3proto_find(expect->tuple.src.l3num),
421                     __nf_ct_l4proto_find(expect->tuple.src.l3num,
422                                        expect->tuple.dst.protonum));
423         return seq_putc(s, '\n');
424 }
425
426 static struct seq_operations exp_seq_ops = {
427         .start = exp_seq_start,
428         .next = exp_seq_next,
429         .stop = exp_seq_stop,
430         .show = exp_seq_show
431 };
432
433 static int exp_open(struct inode *inode, struct file *file)
434 {
435         return seq_open(file, &exp_seq_ops);
436 }
437
438 struct file_operations exp_file_ops = {
439         .owner   = THIS_MODULE,
440         .open    = exp_open,
441         .read    = seq_read,
442         .llseek  = seq_lseek,
443         .release = seq_release
444 };
445 #endif /* CONFIG_PROC_FS */