Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[pandora-kernel.git] / net / netfilter / nf_queue.c
1 #include <linux/kernel.h>
2 #include <linux/slab.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/proc_fs.h>
6 #include <linux/skbuff.h>
7 #include <linux/netfilter.h>
8 #include <linux/seq_file.h>
9 #include <linux/rcupdate.h>
10 #include <net/protocol.h>
11 #include <net/netfilter/nf_queue.h>
12 #include <net/dst.h>
13
14 #include "nf_internals.h"
15
16 /*
17  * A queue handler may be registered for each protocol.  Writers serialize on
18  * queue_handler_mutex; readers use RCU.  The handler must provide an outfn() to
19  * accept packets for queueing and must reinject all packets it receives.
20  */
21 static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
22
23 static DEFINE_MUTEX(queue_handler_mutex);
24
25 /* return EBUSY when somebody else is registered, return EEXIST if the
26  * same handler is registered, return 0 in case of success. */
27 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
28 {
29         int ret;
30         const struct nf_queue_handler *old;
31
32         if (pf >= ARRAY_SIZE(queue_handler))
33                 return -EINVAL;
34
35         mutex_lock(&queue_handler_mutex);
36         old = rcu_dereference_protected(queue_handler[pf],
37                                         lockdep_is_held(&queue_handler_mutex));
38         if (old == qh)
39                 ret = -EEXIST;
40         else if (old)
41                 ret = -EBUSY;
42         else {
43                 rcu_assign_pointer(queue_handler[pf], qh);
44                 ret = 0;
45         }
46         mutex_unlock(&queue_handler_mutex);
47
48         return ret;
49 }
50 EXPORT_SYMBOL(nf_register_queue_handler);
51
52 /* The caller must flush their queue before this */
53 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
54 {
55         const struct nf_queue_handler *old;
56
57         if (pf >= ARRAY_SIZE(queue_handler))
58                 return -EINVAL;
59
60         mutex_lock(&queue_handler_mutex);
61         old = rcu_dereference_protected(queue_handler[pf],
62                                         lockdep_is_held(&queue_handler_mutex));
63         if (old && old != qh) {
64                 mutex_unlock(&queue_handler_mutex);
65                 return -EINVAL;
66         }
67
68         RCU_INIT_POINTER(queue_handler[pf], NULL);
69         mutex_unlock(&queue_handler_mutex);
70
71         synchronize_rcu();
72
73         return 0;
74 }
75 EXPORT_SYMBOL(nf_unregister_queue_handler);
76
77 void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
78 {
79         u_int8_t pf;
80
81         mutex_lock(&queue_handler_mutex);
82         for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
83                 if (rcu_dereference_protected(
84                                 queue_handler[pf],
85                                 lockdep_is_held(&queue_handler_mutex)
86                                 ) == qh)
87                         RCU_INIT_POINTER(queue_handler[pf], NULL);
88         }
89         mutex_unlock(&queue_handler_mutex);
90
91         synchronize_rcu();
92 }
93 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
94
95 static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
96 {
97         /* Release those devices we held, or Alexey will kill me. */
98         if (entry->indev)
99                 dev_put(entry->indev);
100         if (entry->outdev)
101                 dev_put(entry->outdev);
102 #ifdef CONFIG_BRIDGE_NETFILTER
103         if (entry->skb->nf_bridge) {
104                 struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
105
106                 if (nf_bridge->physindev)
107                         dev_put(nf_bridge->physindev);
108                 if (nf_bridge->physoutdev)
109                         dev_put(nf_bridge->physoutdev);
110         }
111 #endif
112         /* Drop reference to owner of hook which queued us. */
113         module_put(entry->elem->owner);
114 }
115
116 /*
117  * Any packet that leaves via this function must come back
118  * through nf_reinject().
119  */
120 static int __nf_queue(struct sk_buff *skb,
121                       struct list_head *elem,
122                       u_int8_t pf, unsigned int hook,
123                       struct net_device *indev,
124                       struct net_device *outdev,
125                       int (*okfn)(struct sk_buff *),
126                       unsigned int queuenum)
127 {
128         int status = -ENOENT;
129         struct nf_queue_entry *entry = NULL;
130 #ifdef CONFIG_BRIDGE_NETFILTER
131         struct net_device *physindev;
132         struct net_device *physoutdev;
133 #endif
134         const struct nf_afinfo *afinfo;
135         const struct nf_queue_handler *qh;
136
137         /* QUEUE == DROP if no one is waiting, to be safe. */
138         rcu_read_lock();
139
140         qh = rcu_dereference(queue_handler[pf]);
141         if (!qh) {
142                 status = -ESRCH;
143                 goto err_unlock;
144         }
145
146         afinfo = nf_get_afinfo(pf);
147         if (!afinfo)
148                 goto err_unlock;
149
150         entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
151         if (!entry) {
152                 status = -ENOMEM;
153                 goto err_unlock;
154         }
155
156         *entry = (struct nf_queue_entry) {
157                 .skb    = skb,
158                 .elem   = list_entry(elem, struct nf_hook_ops, list),
159                 .pf     = pf,
160                 .hook   = hook,
161                 .indev  = indev,
162                 .outdev = outdev,
163                 .okfn   = okfn,
164         };
165
166         /* If it's going away, ignore hook. */
167         if (!try_module_get(entry->elem->owner)) {
168                 status = -ECANCELED;
169                 goto err_unlock;
170         }
171         /* Bump dev refs so they don't vanish while packet is out */
172         if (indev)
173                 dev_hold(indev);
174         if (outdev)
175                 dev_hold(outdev);
176 #ifdef CONFIG_BRIDGE_NETFILTER
177         if (skb->nf_bridge) {
178                 physindev = skb->nf_bridge->physindev;
179                 if (physindev)
180                         dev_hold(physindev);
181                 physoutdev = skb->nf_bridge->physoutdev;
182                 if (physoutdev)
183                         dev_hold(physoutdev);
184         }
185 #endif
186         skb_dst_force(skb);
187         afinfo->saveroute(skb, entry);
188         status = qh->outfn(entry, queuenum);
189
190         rcu_read_unlock();
191
192         if (status < 0) {
193                 nf_queue_entry_release_refs(entry);
194                 goto err;
195         }
196
197         return 0;
198
199 err_unlock:
200         rcu_read_unlock();
201 err:
202         kfree(entry);
203         return status;
204 }
205
#ifdef CONFIG_BRIDGE_NETFILTER
/*
 * When called from bridge netfilter, skb->data must point to the MAC header
 * before skb_gso_segment() runs.  Otherwise the original MAC header is lost
 * and the segmented skbs will be sent to the wrong destination.
 */
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
	if (!skb->nf_bridge)
		return;

	/* Expose the MAC header again. */
	__skb_push(skb, skb->network_header - skb->mac_header);
}

/* Undo nf_bridge_adjust_skb_data(): step back over the MAC header. */
static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
	if (!skb->nf_bridge)
		return;

	__skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
/* Without bridge netfilter there is nothing to adjust. */
#define nf_bridge_adjust_skb_data(s) do {} while (0)
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
#endif
226
227 int nf_queue(struct sk_buff *skb,
228              struct list_head *elem,
229              u_int8_t pf, unsigned int hook,
230              struct net_device *indev,
231              struct net_device *outdev,
232              int (*okfn)(struct sk_buff *),
233              unsigned int queuenum)
234 {
235         struct sk_buff *segs;
236         int err = -EINVAL;
237         unsigned int queued;
238
239         if (!skb_is_gso(skb))
240                 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
241                                   queuenum);
242
243         switch (pf) {
244         case NFPROTO_IPV4:
245                 skb->protocol = htons(ETH_P_IP);
246                 break;
247         case NFPROTO_IPV6:
248                 skb->protocol = htons(ETH_P_IPV6);
249                 break;
250         }
251
252         nf_bridge_adjust_skb_data(skb);
253         segs = skb_gso_segment(skb, 0);
254         /* Does not use PTR_ERR to limit the number of error codes that can be
255          * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
256          * 'ignore this hook'.
257          */
258         if (IS_ERR(segs))
259                 goto out_err;
260         queued = 0;
261         err = 0;
262         do {
263                 struct sk_buff *nskb = segs->next;
264
265                 segs->next = NULL;
266                 if (err == 0) {
267                         nf_bridge_adjust_segmented_data(segs);
268                         err = __nf_queue(segs, elem, pf, hook, indev,
269                                            outdev, okfn, queuenum);
270                 }
271                 if (err == 0)
272                         queued++;
273                 else
274                         kfree_skb(segs);
275                 segs = nskb;
276         } while (segs);
277
278         if (queued) {
279                 kfree_skb(skb);
280                 return 0;
281         }
282   out_err:
283         nf_bridge_adjust_segmented_data(skb);
284         return err;
285 }
286
287 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
288 {
289         struct sk_buff *skb = entry->skb;
290         struct list_head *elem = &entry->elem->list;
291         const struct nf_afinfo *afinfo;
292         int err;
293
294         rcu_read_lock();
295
296         nf_queue_entry_release_refs(entry);
297
298         /* Continue traversal iff userspace said ok... */
299         if (verdict == NF_REPEAT) {
300                 elem = elem->prev;
301                 verdict = NF_ACCEPT;
302         }
303
304         if (verdict == NF_ACCEPT) {
305                 afinfo = nf_get_afinfo(entry->pf);
306                 if (!afinfo || afinfo->reroute(skb, entry) < 0)
307                         verdict = NF_DROP;
308         }
309
310         if (verdict == NF_ACCEPT) {
311         next_hook:
312                 verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
313                                      skb, entry->hook,
314                                      entry->indev, entry->outdev, &elem,
315                                      entry->okfn, INT_MIN);
316         }
317
318         switch (verdict & NF_VERDICT_MASK) {
319         case NF_ACCEPT:
320         case NF_STOP:
321                 local_bh_disable();
322                 entry->okfn(skb);
323                 local_bh_enable();
324                 break;
325         case NF_QUEUE:
326                 err = __nf_queue(skb, elem, entry->pf, entry->hook,
327                                  entry->indev, entry->outdev, entry->okfn,
328                                  verdict >> NF_VERDICT_QBITS);
329                 if (err < 0) {
330                         if (err == -ECANCELED)
331                                 goto next_hook;
332                         if (err == -ESRCH &&
333                            (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
334                                 goto next_hook;
335                         kfree_skb(skb);
336                 }
337                 break;
338         case NF_STOLEN:
339                 break;
340         default:
341                 kfree_skb(skb);
342         }
343         rcu_read_unlock();
344         kfree(entry);
345 }
346 EXPORT_SYMBOL(nf_reinject);
347
348 #ifdef CONFIG_PROC_FS
349 static void *seq_start(struct seq_file *seq, loff_t *pos)
350 {
351         if (*pos >= ARRAY_SIZE(queue_handler))
352                 return NULL;
353
354         return pos;
355 }
356
357 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
358 {
359         (*pos)++;
360
361         if (*pos >= ARRAY_SIZE(queue_handler))
362                 return NULL;
363
364         return pos;
365 }
366
/* seq_file stop callback: seq_start() acquires nothing, so this is a no-op. */
static void seq_stop(struct seq_file *s, void *v)
{
}
371
372 static int seq_show(struct seq_file *s, void *v)
373 {
374         int ret;
375         loff_t *pos = v;
376         const struct nf_queue_handler *qh;
377
378         rcu_read_lock();
379         qh = rcu_dereference(queue_handler[*pos]);
380         if (!qh)
381                 ret = seq_printf(s, "%2lld NONE\n", *pos);
382         else
383                 ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
384         rcu_read_unlock();
385
386         return ret;
387 }
388
389 static const struct seq_operations nfqueue_seq_ops = {
390         .start  = seq_start,
391         .next   = seq_next,
392         .stop   = seq_stop,
393         .show   = seq_show,
394 };
395
396 static int nfqueue_open(struct inode *inode, struct file *file)
397 {
398         return seq_open(file, &nfqueue_seq_ops);
399 }
400
401 static const struct file_operations nfqueue_file_ops = {
402         .owner   = THIS_MODULE,
403         .open    = nfqueue_open,
404         .read    = seq_read,
405         .llseek  = seq_lseek,
406         .release = seq_release,
407 };
408 #endif /* PROC_FS */
409
410
411 int __init netfilter_queue_init(void)
412 {
413 #ifdef CONFIG_PROC_FS
414         if (!proc_create("nf_queue", S_IRUGO,
415                          proc_net_netfilter, &nfqueue_file_ops))
416                 return -1;
417 #endif
418         return 0;
419 }
420