2 * Packet matching code.
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
14 * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
15 * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
17 #include <linux/config.h>
18 #include <linux/cache.h>
19 #include <linux/capability.h>
20 #include <linux/skbuff.h>
21 #include <linux/kmod.h>
22 #include <linux/vmalloc.h>
23 #include <linux/netdevice.h>
24 #include <linux/module.h>
25 #include <linux/icmp.h>
27 #include <asm/uaccess.h>
28 #include <asm/semaphore.h>
29 #include <linux/proc_fs.h>
30 #include <linux/err.h>
31 #include <linux/cpumask.h>
33 #include <linux/netfilter/x_tables.h>
34 #include <linux/netfilter_ipv4/ip_tables.h>
/* Kernel module metadata: license, author and description strings. */
36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
38 MODULE_DESCRIPTION("IPv4 packet filter");
/* Compile-time debug switches; all disabled by default. */
40 /*#define DEBUG_IP_FIREWALL*/
41 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
42 /*#define DEBUG_IP_FIREWALL_USER*/
/* dprintf(): packet-path debug printk, compiled out unless
 * DEBUG_IP_FIREWALL is defined.
 * NOTE(review): the #else/#endif lines are missing from this excerpt. */
44 #ifdef DEBUG_IP_FIREWALL
45 #define dprintf(format, args...) printk(format , ## args)
47 #define dprintf(format, args...)
/* duprintf(): userspace/configuration-path debug printk, gated on
 * DEBUG_IP_FIREWALL_USER. */
50 #ifdef DEBUG_IP_FIREWALL_USER
51 #define duprintf(format, args...) printk(format , ## args)
53 #define duprintf(format, args...)
/* IP_NF_ASSERT(x): soft assertion that logs function/file/line when
 * CONFIG_NETFILTER_DEBUG is set; expands to nothing otherwise. */
56 #ifdef CONFIG_NETFILTER_DEBUG
57 #define IP_NF_ASSERT(x) \
60 printk("IP_NF_ASSERT: %s:%s:%u\n", \
61 __FUNCTION__, __FILE__, __LINE__); \
64 #define IP_NF_ASSERT(x)
68 /* All the better to debug you with... */
74 We keep a set of rules for each CPU, so we can avoid write-locking
75 them in the softirq when updating the counters and therefore
76 only need to read-lock in the softirq; doing a write_lock_bh() in user
77 context stops packets coming through and allows user context to read
78 the counters or update the rules.
80 Hence the start of any table is given by get_table() below. */
82 /* Returns whether matches rule or not. */
/*
 * ip_packet_match - match one packet's IP header against a rule's
 * struct ipt_ip specification: masked source/destination address,
 * input/output interface name, protocol and fragment flag.  Each test
 * can be inverted by the corresponding IPT_INV_* bit in invflags.
 * NOTE(review): this excerpt is missing interior lines (locals, return
 * statements, closing braces); comments cover only the visible code.
 */
84 ip_packet_match(const struct iphdr *ip,
87 const struct ipt_ip *ipinfo,
/* FWINV: XOR a raw test result with the rule's invert flag for it. */
93 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
/* Address match: compare packet address under the rule's mask. */
95 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
97 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
99 dprintf("Source or dest mismatch.\n");
101 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
103 NIPQUAD(ipinfo->smsk.s_addr),
104 NIPQUAD(ipinfo->src.s_addr),
105 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
106 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
108 NIPQUAD(ipinfo->dmsk.s_addr),
109 NIPQUAD(ipinfo->dst.s_addr),
110 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
114 /* Look for ifname matches; this should unroll nicely. */
/* Interface names are compared one unsigned long at a time under the
 * rule's interface mask (presumably how "+" suffix wildcards are
 * encoded — verify against iptables userspace). */
115 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
116 ret |= (((const unsigned long *)indev)[i]
117 ^ ((const unsigned long *)ipinfo->iniface)[i])
118 & ((const unsigned long *)ipinfo->iniface_mask)[i];
121 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
122 dprintf("VIA in mismatch (%s vs %s).%s\n",
123 indev, ipinfo->iniface,
124 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
/* Same word-wise masked comparison for the output device. */
128 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
129 ret |= (((const unsigned long *)outdev)[i]
130 ^ ((const unsigned long *)ipinfo->outiface)[i])
131 & ((const unsigned long *)ipinfo->outiface_mask)[i];
134 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
135 dprintf("VIA out mismatch (%s vs %s).%s\n",
136 outdev, ipinfo->outiface,
137 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
141 /* Check specific protocol */
143 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
144 dprintf("Packet protocol %hi does not match %hi.%s\n",
145 ip->protocol, ipinfo->proto,
146 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
150 /* If we have a fragment rule but the packet is not a fragment
151 * then we return zero */
152 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
153 dprintf("Fragment rule but not fragment.%s\n",
154 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
/*
 * ip_checkentry - sanity-check a rule's ipt_ip spec that came from
 * userspace: reject any flag or invert-flag bits outside the known
 * IPT_F_MASK / IPT_INV_MASK sets.
 * NOTE(review): the return statements are missing from this excerpt.
 */
162 ip_checkentry(const struct ipt_ip *ip)
164 if (ip->flags & ~IPT_F_MASK) {
165 duprintf("Unknown flag bits set: %08X\n",
166 ip->flags & ~IPT_F_MASK);
169 if (ip->invflags & ~IPT_INV_MASK) {
170 duprintf("Unknown invflag bits set: %08X\n",
171 ip->invflags & ~IPT_INV_MASK);
/*
 * ipt_error - target handler for the built-in ERROR target: logs the
 * error string carried in targinfo.  NOTE(review): the return value
 * (presumably NF_DROP) is on a line missing from this excerpt.
 */
178 ipt_error(struct sk_buff **pskb,
179 const struct net_device *in,
180 const struct net_device *out,
181 unsigned int hooknum,
182 const void *targinfo,
186 printk("ip_tables: error: `%s'\n", (char *)targinfo);
/*
 * do_match - run a single match extension against the packet.  Used as
 * the IPT_MATCH_ITERATE callback from ipt_do_table: a nonzero return
 * stops iteration, i.e. the rule as a whole does not match.  Passes
 * ihl*4 so the extension knows where the transport header starts.
 */
192 int do_match(struct ipt_entry_match *m,
193 const struct sk_buff *skb,
194 const struct net_device *in,
195 const struct net_device *out,
199 /* Stop iteration if it doesn't match */
200 if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
201 skb->nh.iph->ihl*4, hotdrop))
/*
 * get_entry - resolve the rule entry located @offset bytes into the
 * table blob starting at @base.  Offsets are the on-the-wire form used
 * by hook_entry[], underflow[] and standard-target jump verdicts.
 */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)((char *)base + offset);
}
213 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * ipt_do_table - the main packet-filter traversal, called from the
 * netfilter hooks.  Walks this CPU's private copy of the table's
 * entries for @hook under the table read lock.  For each rule that
 * passes ip_packet_match() and all match extensions, the byte/packet
 * counters are bumped and the target is applied:
 *  - standard target (NULL target function): a negative verdict v
 *    encodes a final verdict as (unsigned)(-v) - 1; IPT_RETURN pops
 *    the saved "back" entry; otherwise v is a jump offset (a saved
 *    back pointer implements the call/return chain behavior);
 *  - extension target: its ->target() hook runs and may modify the
 *    skb, so ip/datalen are re-read afterwards.
 * NOTE(review): many interior lines (hotdrop handling, loop bounds,
 * final verdict selection) are missing from this excerpt; comments
 * below describe only the visible code.
 */
215 ipt_do_table(struct sk_buff **pskb,
217 const struct net_device *in,
218 const struct net_device *out,
219 struct ipt_table *table,
/* Zero-filled, long-aligned stand-in name for a missing device. */
222 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
227 /* Initializing verdict to NF_DROP keeps gcc happy. */
228 unsigned int verdict = NF_DROP;
229 const char *indev, *outdev;
231 struct ipt_entry *e, *back;
232 struct xt_table_info *private = table->private;
235 ip = (*pskb)->nh.iph;
236 datalen = (*pskb)->len - ip->ihl * 4;
237 indev = in ? in->name : nulldevname;
238 outdev = out ? out->name : nulldevname;
239 /* We handle fragments by dealing with the first fragment as
240 * if it was a normal packet. All other fragments are treated
241 * normally, except that they will NEVER match rules that ask
242 * things we don't know, ie. tcp syn flag or ports). If the
243 * rule is also a fragment-specific rule, non-fragments won't
245 offset = ntohs(ip->frag_off) & IP_OFFSET;
247 read_lock_bh(&table->lock);
248 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
/* Per-CPU copy of the entries; see locking comment near file top. */
249 table_base = (void *)private->entries[smp_processor_id()];
250 e = get_entry(table_base, private->hook_entry[hook]);
252 /* For return from builtin chain */
253 back = get_entry(table_base, private->underflow[hook]);
258 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
259 struct ipt_entry_target *t;
/* All match extensions must agree before the target fires. */
261 if (IPT_MATCH_ITERATE(e, do_match,
263 offset, &hotdrop) != 0)
266 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
268 t = ipt_get_target(e);
269 IP_NF_ASSERT(t->u.kernel.target);
270 /* Standard target? */
271 if (!t->u.kernel.target->target) {
274 v = ((struct ipt_standard_target *)t)->verdict;
276 /* Pop from stack? */
277 if (v != IPT_RETURN) {
278 verdict = (unsigned)(-v) - 1;
282 back = get_entry(table_base,
/* Jump: remember where to come back to unless this is a
 * tail-position jump or an explicit goto. */
286 if (table_base + v != (void *)e + e->next_offset
287 && !(e->ip.flags & IPT_F_GOTO)) {
288 /* Save old back ptr in next entry */
289 struct ipt_entry *next
290 = (void *)e + e->next_offset;
292 = (void *)back - table_base;
293 /* set back pointer to next entry */
297 e = get_entry(table_base, v);
299 /* Targets which reenter must return
301 #ifdef CONFIG_NETFILTER_DEBUG
302 ((struct ipt_entry *)table_base)->comefrom
305 verdict = t->u.kernel.target->target(pskb,
311 #ifdef CONFIG_NETFILTER_DEBUG
312 if (((struct ipt_entry *)table_base)->comefrom
314 && verdict == IPT_CONTINUE) {
315 printk("Target %s reentered!\n",
316 t->u.kernel.target->name);
319 ((struct ipt_entry *)table_base)->comefrom
322 /* Target might have changed stuff. */
323 ip = (*pskb)->nh.iph;
324 datalen = (*pskb)->len - ip->ihl * 4;
326 if (verdict == IPT_CONTINUE)
327 e = (void *)e + e->next_offset;
/* Rule did not match: fall through to the next entry. */
335 e = (void *)e + e->next_offset;
339 read_unlock_bh(&table->lock);
341 #ifdef DEBUG_ALLOW_ALL
350 /* All zeroes == unconditional rule. */
/* Treats the ipt_ip spec as an array of __u32 words and reports
 * whether every word is zero (no address/interface/proto condition).
 * NOTE(review): return statements are missing from this excerpt. */
352 unconditional(const struct ipt_ip *ip)
356 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
357 if (((__u32 *)ip)[i])
363 /* Figures out from what hook each rule can be called: returns 0 if
364 there are loops. Puts hook bitmask in comefrom. */
/*
 * Iterative depth-first walk over every chain reachable from each
 * valid hook entry point.  Avoids recursion by stashing the "return
 * address" in e->counters.pcnt (restored to 0 on the way back out).
 * The (1 << NF_IP_NUMHOOKS) bit in e->comefrom marks "on the current
 * path", so revisiting such an entry means a rule loop.
 * NOTE(review): several interior lines are missing from this excerpt.
 */
366 mark_source_chains(struct xt_table_info *newinfo,
367 unsigned int valid_hooks, void *entry0)
371 /* No recursion; use packet counter to save back ptrs (reset
372 to 0 as we leave), and comefrom to save source hook bitmask */
373 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
374 unsigned int pos = newinfo->hook_entry[hook];
376 = (struct ipt_entry *)(entry0 + pos);
378 if (!(valid_hooks & (1 << hook)))
381 /* Set initial back pointer. */
382 e->counters.pcnt = pos;
385 struct ipt_standard_target *t
386 = (void *)ipt_get_target(e);
/* Seeing the on-path marker again == cycle in the ruleset. */
388 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
389 printk("iptables: loop hook %u pos %u %08X.\n",
390 hook, pos, e->comefrom);
394 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
396 /* Unconditional return/END. */
397 if (e->target_offset == sizeof(struct ipt_entry)
398 && (strcmp(t->target.u.user.name,
399 IPT_STANDARD_TARGET) == 0)
401 && unconditional(&e->ip)) {
402 unsigned int oldpos, size;
404 /* Return: backtrack through the last
407 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
408 #ifdef DEBUG_IP_FIREWALL_USER
410 & (1 << NF_IP_NUMHOOKS)) {
411 duprintf("Back unset "
/* Pop the saved back pointer and clear it. */
418 pos = e->counters.pcnt;
419 e->counters.pcnt = 0;
421 /* We're at the start. */
425 e = (struct ipt_entry *)
427 } while (oldpos == pos + e->next_offset);
430 size = e->next_offset;
431 e = (struct ipt_entry *)
432 (entry0 + pos + size);
433 e->counters.pcnt = pos;
436 int newpos = t->verdict;
438 if (strcmp(t->target.u.user.name,
439 IPT_STANDARD_TARGET) == 0
441 /* This a jump; chase it. */
442 duprintf("Jump rule %u -> %u\n",
445 /* ... this is a fallthru */
446 newpos = pos + e->next_offset;
448 e = (struct ipt_entry *)
450 e->counters.pcnt = pos;
455 duprintf("Finished chain %u\n", hook);
/*
 * cleanup_match - IPT_MATCH_ITERATE callback that releases one match
 * extension: calls its destroy hook (if any) and drops the module
 * reference.  When @i is non-NULL it acts as a countdown so only the
 * first *i matches are cleaned (partial-failure unwind in check_entry).
 */
461 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
463 if (i && (*i)-- == 0)
466 if (m->u.kernel.match->destroy)
467 m->u.kernel.match->destroy(m->data,
468 m->u.match_size - sizeof(*m));
469 module_put(m->u.kernel.match->me);
/*
 * standard_check - validate a standard target's verdict field:
 * a non-negative verdict is a jump offset and must fit inside the
 * entry blob (<= max_offset - sizeof entry); a negative verdict must
 * not be below -NF_MAX_VERDICT - 1.
 * NOTE(review): return statements are missing from this excerpt.
 */
474 standard_check(const struct ipt_entry_target *t,
475 unsigned int max_offset)
477 struct ipt_standard_target *targ = (void *)t;
479 /* Check standard info. */
480 if (targ->verdict >= 0
481 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
482 duprintf("ipt_standard_check: bad verdict (%i)\n",
486 if (targ->verdict < -NF_MAX_VERDICT - 1) {
487 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
/*
 * check_match - resolve and validate one match extension for a rule.
 * Looks the match up by user-supplied name (auto-loading "ipt_<name>"
 * via request_module if needed), binds it to the entry, then runs the
 * generic xt_check_match() followed by the extension's own checkentry
 * hook.  On failure the module reference is dropped.
 */
495 check_match(struct ipt_entry_match *m,
497 const struct ipt_ip *ip,
498 unsigned int hookmask,
501 struct ipt_match *match;
504 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
506 "ipt_%s", m->u.user.name);
507 if (IS_ERR(match) || !match) {
508 duprintf("check_match: `%s' not found\n", m->u.user.name);
509 return match ? PTR_ERR(match) : -ENOENT;
511 m->u.kernel.match = match;
513 ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
514 name, hookmask, ip->proto,
515 ip->invflags & IPT_INV_PROTO);
/* Extension-specific validation, if the match provides one. */
519 if (m->u.kernel.match->checkentry
520 && !m->u.kernel.match->checkentry(name, ip, m->data,
521 m->u.match_size - sizeof(*m),
523 duprintf("ip_tables: check failed for `%s'.\n",
524 m->u.kernel.match->name);
/* Error path: release the reference taken by the lookup. */
532 module_put(m->u.kernel.match->me);
/* Forward declaration: needed to recognize the built-in standard
 * target inside check_entry() below. */
536 static struct ipt_target ipt_standard_target;
/*
 * check_entry - fully validate one rule: its ipt_ip spec, every match
 * extension (counted in j so a failure can unwind exactly the matches
 * already checked), and its target (looked up with module auto-load,
 * then checked via xt_check_target plus either standard_check for the
 * standard target or the target's own checkentry hook).
 */
539 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
542 struct ipt_entry_target *t;
543 struct ipt_target *target;
547 if (!ip_checkentry(&e->ip)) {
548 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
553 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
555 goto cleanup_matches;
557 t = ipt_get_target(e);
558 target = try_then_request_module(xt_find_target(AF_INET,
561 "ipt_%s", t->u.user.name);
562 if (IS_ERR(target) || !target) {
563 duprintf("check_entry: `%s' not found\n", t->u.user.name);
564 ret = target ? PTR_ERR(target) : -ENOENT;
565 goto cleanup_matches;
567 t->u.kernel.target = target;
569 ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
570 name, e->comefrom, e->ip.proto,
571 e->ip.invflags & IPT_INV_PROTO);
575 if (t->u.kernel.target == &ipt_standard_target) {
576 if (!standard_check(t, size)) {
578 goto cleanup_matches;
580 } else if (t->u.kernel.target->checkentry
581 && !t->u.kernel.target->checkentry(name, e, t->data,
585 duprintf("ip_tables: check failed for `%s'.\n",
586 t->u.kernel.target->name);
/* Error paths: drop target module ref, then undo the j matches
 * that were successfully checked. */
594 module_put(t->u.kernel.target->me);
596 IPT_MATCH_ITERATE(e, cleanup_match, &j);
/*
 * check_entry_size_and_hooks - structural validation of one entry in
 * the user-supplied blob: alignment, minimum size, and that the entry
 * lies within [base, limit).  Records which entries sit exactly at a
 * hook entry point or underflow offset, and zeroes the counters.
 * NOTE(review): some interior lines are missing from this excerpt.
 */
601 check_entry_size_and_hooks(struct ipt_entry *e,
602 struct xt_table_info *newinfo,
604 unsigned char *limit,
605 const unsigned int *hook_entries,
606 const unsigned int *underflows,
611 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
612 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
613 duprintf("Bad offset %p\n", e);
618 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
619 duprintf("checking: element %p size %u\n",
624 /* Check hooks & underflows */
625 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
626 if ((unsigned char *)e - base == hook_entries[h])
627 newinfo->hook_entry[h] = hook_entries[h];
628 if ((unsigned char *)e - base == underflows[h])
629 newinfo->underflow[h] = underflows[h];
632 /* FIXME: underflows must be unconditional, standard verdicts
633 < 0 (not IPT_RETURN). --RR */
635 /* Clear counters and comefrom */
636 e->counters = ((struct xt_counters) { 0, 0 });
/*
 * cleanup_entry - release all resources held by one rule: every match
 * extension (via cleanup_match) and the target (destroy hook plus
 * module reference).  @i, when non-NULL, limits cleanup to the first
 * *i entries during partial-failure unwinding.
 */
644 cleanup_entry(struct ipt_entry *e, unsigned int *i)
646 struct ipt_entry_target *t;
648 if (i && (*i)-- == 0)
651 /* Cleanup all matches */
652 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
653 t = ipt_get_target(e);
654 if (t->u.kernel.target->destroy)
655 t->u.kernel.target->destroy(t->data,
656 t->u.target_size - sizeof(*t));
657 module_put(t->u.kernel.target->me);
661 /* Checks and translates the user-supplied table segment (held in
/*
 * translate_table - full validation pipeline for a new table blob:
 * size/offset checks per entry, hook/underflow assignment, loop
 * detection (mark_source_chains), then semantic per-entry checks
 * (check_entry).  On success the validated blob is replicated into
 * every other CPU's private copy.
 * NOTE(review): error-path lines are missing from this excerpt.
 */
664 translate_table(const char *name,
665 unsigned int valid_hooks,
666 struct xt_table_info *newinfo,
670 const unsigned int *hook_entries,
671 const unsigned int *underflows)
676 newinfo->size = size;
677 newinfo->number = number;
679 /* Init all hooks to impossible value. */
680 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
681 newinfo->hook_entry[i] = 0xFFFFFFFF;
682 newinfo->underflow[i] = 0xFFFFFFFF;
685 duprintf("translate_table: size %u\n", newinfo->size);
687 /* Walk through entries, checking offsets. */
688 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
689 check_entry_size_and_hooks,
693 hook_entries, underflows, &i);
698 duprintf("translate_table: %u not %u entries\n",
703 /* Check hooks all assigned */
704 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
705 /* Only hooks which are valid */
706 if (!(valid_hooks & (1 << i)))
708 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
709 duprintf("Invalid hook entry %u %u\n",
713 if (newinfo->underflow[i] == 0xFFFFFFFF) {
714 duprintf("Invalid underflow %u %u\n",
720 if (!mark_source_chains(newinfo, valid_hooks, entry0))
723 /* Finally, each sanity check must pass */
725 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
726 check_entry, name, size, &i);
/* Failure here unwinds the i entries already checked. */
729 IPT_ENTRY_ITERATE(entry0, newinfo->size,
734 /* And one copy for every other CPU */
736 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
737 memcpy(newinfo->entries[i], entry0, newinfo->size);
/* IPT_ENTRY_ITERATE callback: accumulate one entry's byte/packet
 * counters into total[*i] (used when summing secondary CPU copies). */
745 add_entry_to_counter(const struct ipt_entry *e,
746 struct xt_counters total[],
749 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/* IPT_ENTRY_ITERATE callback: overwrite total[*i] with one entry's
 * counters (used for the first CPU so no prior memset is needed). */
756 set_entry_to_counter(const struct ipt_entry *e,
757 struct ipt_counters total[],
760 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/*
 * get_counters - produce a single counter array by combining every
 * CPU's private copy: SET from the current CPU's copy first, then ADD
 * each other CPU's copy on top.  Caller provides locking.
 */
767 get_counters(const struct xt_table_info *t,
768 struct xt_counters counters[])
774 /* Instead of clearing (by a previous call to memset())
775 * the counters and using adds, we set the counters
776 * with data used by 'current' CPU
777 * We dont care about preemption here.
779 curcpu = raw_smp_processor_id();
782 IPT_ENTRY_ITERATE(t->entries[curcpu],
784 set_entry_to_counter,
/* Remaining CPUs are accumulated (loop header missing from excerpt). */
792 IPT_ENTRY_ITERATE(t->entries[cpu],
794 add_entry_to_counter,
/*
 * copy_entries_to_user - export a table's rules to userspace.  Takes
 * an atomic counter snapshot under the table write lock, copies this
 * CPU's entry blob verbatim, then patches each exported entry: its
 * counters (from the snapshot) and the kernel-resolved match/target
 * names (the in-kernel union holds pointers, not the user-visible
 * name strings).
 * NOTE(review): error-path and cleanup lines are missing here.
 */
801 copy_entries_to_user(unsigned int total_size,
802 struct ipt_table *table,
803 void __user *userptr)
805 unsigned int off, num, countersize;
807 struct xt_counters *counters;
808 struct xt_table_info *private = table->private;
812 /* We need atomic snapshot of counters: rest doesn't change
813 (other than comefrom, which userspace doesn't care
815 countersize = sizeof(struct xt_counters) * private->number;
816 counters = vmalloc_node(countersize, numa_node_id());
818 if (counters == NULL)
821 /* First, sum counters... */
822 write_lock_bh(&table->lock);
823 get_counters(private, counters);
824 write_unlock_bh(&table->lock);
826 /* choose the copy that is on our node/cpu, ...
827 * This choice is lazy (because current thread is
828 * allowed to migrate to another cpu)
830 loc_cpu_entry = private->entries[raw_smp_processor_id()];
831 /* ... then copy entire thing ... */
832 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
837 /* FIXME: use iterator macros --RR */
838 /* ... then go back and fix counters and names */
839 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
841 struct ipt_entry_match *m;
842 struct ipt_entry_target *t;
844 e = (struct ipt_entry *)(loc_cpu_entry + off);
845 if (copy_to_user(userptr + off
846 + offsetof(struct ipt_entry, counters),
848 sizeof(counters[num])) != 0) {
/* Walk this entry's matches and rewrite each user-visible name. */
853 for (i = sizeof(struct ipt_entry);
854 i < e->target_offset;
855 i += m->u.match_size) {
858 if (copy_to_user(userptr + off + i
859 + offsetof(struct ipt_entry_match,
861 m->u.kernel.match->name,
862 strlen(m->u.kernel.match->name)+1)
869 t = ipt_get_target(e);
870 if (copy_to_user(userptr + off + e->target_offset
871 + offsetof(struct ipt_entry_target,
873 t->u.kernel.target->name,
874 strlen(t->u.kernel.target->name)+1) != 0) {
/*
 * get_entries - IPT_SO_GET_ENTRIES backend: look the table up by name
 * (with the xt table lock held), verify the user-supplied size matches
 * the table's, then hand off to copy_entries_to_user().
 */
886 get_entries(const struct ipt_get_entries *entries,
887 struct ipt_get_entries __user *uptr)
892 t = xt_find_table_lock(AF_INET, entries->name);
893 if (t && !IS_ERR(t)) {
894 struct xt_table_info *private = t->private;
895 duprintf("t->private->number = %u\n",
897 if (entries->size == private->size)
898 ret = copy_entries_to_user(private->size,
899 t, uptr->entrytable);
901 duprintf("get_entries: I've got %u not %u!\n",
/* Table not found (or lookup error). */
909 ret = t ? PTR_ERR(t) : -ENOENT;
/*
 * do_replace - IPT_SO_SET_REPLACE backend: atomically replace a
 * table's ruleset.  Copies and sanity-checks the ipt_replace header
 * (including multiplication-overflow guards on size/num_counters),
 * copies the new blob into a fresh xt_table_info, validates it via
 * translate_table(), swaps it in with xt_replace_table(), then frees
 * the old ruleset and returns its final counters to userspace.
 * NOTE(review): several error-handling lines are missing from this
 * excerpt; comments describe visible code only.
 */
915 do_replace(void __user *user, unsigned int len)
918 struct ipt_replace tmp;
920 struct xt_table_info *newinfo, *oldinfo;
921 struct xt_counters *counters;
922 void *loc_cpu_entry, *loc_cpu_old_entry;
924 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
927 /* Hack: Causes ipchains to give correct error msg --RR */
928 if (len != sizeof(tmp) + tmp.size)
/* Overflow guards before the allocations below. */
932 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
935 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
938 newinfo = xt_alloc_table_info(tmp.size);
942 /* choose the copy that is our node/cpu */
943 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
944 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
950 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
956 ret = translate_table(tmp.name, tmp.valid_hooks,
957 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
958 tmp.hook_entry, tmp.underflow);
960 goto free_newinfo_counters;
962 duprintf("ip_tables: Translated table\n");
964 t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
965 "iptable_%s", tmp.name);
966 if (!t || IS_ERR(t)) {
967 ret = t ? PTR_ERR(t) : -ENOENT;
968 goto free_newinfo_counters_untrans;
972 if (tmp.valid_hooks != t->valid_hooks) {
973 duprintf("Valid hook crap: %08X vs %08X\n",
974 tmp.valid_hooks, t->valid_hooks);
979 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
983 /* Update module usage count based on number of rules */
984 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
985 oldinfo->number, oldinfo->initial_entries, newinfo->number);
/* NOTE(review): the module_get/module_put actions for these two
 * conditions are on lines missing from this excerpt. */
986 if ((oldinfo->number > oldinfo->initial_entries) ||
987 (newinfo->number <= oldinfo->initial_entries))
989 if ((oldinfo->number > oldinfo->initial_entries) &&
990 (newinfo->number <= oldinfo->initial_entries))
993 /* Get the old counters. */
994 get_counters(oldinfo, counters);
995 /* Decrease module usage counts and free resource */
996 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
997 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
998 xt_free_table_info(oldinfo);
999 if (copy_to_user(tmp.counters, counters,
1000 sizeof(struct xt_counters) * tmp.num_counters) != 0)
/* Error unwind labels: untrans also releases per-entry refs. */
1009 free_newinfo_counters_untrans:
1010 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1011 free_newinfo_counters:
1014 xt_free_table_info(newinfo);
1018 /* We're lazy, and add to the first CPU; overflow works its fey magic
1019 * and everything is OK. */
/* IPT_ENTRY_ITERATE callback: add the user-supplied counter pair
 * addme[*i] onto one entry's counters (*i advances per entry; the
 * increment line is missing from this excerpt). */
1021 add_counter_to_entry(struct ipt_entry *e,
1022 const struct xt_counters addme[],
1026 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1028 (long unsigned int)e->counters.pcnt,
1029 (long unsigned int)e->counters.bcnt,
1030 (long unsigned int)addme[*i].pcnt,
1031 (long unsigned int)addme[*i].bcnt);
1034 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
/*
 * do_add_counters - IPT_SO_SET_ADD_COUNTERS backend: copy the
 * xt_counters_info blob from userspace (length re-validated against
 * num_counters), find the table, and under the table write lock add
 * each supplied counter pair onto this CPU's entries.
 * NOTE(review): vfree/xt_table_unlock cleanup lines are missing here.
 */
1041 do_add_counters(void __user *user, unsigned int len)
1044 struct xt_counters_info tmp, *paddc;
1045 struct ipt_table *t;
1046 struct xt_table_info *private;
1048 void *loc_cpu_entry;
1050 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1053 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1056 paddc = vmalloc_node(len, numa_node_id());
1060 if (copy_from_user(paddc, user, len) != 0) {
1065 t = xt_find_table_lock(AF_INET, tmp.name);
1066 if (!t || IS_ERR(t)) {
1067 ret = t ? PTR_ERR(t) : -ENOENT;
1071 write_lock_bh(&t->lock);
1072 private = t->private;
/* Counter count must match the live ruleset exactly. */
1073 if (private->number != paddc->num_counters) {
1075 goto unlock_up_free;
1079 /* Choose the copy that is on our node */
1080 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1081 IPT_ENTRY_ITERATE(loc_cpu_entry,
1083 add_counter_to_entry,
1087 write_unlock_bh(&t->lock);
/*
 * do_ipt_set_ctl - setsockopt dispatcher for ip_tables.  Requires
 * CAP_NET_ADMIN; routes REPLACE and ADD_COUNTERS to their handlers.
 */
1097 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1101 if (!capable(CAP_NET_ADMIN))
1105 case IPT_SO_SET_REPLACE:
1106 ret = do_replace(user, len);
1109 case IPT_SO_SET_ADD_COUNTERS:
1110 ret = do_add_counters(user, len);
1114 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
/*
 * do_ipt_get_ctl - getsockopt dispatcher for ip_tables (requires
 * CAP_NET_ADMIN):
 *  - IPT_SO_GET_INFO: table metadata (hooks, underflows, sizes);
 *  - IPT_SO_GET_ENTRIES: full ruleset dump via get_entries();
 *  - IPT_SO_GET_REVISION_{MATCH,TARGET}: extension revision lookup.
 * NOTE(review): some interior/error lines are missing here.
 */
1122 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1126 if (!capable(CAP_NET_ADMIN))
1130 case IPT_SO_GET_INFO: {
1131 char name[IPT_TABLE_MAXNAMELEN];
1132 struct ipt_table *t;
1134 if (*len != sizeof(struct ipt_getinfo)) {
1135 duprintf("length %u != %u\n", *len,
1136 sizeof(struct ipt_getinfo));
1141 if (copy_from_user(name, user, sizeof(name)) != 0) {
/* Force NUL termination on the user-supplied table name. */
1145 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1147 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1148 "iptable_%s", name);
1149 if (t && !IS_ERR(t)) {
1150 struct ipt_getinfo info;
1151 struct xt_table_info *private = t->private;
1153 info.valid_hooks = t->valid_hooks;
1154 memcpy(info.hook_entry, private->hook_entry,
1155 sizeof(info.hook_entry));
1156 memcpy(info.underflow, private->underflow,
1157 sizeof(info.underflow));
1158 info.num_entries = private->number;
1159 info.size = private->size;
1160 memcpy(info.name, name, sizeof(info.name));
1162 if (copy_to_user(user, &info, *len) != 0)
1169 ret = t ? PTR_ERR(t) : -ENOENT;
1173 case IPT_SO_GET_ENTRIES: {
1174 struct ipt_get_entries get;
1176 if (*len < sizeof(get)) {
1177 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1179 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1181 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1182 duprintf("get_entries: %u != %u\n", *len,
1183 sizeof(struct ipt_get_entries) + get.size);
1186 ret = get_entries(&get, user);
1190 case IPT_SO_GET_REVISION_MATCH:
1191 case IPT_SO_GET_REVISION_TARGET: {
1192 struct ipt_get_revision rev;
1195 if (*len != sizeof(rev)) {
1199 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1204 if (cmd == IPT_SO_GET_REVISION_TARGET)
1209 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1212 "ipt_%s", rev.name);
1217 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
/*
 * ipt_register_table - register a new table with an initial ruleset
 * (@repl).  Allocates a per-CPU xt_table_info, copies the template
 * entries into this CPU's slot, validates via translate_table(), and
 * hands the result to x_tables via xt_register_table(); frees newinfo
 * on any failure.
 */
1224 int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
1227 struct xt_table_info *newinfo;
/* Empty placeholder info that xt_register_table swaps out. */
1228 static struct xt_table_info bootstrap
1229 = { 0, 0, 0, { 0 }, { 0 }, { } };
1230 void *loc_cpu_entry;
1232 newinfo = xt_alloc_table_info(repl->size);
1236 /* choose the copy on our node/cpu
1237 * but dont care of preemption
1239 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1240 memcpy(loc_cpu_entry, repl->entries, repl->size);
1242 ret = translate_table(table->name, table->valid_hooks,
1243 newinfo, loc_cpu_entry, repl->size,
1248 xt_free_table_info(newinfo);
1252 if (xt_register_table(table, &bootstrap, newinfo) != 0) {
1253 xt_free_table_info(newinfo);
/*
 * ipt_unregister_table - detach a table from x_tables, release every
 * rule's match/target resources (cleanup_entry over this CPU's copy),
 * and free the per-CPU table info.
 */
1260 void ipt_unregister_table(struct ipt_table *table)
1262 struct xt_table_info *private;
1263 void *loc_cpu_entry;
1265 private = xt_unregister_table(table);
1267 /* Decrease module usage counts and free resources */
1268 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1269 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
1270 xt_free_table_info(private);
1273 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
/* test_type == 0xFF acts as a wildcard matching any ICMP type.
 * NOTE(review): the trailing invert parameter/XOR line is missing
 * from this excerpt. */
1275 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1276 u_int8_t type, u_int8_t code,
1279 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
/*
 * icmp_match - built-in "icmp" match extension: pull the ICMP header
 * (must not be a fragment) and compare type/code against the rule's
 * ipt_icmp range, honoring the IPT_ICMP_INV invert flag.  A truncated
 * header is treated as evil and the packet is hot-dropped.
 */
1284 icmp_match(const struct sk_buff *skb,
1285 const struct net_device *in,
1286 const struct net_device *out,
1287 const void *matchinfo,
1289 unsigned int protoff,
1292 struct icmphdr _icmph, *ic;
1293 const struct ipt_icmp *icmpinfo = matchinfo;
1295 /* Must not be a fragment. */
1299 ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1301 /* We've been asked to examine this packet, and we
1302 * can't. Hence, no choice but to drop.
1304 duprintf("Dropping evil ICMP tinygram.\n");
1309 return icmp_type_code_match(icmpinfo->type,
1313 !!(icmpinfo->invflags&IPT_ICMP_INV));
1316 /* Called when user tries to insert an entry of this type. */
/* Valid iff no invert bits other than IPT_ICMP_INV are set. */
1318 icmp_checkentry(const char *tablename,
1321 unsigned int matchsize,
1322 unsigned int hook_mask)
1324 const struct ipt_icmp *icmpinfo = matchinfo;
1326 /* Must specify no unknown invflags */
1327 return !(icmpinfo->invflags & ~IPT_ICMP_INV);
1330 /* The built-in targets: standard (NULL) and error. */
/* Standard target: no ->target function; its int payload is the
 * verdict interpreted inline by ipt_do_table(). */
1331 static struct ipt_target ipt_standard_target = {
1332 .name = IPT_STANDARD_TARGET,
1333 .targetsize = sizeof(int),
/* ERROR target: carries an error name string, handled by ipt_error. */
1336 static struct ipt_target ipt_error_target = {
1337 .name = IPT_ERROR_TARGET,
1338 .target = ipt_error,
1339 .targetsize = IPT_FUNCTION_MAXNAMELEN,
/* Socket-option range registration for the IPT_SO_{SET,GET}_* calls. */
1342 static struct nf_sockopt_ops ipt_sockopts = {
1344 .set_optmin = IPT_BASE_CTL,
1345 .set_optmax = IPT_SO_SET_MAX+1,
1346 .set = do_ipt_set_ctl,
1347 .get_optmin = IPT_BASE_CTL,
1348 .get_optmax = IPT_SO_GET_MAX+1,
1349 .get = do_ipt_get_ctl,
/* Built-in ICMP match extension descriptor. */
1352 static struct ipt_match icmp_matchstruct = {
1354 .match = icmp_match,
1355 .matchsize = sizeof(struct ipt_icmp),
1356 .proto = IPPROTO_ICMP,
1357 .checkentry = icmp_checkentry,
/*
 * init - module entry point: initialize the AF_INET x_tables layer,
 * register the built-in targets/match, then the sockopt interface.
 */
1360 static int __init init(void)
1364 xt_proto_init(AF_INET);
1366 /* Noone else will be downing sem now, so we won't sleep */
1367 xt_register_target(AF_INET, &ipt_standard_target);
1368 xt_register_target(AF_INET, &ipt_error_target);
1369 xt_register_match(AF_INET, &icmp_matchstruct);
1371 /* Register setsockopt */
1372 ret = nf_register_sockopt(&ipt_sockopts);
1374 duprintf("Unable to register sockopts.\n");
1378 printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
/* Module exit: unregister everything in reverse order of init(). */
1382 static void __exit fini(void)
1384 nf_unregister_sockopt(&ipt_sockopts);
1386 xt_unregister_match(AF_INET, &icmp_matchstruct);
1387 xt_unregister_target(AF_INET, &ipt_error_target);
1388 xt_unregister_target(AF_INET, &ipt_standard_target);
1390 xt_proto_fini(AF_INET);
/* Public API exported to table modules (iptable_filter, _nat, ...). */
1393 EXPORT_SYMBOL(ipt_register_table);
1394 EXPORT_SYMBOL(ipt_unregister_table);
1395 EXPORT_SYMBOL(ipt_do_table);