netfilter: xt_TCPMSS: Fix missing fragmentation handling
[pandora-kernel.git] / net / netfilter / xt_TCPMSS.c
1 /*
2  * This is a module which is used for setting the MSS option in TCP packets.
3  *
4  * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #include <linux/module.h>
12 #include <linux/skbuff.h>
13 #include <linux/ip.h>
14 #include <linux/gfp.h>
15 #include <linux/ipv6.h>
16 #include <linux/tcp.h>
17 #include <net/dst.h>
18 #include <net/flow.h>
19 #include <net/ipv6.h>
20 #include <net/route.h>
21 #include <net/tcp.h>
22
23 #include <linux/netfilter_ipv4/ip_tables.h>
24 #include <linux/netfilter_ipv6/ip6_tables.h>
25 #include <linux/netfilter/x_tables.h>
26 #include <linux/netfilter/xt_tcpudp.h>
27 #include <linux/netfilter/xt_TCPMSS.h>
28
29 MODULE_LICENSE("GPL");
30 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
31 MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
32 MODULE_ALIAS("ipt_TCPMSS");
33 MODULE_ALIAS("ip6t_TCPMSS");
34
/*
 * Return the number of bytes to advance past the TCP option that starts at
 * opt[offset].
 *
 * Option kinds up to and including TCPOPT_NOP are treated as a single byte
 * and their length byte is never read.  For every other kind the length
 * byte opt[offset + 1] is returned, except that a length of zero is
 * reported as 1 so that a loop stepping by this value always makes
 * progress.
 *
 * The caller must guarantee that opt[offset + 1] is readable whenever
 * opt[offset] is greater than TCPOPT_NOP.
 */
static inline unsigned int
optlen(const u_int8_t *opt, unsigned int offset)
{
	const u_int8_t *cur = opt + offset;

	/* Single-byte option kinds: do not touch the (absent) length byte */
	if (cur[0] <= TCPOPT_NOP)
		return 1;

	/* Beware zero-length options: make finite progress */
	return cur[1] != 0 ? cur[1] : 1;
}
44
45 static int
46 tcpmss_mangle_packet(struct sk_buff *skb,
47                      const struct xt_action_param *par,
48                      unsigned int in_mtu,
49                      unsigned int tcphoff,
50                      unsigned int minlen)
51 {
52         const struct xt_tcpmss_info *info = par->targinfo;
53         struct tcphdr *tcph;
54         unsigned int tcplen, i;
55         __be16 oldval;
56         u16 newmss;
57         u8 *opt;
58
59         /* This is a fragment, no TCP header is available */
60         if (par->fragoff != 0)
61                 return XT_CONTINUE;
62
63         if (!skb_make_writable(skb, skb->len))
64                 return -1;
65
66         tcplen = skb->len - tcphoff;
67         tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
68
69         /* Header cannot be larger than the packet */
70         if (tcplen < tcph->doff*4)
71                 return -1;
72
73         if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
74                 if (dst_mtu(skb_dst(skb)) <= minlen) {
75                         if (net_ratelimit())
76                                 pr_err("unknown or invalid path-MTU (%u)\n",
77                                        dst_mtu(skb_dst(skb)));
78                         return -1;
79                 }
80                 if (in_mtu <= minlen) {
81                         if (net_ratelimit())
82                                 pr_err("unknown or invalid path-MTU (%u)\n",
83                                        in_mtu);
84                         return -1;
85                 }
86                 newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
87         } else
88                 newmss = info->mss;
89
90         opt = (u_int8_t *)tcph;
91         for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
92                 if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
93                     opt[i+1] == TCPOLEN_MSS) {
94                         u_int16_t oldmss;
95
96                         oldmss = (opt[i+2] << 8) | opt[i+3];
97
98                         /* Never increase MSS, even when setting it, as
99                          * doing so results in problems for hosts that rely
100                          * on MSS being set correctly.
101                          */
102                         if (oldmss <= newmss)
103                                 return 0;
104
105                         opt[i+2] = (newmss & 0xff00) >> 8;
106                         opt[i+3] = newmss & 0x00ff;
107
108                         inet_proto_csum_replace2(&tcph->check, skb,
109                                                  htons(oldmss), htons(newmss),
110                                                  0);
111                         return 0;
112                 }
113         }
114
115         /* There is data after the header so the option can't be added
116            without moving it, and doing so may make the SYN packet
117            itself too large. Accept the packet unmodified instead. */
118         if (tcplen > tcph->doff*4)
119                 return 0;
120
121         /*
122          * MSS Option not found ?! add it..
123          */
124         if (skb_tailroom(skb) < TCPOLEN_MSS) {
125                 if (pskb_expand_head(skb, 0,
126                                      TCPOLEN_MSS - skb_tailroom(skb),
127                                      GFP_ATOMIC))
128                         return -1;
129                 tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
130         }
131
132         skb_put(skb, TCPOLEN_MSS);
133
134         opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
135         memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
136
137         inet_proto_csum_replace2(&tcph->check, skb,
138                                  htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
139         opt[0] = TCPOPT_MSS;
140         opt[1] = TCPOLEN_MSS;
141         opt[2] = (newmss & 0xff00) >> 8;
142         opt[3] = newmss & 0x00ff;
143
144         inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
145
146         oldval = ((__be16 *)tcph)[6];
147         tcph->doff += TCPOLEN_MSS/4;
148         inet_proto_csum_replace2(&tcph->check, skb,
149                                  oldval, ((__be16 *)tcph)[6], 0);
150         return TCPOLEN_MSS;
151 }
152
153 static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
154                                     unsigned int family)
155 {
156         struct flowi fl;
157         const struct nf_afinfo *ai;
158         struct rtable *rt = NULL;
159         u_int32_t mtu     = ~0U;
160
161         if (family == PF_INET) {
162                 struct flowi4 *fl4 = &fl.u.ip4;
163                 memset(fl4, 0, sizeof(*fl4));
164                 fl4->daddr = ip_hdr(skb)->saddr;
165         } else {
166                 struct flowi6 *fl6 = &fl.u.ip6;
167
168                 memset(fl6, 0, sizeof(*fl6));
169                 ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr);
170         }
171         rcu_read_lock();
172         ai = nf_get_afinfo(family);
173         if (ai != NULL)
174                 ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
175         rcu_read_unlock();
176
177         if (rt != NULL) {
178                 mtu = dst_mtu(&rt->dst);
179                 dst_release(&rt->dst);
180         }
181         return mtu;
182 }
183
184 static unsigned int
185 tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
186 {
187         struct iphdr *iph = ip_hdr(skb);
188         __be16 newlen;
189         int ret;
190
191         ret = tcpmss_mangle_packet(skb, par,
192                                    tcpmss_reverse_mtu(skb, PF_INET),
193                                    iph->ihl * 4,
194                                    sizeof(*iph) + sizeof(struct tcphdr));
195         if (ret < 0)
196                 return NF_DROP;
197         if (ret > 0) {
198                 iph = ip_hdr(skb);
199                 newlen = htons(ntohs(iph->tot_len) + ret);
200                 csum_replace2(&iph->check, iph->tot_len, newlen);
201                 iph->tot_len = newlen;
202         }
203         return XT_CONTINUE;
204 }
205
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
/*
 * IPv6 target entry point.
 *
 * Uses the offset returned by ipv6_skip_exthdr() as TCP header offset; a
 * negative offset drops the packet.  The packet is then handed to
 * tcpmss_mangle_packet(): a negative result drops the packet, a positive
 * result is added to the IPv6 payload length.  In all non-drop cases rule
 * traversal continues.
 */
static unsigned int
tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
	struct ipv6hdr *ip6 = ipv6_hdr(skb);
	u8 proto = ip6->nexthdr;
	int l4off;
	int grown;

	l4off = ipv6_skip_exthdr(skb, sizeof(*ip6), &proto);
	if (l4off < 0)
		return NF_DROP;

	grown = tcpmss_mangle_packet(skb, par,
				     tcpmss_reverse_mtu(skb, PF_INET6),
				     l4off,
				     sizeof(*ip6) + sizeof(struct tcphdr));
	if (grown < 0)
		return NF_DROP;

	if (grown > 0) {
		/* Fetch the header again: the mangler may call pskb_expand_head() */
		ip6 = ipv6_hdr(skb);
		ip6->payload_len = htons(ntohs(ip6->payload_len) + grown);
	}

	return XT_CONTINUE;
}
#endif
232
233 /* Must specify -p tcp --syn */
234 static inline bool find_syn_match(const struct xt_entry_match *m)
235 {
236         const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
237
238         if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
239             tcpinfo->flg_cmp & TCPHDR_SYN &&
240             !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
241                 return true;
242
243         return false;
244 }
245
246 static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
247 {
248         const struct xt_tcpmss_info *info = par->targinfo;
249         const struct ipt_entry *e = par->entryinfo;
250         const struct xt_entry_match *ematch;
251
252         if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
253             (par->hook_mask & ~((1 << NF_INET_FORWARD) |
254                            (1 << NF_INET_LOCAL_OUT) |
255                            (1 << NF_INET_POST_ROUTING))) != 0) {
256                 pr_info("path-MTU clamping only supported in "
257                         "FORWARD, OUTPUT and POSTROUTING hooks\n");
258                 return -EINVAL;
259         }
260         xt_ematch_foreach(ematch, e)
261                 if (find_syn_match(ematch))
262                         return 0;
263         pr_info("Only works on TCP SYN packets\n");
264         return -EINVAL;
265 }
266
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
/*
 * Rule validation for the IPv6 target.
 *
 * Rejects XT_TCPMSS_CLAMP_PMTU when the rule can be reached from any hook
 * other than FORWARD, LOCAL_OUT or POST_ROUTING, and rejects rules that
 * carry no match accepted by find_syn_match().
 *
 * Returns 0 if the rule is acceptable, -EINVAL otherwise.
 */
static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
{
	const unsigned int pmtu_hooks = (1 << NF_INET_FORWARD) |
					(1 << NF_INET_LOCAL_OUT) |
					(1 << NF_INET_POST_ROUTING);
	const struct xt_tcpmss_info *info = par->targinfo;
	const struct ip6t_entry *entry = par->entryinfo;
	const struct xt_entry_match *ematch;

	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
	    (par->hook_mask & ~pmtu_hooks) != 0) {
		pr_info("path-MTU clamping only supported in "
			"FORWARD, OUTPUT and POSTROUTING hooks\n");
		return -EINVAL;
	}

	/* At least one match of the rule has to select TCP SYN packets */
	xt_ematch_foreach(ematch, entry) {
		if (find_syn_match(ematch))
			return 0;
	}

	pr_info("Only works on TCP SYN packets\n");
	return -EINVAL;
}
#endif
289
290 static struct xt_target tcpmss_tg_reg[] __read_mostly = {
291         {
292                 .family         = NFPROTO_IPV4,
293                 .name           = "TCPMSS",
294                 .checkentry     = tcpmss_tg4_check,
295                 .target         = tcpmss_tg4,
296                 .targetsize     = sizeof(struct xt_tcpmss_info),
297                 .proto          = IPPROTO_TCP,
298                 .me             = THIS_MODULE,
299         },
300 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
301         {
302                 .family         = NFPROTO_IPV6,
303                 .name           = "TCPMSS",
304                 .checkentry     = tcpmss_tg6_check,
305                 .target         = tcpmss_tg6,
306                 .targetsize     = sizeof(struct xt_tcpmss_info),
307                 .proto          = IPPROTO_TCP,
308                 .me             = THIS_MODULE,
309         },
310 #endif
311 };
312
313 static int __init tcpmss_tg_init(void)
314 {
315         return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
316 }
317
318 static void __exit tcpmss_tg_exit(void)
319 {
320         xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
321 }
322
/* Hook the registration helpers into module load and unload. */
module_init(tcpmss_tg_init);
module_exit(tcpmss_tg_exit);