abc805bbc206f806e41230f064143fbb2d481c58
[pandora-kernel.git] / net / netfilter / xt_TCPMSS.c
1 /*
2  * This is a module which is used for setting the MSS option in TCP packets.
3  *
4  * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #include <linux/module.h>
12 #include <linux/skbuff.h>
13 #include <linux/ip.h>
14 #include <linux/gfp.h>
15 #include <linux/ipv6.h>
16 #include <linux/tcp.h>
17 #include <net/dst.h>
18 #include <net/flow.h>
19 #include <net/ipv6.h>
20 #include <net/route.h>
21 #include <net/tcp.h>
22
23 #include <linux/netfilter_ipv4/ip_tables.h>
24 #include <linux/netfilter_ipv6/ip6_tables.h>
25 #include <linux/netfilter/x_tables.h>
26 #include <linux/netfilter/xt_tcpudp.h>
27 #include <linux/netfilter/xt_TCPMSS.h>
28
/*
 * Module metadata. The two aliases let the module also be requested under
 * the names "ipt_TCPMSS" and "ip6t_TCPMSS".
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
MODULE_ALIAS("ipt_TCPMSS");
MODULE_ALIAS("ip6t_TCPMSS");
34
35 static inline unsigned int
36 optlen(const u_int8_t *opt, unsigned int offset)
37 {
38         /* Beware zero-length options: make finite progress */
39         if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
40                 return 1;
41         else
42                 return opt[offset+1];
43 }
44
45 static int
46 tcpmss_mangle_packet(struct sk_buff *skb,
47                      const struct xt_action_param *par,
48                      unsigned int in_mtu,
49                      unsigned int tcphoff,
50                      unsigned int minlen)
51 {
52         const struct xt_tcpmss_info *info = par->targinfo;
53         struct tcphdr *tcph;
54         int len, tcp_hdrlen;
55         unsigned int i;
56         __be16 oldval;
57         u16 newmss;
58         u8 *opt;
59
60         /* This is a fragment, no TCP header is available */
61         if (par->fragoff != 0)
62                 return 0;
63
64         if (!skb_make_writable(skb, skb->len))
65                 return -1;
66
67         len = skb->len - tcphoff;
68         if (len < (int)sizeof(struct tcphdr))
69                 return -1;
70
71         tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
72         tcp_hdrlen = tcph->doff * 4;
73
74         if (len < tcp_hdrlen)
75                 return -1;
76
77         if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
78                 if (dst_mtu(skb_dst(skb)) <= minlen) {
79                         if (net_ratelimit())
80                                 pr_err("unknown or invalid path-MTU (%u)\n",
81                                        dst_mtu(skb_dst(skb)));
82                         return -1;
83                 }
84                 if (in_mtu <= minlen) {
85                         if (net_ratelimit())
86                                 pr_err("unknown or invalid path-MTU (%u)\n",
87                                        in_mtu);
88                         return -1;
89                 }
90                 newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
91         } else
92                 newmss = info->mss;
93
94         opt = (u_int8_t *)tcph;
95         for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
96                 if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
97                         u_int16_t oldmss;
98
99                         oldmss = (opt[i+2] << 8) | opt[i+3];
100
101                         /* Never increase MSS, even when setting it, as
102                          * doing so results in problems for hosts that rely
103                          * on MSS being set correctly.
104                          */
105                         if (oldmss <= newmss)
106                                 return 0;
107
108                         opt[i+2] = (newmss & 0xff00) >> 8;
109                         opt[i+3] = newmss & 0x00ff;
110
111                         inet_proto_csum_replace2(&tcph->check, skb,
112                                                  htons(oldmss), htons(newmss),
113                                                  0);
114                         return 0;
115                 }
116         }
117
118         /* There is data after the header so the option can't be added
119          * without moving it, and doing so may make the SYN packet
120          * itself too large. Accept the packet unmodified instead.
121          */
122         if (len > tcp_hdrlen)
123                 return 0;
124
125         /*
126          * MSS Option not found ?! add it..
127          */
128         if (skb_tailroom(skb) < TCPOLEN_MSS) {
129                 if (pskb_expand_head(skb, 0,
130                                      TCPOLEN_MSS - skb_tailroom(skb),
131                                      GFP_ATOMIC))
132                         return -1;
133                 tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
134         }
135
136         skb_put(skb, TCPOLEN_MSS);
137
138         opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
139         memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
140
141         inet_proto_csum_replace2(&tcph->check, skb,
142                                  htons(len), htons(len + TCPOLEN_MSS), 1);
143         opt[0] = TCPOPT_MSS;
144         opt[1] = TCPOLEN_MSS;
145         opt[2] = (newmss & 0xff00) >> 8;
146         opt[3] = newmss & 0x00ff;
147
148         inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
149
150         oldval = ((__be16 *)tcph)[6];
151         tcph->doff += TCPOLEN_MSS/4;
152         inet_proto_csum_replace2(&tcph->check, skb,
153                                  oldval, ((__be16 *)tcph)[6], 0);
154         return TCPOLEN_MSS;
155 }
156
157 static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
158                                     unsigned int family)
159 {
160         struct flowi fl;
161         const struct nf_afinfo *ai;
162         struct rtable *rt = NULL;
163         u_int32_t mtu     = ~0U;
164
165         if (family == PF_INET) {
166                 struct flowi4 *fl4 = &fl.u.ip4;
167                 memset(fl4, 0, sizeof(*fl4));
168                 fl4->daddr = ip_hdr(skb)->saddr;
169         } else {
170                 struct flowi6 *fl6 = &fl.u.ip6;
171
172                 memset(fl6, 0, sizeof(*fl6));
173                 ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr);
174         }
175         rcu_read_lock();
176         ai = nf_get_afinfo(family);
177         if (ai != NULL)
178                 ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
179         rcu_read_unlock();
180
181         if (rt != NULL) {
182                 mtu = dst_mtu(&rt->dst);
183                 dst_release(&rt->dst);
184         }
185         return mtu;
186 }
187
188 static unsigned int
189 tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
190 {
191         struct iphdr *iph = ip_hdr(skb);
192         __be16 newlen;
193         int ret;
194
195         ret = tcpmss_mangle_packet(skb, par,
196                                    tcpmss_reverse_mtu(skb, PF_INET),
197                                    iph->ihl * 4,
198                                    sizeof(*iph) + sizeof(struct tcphdr));
199         if (ret < 0)
200                 return NF_DROP;
201         if (ret > 0) {
202                 iph = ip_hdr(skb);
203                 newlen = htons(ntohs(iph->tot_len) + ret);
204                 csum_replace2(&iph->check, iph->tot_len, newlen);
205                 iph->tot_len = newlen;
206         }
207         return XT_CONTINUE;
208 }
209
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
/*
 * IPv6 target: skip the extension headers to find the TCP header, run the
 * MSS mangling and, when the packet grew, enlarge the IPv6 payload length.
 *
 * Returns NF_DROP if the extension headers cannot be skipped or if
 * tcpmss_mangle_packet() reports an error, XT_CONTINUE otherwise.
 */
static unsigned int
tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        const unsigned int min_hdrs = sizeof(*hdr) + sizeof(struct tcphdr);
        u8 proto = hdr->nexthdr;
        int tcp_off;
        int grown;

        tcp_off = ipv6_skip_exthdr(skb, sizeof(*hdr), &proto);
        if (tcp_off < 0)
                return NF_DROP;

        grown = tcpmss_mangle_packet(skb, par,
                                     tcpmss_reverse_mtu(skb, PF_INET6),
                                     tcp_off, min_hdrs);
        if (grown < 0)
                return NF_DROP;
        if (grown == 0)
                return XT_CONTINUE;

        /* Fetch the header again: mangling may have expanded the skb head. */
        hdr = ipv6_hdr(skb);
        hdr->payload_len = htons(ntohs(hdr->payload_len) + grown);

        return XT_CONTINUE;
}
#endif
236
237 /* Must specify -p tcp --syn */
238 static inline bool find_syn_match(const struct xt_entry_match *m)
239 {
240         const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
241
242         if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
243             tcpinfo->flg_cmp & TCPHDR_SYN &&
244             !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
245                 return true;
246
247         return false;
248 }
249
250 static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
251 {
252         const struct xt_tcpmss_info *info = par->targinfo;
253         const struct ipt_entry *e = par->entryinfo;
254         const struct xt_entry_match *ematch;
255
256         if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
257             (par->hook_mask & ~((1 << NF_INET_FORWARD) |
258                            (1 << NF_INET_LOCAL_OUT) |
259                            (1 << NF_INET_POST_ROUTING))) != 0) {
260                 pr_info("path-MTU clamping only supported in "
261                         "FORWARD, OUTPUT and POSTROUTING hooks\n");
262                 return -EINVAL;
263         }
264         xt_ematch_foreach(ematch, e)
265                 if (find_syn_match(ematch))
266                         return 0;
267         pr_info("Only works on TCP SYN packets\n");
268         return -EINVAL;
269 }
270
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
/*
 * Rule validation for the IPv6 target.
 *
 * Path-MTU clamping is refused with -EINVAL when the rule is reachable
 * from any hook other than FORWARD, LOCAL_OUT and POST_ROUTING. The rule
 * must also contain a match accepted by find_syn_match(); if none is
 * found the result is -EINVAL as well. Returns 0 when the rule is fine.
 */
static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
{
        const unsigned int pmtu_hooks = (1 << NF_INET_FORWARD) |
                                        (1 << NF_INET_LOCAL_OUT) |
                                        (1 << NF_INET_POST_ROUTING);
        const struct xt_tcpmss_info *info = par->targinfo;
        const struct ip6t_entry *entry = par->entryinfo;
        const struct xt_entry_match *ematch;

        if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
            (par->hook_mask & ~pmtu_hooks) != 0) {
                pr_info("path-MTU clamping only supported in "
                        "FORWARD, OUTPUT and POSTROUTING hooks\n");
                return -EINVAL;
        }

        xt_ematch_foreach(ematch, entry) {
                if (find_syn_match(ematch))
                        return 0;
        }

        pr_info("Only works on TCP SYN packets\n");
        return -EINVAL;
}
#endif
293
294 static struct xt_target tcpmss_tg_reg[] __read_mostly = {
295         {
296                 .family         = NFPROTO_IPV4,
297                 .name           = "TCPMSS",
298                 .checkentry     = tcpmss_tg4_check,
299                 .target         = tcpmss_tg4,
300                 .targetsize     = sizeof(struct xt_tcpmss_info),
301                 .proto          = IPPROTO_TCP,
302                 .me             = THIS_MODULE,
303         },
304 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
305         {
306                 .family         = NFPROTO_IPV6,
307                 .name           = "TCPMSS",
308                 .checkentry     = tcpmss_tg6_check,
309                 .target         = tcpmss_tg6,
310                 .targetsize     = sizeof(struct xt_tcpmss_info),
311                 .proto          = IPPROTO_TCP,
312                 .me             = THIS_MODULE,
313         },
314 #endif
315 };
316
317 static int __init tcpmss_tg_init(void)
318 {
319         return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
320 }
321
322 static void __exit tcpmss_tg_exit(void)
323 {
324         xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
325 }
326
/* Hook the registration and unregistration routines into module load/unload. */
module_init(tcpmss_tg_init);
module_exit(tcpmss_tg_exit);