/*
 * net/ipv4/ipvs/ip_vs_proto_udp.c
 * ([IPVS]: Replace local version of skb_make_writable)
 */
1 /*
2  * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
3  *
4  * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
5  *
6  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
7  *              Julian Anastasov <ja@ssi.bg>
8  *
9  *              This program is free software; you can redistribute it and/or
10  *              modify it under the terms of the GNU General Public License
11  *              as published by the Free Software Foundation; either version
12  *              2 of the License, or (at your option) any later version.
13  *
14  * Changes:
15  *
16  */
17
18 #include <linux/in.h>
19 #include <linux/ip.h>
20 #include <linux/kernel.h>
21 #include <linux/netfilter.h>
22 #include <linux/netfilter_ipv4.h>
23 #include <linux/udp.h>
24
25 #include <net/ip_vs.h>
26 #include <net/ip.h>
27
28 static struct ip_vs_conn *
29 udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
30                 const struct iphdr *iph, unsigned int proto_off, int inverse)
31 {
32         struct ip_vs_conn *cp;
33         __be16 _ports[2], *pptr;
34
35         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
36         if (pptr == NULL)
37                 return NULL;
38
39         if (likely(!inverse)) {
40                 cp = ip_vs_conn_in_get(iph->protocol,
41                                        iph->saddr, pptr[0],
42                                        iph->daddr, pptr[1]);
43         } else {
44                 cp = ip_vs_conn_in_get(iph->protocol,
45                                        iph->daddr, pptr[1],
46                                        iph->saddr, pptr[0]);
47         }
48
49         return cp;
50 }
51
52
53 static struct ip_vs_conn *
54 udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
55                  const struct iphdr *iph, unsigned int proto_off, int inverse)
56 {
57         struct ip_vs_conn *cp;
58         __be16 _ports[2], *pptr;
59
60         pptr = skb_header_pointer(skb, ip_hdrlen(skb),
61                                   sizeof(_ports), _ports);
62         if (pptr == NULL)
63                 return NULL;
64
65         if (likely(!inverse)) {
66                 cp = ip_vs_conn_out_get(iph->protocol,
67                                         iph->saddr, pptr[0],
68                                         iph->daddr, pptr[1]);
69         } else {
70                 cp = ip_vs_conn_out_get(iph->protocol,
71                                         iph->daddr, pptr[1],
72                                         iph->saddr, pptr[0]);
73         }
74
75         return cp;
76 }
77
78
/*
 * Decide what to do with a new (unconnected) incoming UDP packet.
 * If a virtual service matches the destination, schedule a real server
 * and create a connection entry in *cpp.
 *
 * Returns 1 to let processing continue, or 0 with *verdict set when the
 * packet's fate has already been decided here.
 */
static int
udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
		  int *verdict, struct ip_vs_conn **cpp)
{
	struct ip_vs_service *svc;
	struct udphdr _udph, *uh;

	/* Need a full UDP header to read the destination port. */
	uh = skb_header_pointer(skb, ip_hdrlen(skb),
				sizeof(_udph), &_udph);
	if (uh == NULL) {
		*verdict = NF_DROP;
		return 0;
	}

	if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
				     ip_hdr(skb)->daddr, uh->dest))) {
		if (ip_vs_todrop()) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			*verdict = NF_DROP;
			return 0;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb);
		if (!*cpp) {
			/* NOTE(review): no ip_vs_service_put() on this
			 * path — presumably ip_vs_leave() drops the
			 * service reference itself; confirm in core. */
			*verdict = ip_vs_leave(svc, skb, pp);
			return 0;
		}
		ip_vs_service_put(svc);
	}
	return 1;
}
118
119
/*
 * Incrementally patch the UDP checksum after exactly one address and
 * one port in the header were rewritten (oldip/oldport -> newip/newport),
 * avoiding a full recomputation over the payload.
 */
static inline void
udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
		     __be16 oldport, __be16 newport)
{
	uhdr->check =
		csum_fold(ip_vs_check_diff4(oldip, newip,
				 ip_vs_check_diff2(oldport, newport,
					~csum_unfold(uhdr->check))));
	/* A result of 0 must go on the wire as all-ones: a zero UDP
	 * checksum field means "no checksum" (RFC 768). */
	if (!uhdr->check)
		uhdr->check = CSUM_MANGLED_0;
}
131
/*
 * Source-NAT an outgoing (server -> client) UDP packet: rewrite the
 * source port to the virtual service port and fix the UDP checksum.
 * Returns 1 on success, 0 when the packet must be dropped.
 */
static int
udp_snat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct udphdr *udph;
	const unsigned int udphoff = ip_hdrlen(*pskb);

	/* csum_check requires unshared skb */
	if (!skb_make_writable(*pskb, udphoff+sizeof(*udph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/*
		 *	Call application helper if needed
		 */
		if (!ip_vs_app_pkt_out(cp, pskb))
			return 0;
	}

	/* Compute the header pointer only now: the app helper above may
	 * have replaced or resized the skb behind *pskb. */
	udph = (void *)ip_hdr(*pskb) + udphoff;
	udph->source = cp->vport;

	/*
	 *	Adjust UDP checksums
	 */
	if (!cp->app && (udph->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
				     cp->dport, cp->vport);
		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		udph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, udphoff,
					     (*pskb)->len - udphoff, 0);
		udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
						(*pskb)->len - udphoff,
						cp->protocol,
						(*pskb)->csum);
		/* Zero means "no checksum"; send all-ones instead. */
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;
		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
			  pp->name, udph->check,
			  (char*)&(udph->check) - (char*)udph);
	}
	return 1;
}
184
185
/*
 * Destination-NAT an incoming (client -> server) UDP packet: rewrite
 * the destination port to the real server port and fix the checksum.
 * Returns 1 on success, 0 when the packet must be dropped.
 */
static int
udp_dnat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct udphdr *udph;
	unsigned int udphoff = ip_hdrlen(*pskb);

	/* csum_check requires unshared skb */
	if (!skb_make_writable(*pskb, udphoff+sizeof(*udph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/*
		 *	Attempt ip_vs_app call.
		 *	It will fix ip_vs_conn
		 */
		if (!ip_vs_app_pkt_in(cp, pskb))
			return 0;
	}

	/* Compute the header pointer only now: the app helper above may
	 * have replaced or resized the skb behind *pskb. */
	udph = (void *)ip_hdr(*pskb) + udphoff;
	udph->dest = cp->dport;

	/*
	 *	Adjust UDP checksums
	 */
	if (!cp->app && (udph->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
				     cp->vport, cp->dport);
		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		udph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, udphoff,
					     (*pskb)->len - udphoff, 0);
		udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
						(*pskb)->len - udphoff,
						cp->protocol,
						(*pskb)->csum);
		/* Zero means "no checksum"; send all-ones instead. */
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;
		/* We just verified/recomputed it ourselves. */
		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return 1;
}
237
238
/*
 * Verify the UDP checksum of an incoming packet.  Returns 1 when the
 * checksum is valid (or absent / already verified by hardware), 0 when
 * it fails or the header cannot be read.
 */
static int
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	struct udphdr _udph, *uh;
	const unsigned int udphoff = ip_hdrlen(skb);

	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
	if (uh == NULL)
		return 0;

	/* check == 0 means the sender transmitted no checksum (RFC 768). */
	if (uh->check != 0) {
		switch (skb->ip_summed) {
		case CHECKSUM_NONE:
			skb->csum = skb_checksum(skb, udphoff,
						 skb->len - udphoff, 0);
			/* fall through: verify the sum computed above */
		case CHECKSUM_COMPLETE:
			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
					      ip_hdr(skb)->daddr,
					      skb->len - udphoff,
					      ip_hdr(skb)->protocol,
					      skb->csum)) {
				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
						 "Failed checksum for");
				return 0;
			}
			break;
		default:
			/* No need to checksum. */
			break;
		}
	}
	return 1;
}
272
273
274 /*
275  *      Note: the caller guarantees that only one of register_app,
276  *      unregister_app or app_conn_bind is called each time.
277  */
278
279 #define UDP_APP_TAB_BITS        4
280 #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
281 #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
282
283 static struct list_head udp_apps[UDP_APP_TAB_SIZE];
284 static DEFINE_SPINLOCK(udp_app_lock);
285
286 static inline __u16 udp_app_hashkey(__be16 port)
287 {
288         return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
289                 & UDP_APP_TAB_MASK;
290 }
291
292
293 static int udp_register_app(struct ip_vs_app *inc)
294 {
295         struct ip_vs_app *i;
296         __u16 hash;
297         __be16 port = inc->port;
298         int ret = 0;
299
300         hash = udp_app_hashkey(port);
301
302
303         spin_lock_bh(&udp_app_lock);
304         list_for_each_entry(i, &udp_apps[hash], p_list) {
305                 if (i->port == port) {
306                         ret = -EEXIST;
307                         goto out;
308                 }
309         }
310         list_add(&inc->p_list, &udp_apps[hash]);
311         atomic_inc(&ip_vs_protocol_udp.appcnt);
312
313   out:
314         spin_unlock_bh(&udp_app_lock);
315         return ret;
316 }
317
318
/* Remove a previously registered application helper incarnation. */
static void
udp_unregister_app(struct ip_vs_app *inc)
{
	spin_lock_bh(&udp_app_lock);
	atomic_dec(&ip_vs_protocol_udp.appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&udp_app_lock);
}
327
328
329 static int udp_app_conn_bind(struct ip_vs_conn *cp)
330 {
331         int hash;
332         struct ip_vs_app *inc;
333         int result = 0;
334
335         /* Default binding: bind app only for NAT */
336         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
337                 return 0;
338
339         /* Lookup application incarnations and bind the right one */
340         hash = udp_app_hashkey(cp->vport);
341
342         spin_lock(&udp_app_lock);
343         list_for_each_entry(inc, &udp_apps[hash], p_list) {
344                 if (inc->port == cp->vport) {
345                         if (unlikely(!ip_vs_app_inc_get(inc)))
346                                 break;
347                         spin_unlock(&udp_app_lock);
348
349                         IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
350                                   "%u.%u.%u.%u:%u to app %s on port %u\n",
351                                   __FUNCTION__,
352                                   NIPQUAD(cp->caddr), ntohs(cp->cport),
353                                   NIPQUAD(cp->vaddr), ntohs(cp->vport),
354                                   inc->name, ntohs(inc->port));
355                         cp->app = inc;
356                         if (inc->init_conn)
357                                 result = inc->init_conn(inc, cp);
358                         goto out;
359                 }
360         }
361         spin_unlock(&udp_app_lock);
362
363   out:
364         return result;
365 }
366
367
/* Per-state connection timeouts in jiffies; _LAST is a sentinel slot. */
static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
	[IP_VS_UDP_S_LAST]		=	2*HZ,
};

/* Human-readable state names, indexed like udp_timeouts. */
static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL]		=	"UDP",
	[IP_VS_UDP_S_LAST]		=	"BUG!",
};
377
378
/* Set the timeout for the state named sname (seconds) via the core helper. */
static int
udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
				       udp_state_name_table, sname, to);
}
385
386 static const char * udp_state_name(int state)
387 {
388         if (state >= IP_VS_UDP_S_LAST)
389                 return "ERR!";
390         return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
391 }
392
/*
 * UDP has a single state: every packet simply refreshes the NORMAL
 * timeout on the connection.  Always returns 1.
 */
static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_protocol *pp)
{
	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
	return 1;
}
401
/* One-time setup: init the app hash buckets and install the timeouts. */
static void udp_init(struct ip_vs_protocol *pp)
{
	IP_VS_INIT_HASH_TABLE(udp_apps);
	pp->timeout_table = udp_timeouts;
}
407
/* Nothing to tear down for UDP. */
static void udp_exit(struct ip_vs_protocol *pp)
{
}
411
412
/*
 * Protocol descriptor registered with the IPVS core: wires the UDP
 * handlers above into the generic connection/NAT machinery.
 */
struct ip_vs_protocol ip_vs_protocol_udp = {
	.name =			"UDP",
	.protocol =		IPPROTO_UDP,
	.dont_defrag =		0,
	.init =			udp_init,
	.exit =			udp_exit,
	.conn_schedule =	udp_conn_schedule,
	.conn_in_get =		udp_conn_in_get,
	.conn_out_get =		udp_conn_out_get,
	.snat_handler =		udp_snat_handler,
	.dnat_handler =		udp_dnat_handler,
	.csum_check =		udp_csum_check,
	.state_transition =	udp_state_transition,
	.state_name =		udp_state_name,
	.register_app =		udp_register_app,
	.unregister_app =	udp_unregister_app,
	.app_conn_bind =	udp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	NULL,			/* no dynamic timeouts */
	.set_state_timeout =	udp_set_state_timeout,
};