2 * ip_vs_proto_udp.c: UDP load balancing support for IPVS
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
/*
 * Log prefix setup: pr_fmt() is defined so that every format string passed
 * through it is prefixed with the KMSG_COMPONENT string followed by a colon
 * and a space.
 */
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21 #include <linux/kernel.h>
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/udp.h>
26 #include <net/ip_vs.h>
28 #include <net/ip6_checksum.h>
/*
 * udp_conn_schedule - find the virtual service matching a UDP packet and
 * ask the scheduler for a connection.
 *
 * @af:      address family, passed to ip_vs_fill_iphdr() and
 *           ip_vs_service_get()
 * @skb:     packet being examined
 * @pp:      protocol descriptor, forwarded to ip_vs_leave()
 * @verdict: out parameter; on one visible path it receives the value
 *           returned by ip_vs_leave()
 * @cpp:     out parameter; receives the connection returned by
 *           ip_vs_schedule()
 *
 * The service lookup is keyed on skb->mark, the IP protocol, the
 * destination address and the UDP destination port.  The service reference
 * is released with ip_vs_service_put() both on the overload drop path and
 * after scheduling.
 *
 * NOTE(review): this listing skips source lines (the embedded numbers are
 * not contiguous), so the conditions guarding each statement and the
 * return values are not visible here -- confirm against the full file.
 */
31 udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
32 int *verdict, struct ip_vs_conn **cpp)
34 struct ip_vs_service *svc;
35 struct udphdr _udph, *uh;
36 struct ip_vs_iphdr iph;
38 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
/* The UDP header is read at offset iph.len; _udph is supplied as the buffer argument. */
40 uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
46 svc = ip_vs_service_get(af, skb->mark, iph.protocol,
47 &iph.daddr, uh->dest);
51 * It seems that we are very loaded.
52 * We have to drop this packet :(
54 ip_vs_service_put(svc);
60 * Let the virtual server select a real server for the
61 * incoming connection, and create a connection entry.
63 *cpp = ip_vs_schedule(svc, skb);
65 *verdict = ip_vs_leave(svc, skb, pp);
68 ip_vs_service_put(svc);
/*
 * udp_fast_csum_update - incrementally adjust a UDP checksum for a changed
 * address and a changed port, without summing the payload again.
 *
 * @af:      address family (its use is on lines not shown in this listing;
 *           presumably it selects the IPv6 or IPv4 variant below)
 * @uhdr:    UDP header whose check field is read and updated
 * @oldip:   address being replaced
 * @newip:   replacement address
 * @oldport: port being replaced
 * @newport: replacement port
 *
 * Both variants start from the complement of the unfolded current checksum,
 * apply ip_vs_check_diff2() for the port change, then ip_vs_check_diff16()
 * (built only with CONFIG_IP_VS_IPV6) or ip_vs_check_diff4() for the
 * address change, and fold the result with csum_fold().
 *
 * NOTE(review): the assignment targets of the two csum_fold() expressions
 * and the guard in front of the CSUM_MANGLED_0 store are on lines missing
 * from this listing -- confirm against the full file.
 */
75 udp_fast_csum_update(int af, struct udphdr *uhdr,
76 const union nf_inet_addr *oldip,
77 const union nf_inet_addr *newip,
78 __be16 oldport, __be16 newport)
80 #ifdef CONFIG_IP_VS_IPV6
83 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
84 ip_vs_check_diff2(oldport, newport,
85 ~csum_unfold(uhdr->check))));
89 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
90 ip_vs_check_diff2(oldport, newport,
91 ~csum_unfold(uhdr->check))));
93 uhdr->check = CSUM_MANGLED_0;
/*
 * udp_partial_csum_update - incrementally adjust the UDP check field for a
 * changed address and a changed length; used by the NAT handlers when
 * skb->ip_summed is CHECKSUM_PARTIAL.
 *
 * @af:     address family (its use is on lines not shown in this listing;
 *          presumably it selects the IPv6 or IPv4 variant below)
 * @uhdr:   UDP header whose check field is read
 * @oldip:  address being replaced
 * @newip:  replacement address
 * @oldlen: previous length value, as a big-endian 16-bit quantity
 * @newlen: new length value, as a big-endian 16-bit quantity
 *
 * The arithmetic has the same shape as udp_fast_csum_update() in this
 * file, with the length pair taking the place of the port pair.
 *
 * NOTE(review): the complement is applied to the unfolded input and not to
 * the folded result.  With CHECKSUM_PARTIAL the check field normally holds
 * a non-complemented pseudo-header sum, so the complement may belong on
 * the csum_fold() result instead -- verify against the kernel checksum
 * offload rules before relying on this path.
 *
 * NOTE(review): the assignment targets of the csum_fold() expressions are
 * on lines missing from this listing.
 */
97 udp_partial_csum_update(int af, struct udphdr *uhdr,
98 const union nf_inet_addr *oldip,
99 const union nf_inet_addr *newip,
100 __be16 oldlen, __be16 newlen)
102 #ifdef CONFIG_IP_VS_IPV6
105 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
106 ip_vs_check_diff2(oldlen, newlen,
107 ~csum_unfold(uhdr->check))));
111 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
112 ip_vs_check_diff2(oldlen, newlen,
113 ~csum_unfold(uhdr->check))));
/*
 * udp_snat_handler - rewrite the UDP source port of a packet to the
 * virtual port of its connection (cp->vport) and repair the UDP checksum.
 *
 * @skb: packet to mangle; skb_make_writable() is requested up to the end
 *       of the UDP header
 * @pp:  protocol descriptor; supplies the optional csum_check hook and the
 *       name printed in the debug message
 * @cp:  connection entry; af, app, vport, dport, daddr, vaddr and protocol
 *       are read from it
 *
 * The UDP header offset is sizeof(struct ipv6hdr) for AF_INET6 connections
 * (only when built with CONFIG_IP_VS_IPV6) and ip_hdrlen(skb) on the other
 * visible assignment.
 *
 * When an application helper is bound (cp->app != NULL) the checksum is
 * verified through pp->csum_check, if set, and ip_vs_app_pkt_out() is
 * called before the port is rewritten.  The header pointer is computed
 * after those calls.
 *
 * Visible checksum branches:
 *   - CHECKSUM_PARTIAL: udp_partial_csum_update() with daddr as the old
 *     address and vaddr as the new one
 *   - no helper and a non-zero checksum: udp_fast_csum_update() from
 *     daddr/dport to vaddr/vport, then CHECKSUM_COMPLETE is downgraded to
 *     CHECKSUM_NONE
 *   - full recalculation: skb_checksum() over the UDP datagram combined
 *     with csum_ipv6_magic() or csum_tcpudp_magic(); a result of 0 is
 *     stored as CSUM_MANGLED_0
 *
 * NOTE(review): the return type, the failure returns after each check and
 * several argument lines are missing from this listing -- confirm against
 * the full file.
 */
118 udp_snat_handler(struct sk_buff *skb,
119 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
122 unsigned int udphoff;
125 #ifdef CONFIG_IP_VS_IPV6
126 if (cp->af == AF_INET6)
127 udphoff = sizeof(struct ipv6hdr);
130 udphoff = ip_hdrlen(skb);
/* Length of the UDP datagram before any helper gets to modify the packet. */
131 oldlen = skb->len - udphoff;
133 /* csum_check requires unshared skb */
134 if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
137 if (unlikely(cp->app != NULL)) {
138 /* Some checks before mangling */
139 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
143 * Call application helper if needed
145 if (!ip_vs_app_pkt_out(cp, skb))
149 udph = (void *)skb_network_header(skb) + udphoff;
150 udph->source = cp->vport;
153 * Adjust UDP checksums
155 if (skb->ip_summed == CHECKSUM_PARTIAL) {
156 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
158 htons(skb->len - udphoff));
159 } else if (!cp->app && (udph->check != 0)) {
160 /* Only port and addr are changed, do fast csum update */
161 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
162 cp->dport, cp->vport);
163 if (skb->ip_summed == CHECKSUM_COMPLETE)
164 skb->ip_summed = CHECKSUM_NONE;
166 /* full checksum calculation */
168 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
169 #ifdef CONFIG_IP_VS_IPV6
170 if (cp->af == AF_INET6)
171 udph->check = csum_ipv6_magic(&cp->vaddr.in6,
174 cp->protocol, skb->csum);
177 udph->check = csum_tcpudp_magic(cp->vaddr.ip,
/* A computed checksum of 0 is replaced by CSUM_MANGLED_0. */
182 if (udph->check == 0)
183 udph->check = CSUM_MANGLED_0;
184 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
185 pp->name, udph->check,
186 (char*)&(udph->check) - (char*)udph);
/*
 * udp_dnat_handler - rewrite the UDP destination port of a packet to the
 * real-server port of its connection (cp->dport) and repair the UDP
 * checksum.
 *
 * @skb: packet to mangle; skb_make_writable() is requested up to the end
 *       of the UDP header
 * @pp:  protocol descriptor; supplies the optional csum_check hook
 * @cp:  connection entry; af, app, vport, dport, caddr, daddr, vaddr and
 *       protocol are read from it
 *
 * When an application helper is bound (cp->app != NULL) the checksum is
 * verified through pp->csum_check, if set, and ip_vs_app_pkt_in() is
 * called before the port is rewritten.
 *
 * Visible checksum branches:
 *   - CHECKSUM_PARTIAL: udp_partial_csum_update()
 *   - no helper and a non-zero checksum: udp_fast_csum_update() from
 *     vaddr/vport to daddr/dport, then CHECKSUM_COMPLETE is downgraded to
 *     CHECKSUM_NONE
 *   - full recalculation: skb_checksum() over the UDP datagram combined
 *     with csum_ipv6_magic() or csum_tcpudp_magic(), with cp->caddr as the
 *     first address; a result of 0 is stored as CSUM_MANGLED_0 and
 *     skb->ip_summed is set to CHECKSUM_UNNECESSARY
 *
 * NOTE(review): the return type, the failure returns after each check and
 * several argument lines are missing from this listing -- confirm against
 * the full file.
 */
193 udp_dnat_handler(struct sk_buff *skb,
194 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
197 unsigned int udphoff;
200 #ifdef CONFIG_IP_VS_IPV6
201 if (cp->af == AF_INET6)
202 udphoff = sizeof(struct ipv6hdr);
205 udphoff = ip_hdrlen(skb);
/* Length of the UDP datagram before any helper gets to modify the packet. */
206 oldlen = skb->len - udphoff;
208 /* csum_check requires unshared skb */
209 if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
212 if (unlikely(cp->app != NULL)) {
213 /* Some checks before mangling */
214 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
218 * Attempt ip_vs_app call.
219 * It will fix ip_vs_conn
221 if (!ip_vs_app_pkt_in(cp, skb))
225 udph = (void *)skb_network_header(skb) + udphoff;
226 udph->dest = cp->dport;
229 * Adjust UDP checksums
231 if (skb->ip_summed == CHECKSUM_PARTIAL) {
/*
 * NOTE(review): this call passes (daddr, vaddr) as (old, new), the same
 * order udp_snat_handler() uses, while the fast path just below passes
 * (vaddr, daddr).  For this direction the old address should presumably be
 * vaddr and the new one daddr -- the arguments look swapped; confirm and
 * fix in the full file.
 */
232 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
234 htons(skb->len - udphoff));
235 } else if (!cp->app && (udph->check != 0)) {
236 /* Only port and addr are changed, do fast csum update */
237 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
238 cp->vport, cp->dport);
239 if (skb->ip_summed == CHECKSUM_COMPLETE)
240 skb->ip_summed = CHECKSUM_NONE;
242 /* full checksum calculation */
244 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
245 #ifdef CONFIG_IP_VS_IPV6
246 if (cp->af == AF_INET6)
247 udph->check = csum_ipv6_magic(&cp->caddr.in6,
250 cp->protocol, skb->csum);
253 udph->check = csum_tcpudp_magic(cp->caddr.ip,
/* A computed checksum of 0 is replaced by CSUM_MANGLED_0. */
258 if (udph->check == 0)
259 udph->check = CSUM_MANGLED_0;
260 skb->ip_summed = CHECKSUM_UNNECESSARY;
/*
 * udp_csum_check - verify the UDP checksum of a packet.
 *
 * @af:  address family; AF_INET6 selects the csum_ipv6_magic() check
 * @skb: packet to verify
 * @pp:  protocol descriptor, passed to the rate-limited debug macro when
 *       verification fails
 *
 * Verification only happens when the check field read from the UDP header
 * is non-zero.  Depending on skb->ip_summed, skb->csum is first computed
 * with skb_checksum() over the UDP datagram, and the pseudo-header helper
 * for the address family is then evaluated; a failure is logged with the
 * message Failed checksum for.
 *
 * NOTE(review): no break is visible between the skb_checksum() statement
 * and case CHECKSUM_COMPLETE.  If the fall-through is intentional it
 * deserves an explicit marker comment; this listing skips lines, so
 * confirm against the full file.
 *
 * NOTE(review): the return type, return values, remaining case labels and
 * the result of a failed header read are on lines missing from this
 * listing.
 */
267 udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
269 struct udphdr _udph, *uh;
270 unsigned int udphoff;
272 #ifdef CONFIG_IP_VS_IPV6
274 udphoff = sizeof(struct ipv6hdr);
277 udphoff = ip_hdrlen(skb);
279 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
283 if (uh->check != 0) {
284 switch (skb->ip_summed) {
286 skb->csum = skb_checksum(skb, udphoff,
287 skb->len - udphoff, 0);
288 case CHECKSUM_COMPLETE:
289 #ifdef CONFIG_IP_VS_IPV6
290 if (af == AF_INET6) {
291 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
292 &ipv6_hdr(skb)->daddr,
294 ipv6_hdr(skb)->nexthdr,
296 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
297 "Failed checksum for");
302 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
305 ip_hdr(skb)->protocol,
307 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
308 "Failed checksum for");
313 /* No need to checksum. */
/*
 * Registry of UDP application helpers: an array of UDP_APP_TAB_SIZE
 * (1 << 4 = 16) list heads indexed by a hash of the port, with
 * UDP_APP_TAB_MASK (15) as the matching index mask.  udp_app_lock is the
 * spinlock taken by the register, unregister and bind functions in this
 * file around accesses to the table.
 */
322 * Note: the caller guarantees that only one of register_app,
323 * unregister_app or app_conn_bind is called each time.
326 #define UDP_APP_TAB_BITS 4
327 #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
328 #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
330 static struct list_head udp_apps[UDP_APP_TAB_SIZE];
331 static DEFINE_SPINLOCK(udp_app_lock);
/*
 * udp_app_hashkey - hash a big-endian port number for the udp_apps table.
 *
 * The visible expression XORs the port with itself shifted right by
 * UDP_APP_TAB_BITS.
 *
 * NOTE(review): the end of the return expression is on a line missing from
 * this listing; presumably the value is reduced with UDP_APP_TAB_MASK so
 * that it fits the table -- confirm against the full file.
 */
333 static inline __u16 udp_app_hashkey(__be16 port)
335 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
/*
 * udp_register_app - add an application helper incarnation to the udp_apps
 * table.
 *
 * @inc: incarnation to register; its port selects the hash bucket
 *
 * With udp_app_lock held and bottom halves disabled, the bucket is scanned
 * for an entry with the same port.  The incarnation is then linked at the
 * head of the bucket and the appcnt counter of ip_vs_protocol_udp is
 * incremented.
 *
 * NOTE(review): what happens when a matching port is found, and the return
 * values, are on lines missing from this listing -- confirm against the
 * full file.
 */
340 static int udp_register_app(struct ip_vs_app *inc)
344 __be16 port = inc->port;
347 hash = udp_app_hashkey(port);
350 spin_lock_bh(&udp_app_lock);
351 list_for_each_entry(i, &udp_apps[hash], p_list) {
352 if (i->port == port) {
357 list_add(&inc->p_list, &udp_apps[hash]);
358 atomic_inc(&ip_vs_protocol_udp.appcnt);
361 spin_unlock_bh(&udp_app_lock);
/*
 * udp_unregister_app - remove an application helper incarnation from the
 * udp_apps table.
 *
 * @inc: incarnation to unlink
 *
 * With udp_app_lock held and bottom halves disabled, the appcnt counter of
 * ip_vs_protocol_udp is decremented and the incarnation is unlinked from
 * its list.  No check is made that the incarnation is currently linked.
 */
367 udp_unregister_app(struct ip_vs_app *inc)
369 spin_lock_bh(&udp_app_lock);
370 atomic_dec(&ip_vs_protocol_udp.appcnt);
371 list_del(&inc->p_list);
372 spin_unlock_bh(&udp_app_lock);
/*
 * udp_app_conn_bind - bind a connection to the application helper
 * registered for its virtual port, if there is one.
 *
 * @cp: connection to bind; its forwarding method, vport, af, caddr and
 *      vaddr are read
 *
 * Binding is only considered for connections whose forwarding method is
 * IP_VS_CONN_F_MASQ.  The bucket for cp->vport is searched under
 * udp_app_lock; on a port match a reference is requested with
 * ip_vs_app_inc_get(), the lock is dropped, the binding is logged, and the
 * init_conn hook of the incarnation is called with the lock no longer
 * held.  The lock is also released after the loop when nothing matched.
 *
 * NOTE(review): this function takes udp_app_lock with plain spin_lock()
 * while the register and unregister functions use spin_lock_bh().
 * Presumably callers already run with bottom halves disabled -- confirm.
 *
 * NOTE(review): the return values, the handling of a failed
 * ip_vs_app_inc_get() and any guard around the init_conn call are on lines
 * missing from this listing.
 */
376 static int udp_app_conn_bind(struct ip_vs_conn *cp)
379 struct ip_vs_app *inc;
382 /* Default binding: bind app only for NAT */
383 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
386 /* Lookup application incarnations and bind the right one */
387 hash = udp_app_hashkey(cp->vport);
389 spin_lock(&udp_app_lock);
390 list_for_each_entry(inc, &udp_apps[hash], p_list) {
391 if (inc->port == cp->vport) {
392 if (unlikely(!ip_vs_app_inc_get(inc)))
394 spin_unlock(&udp_app_lock);
396 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
397 "%s:%u to app %s on port %u\n",
399 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
401 IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
403 inc->name, ntohs(inc->port));
407 result = inc->init_conn(inc, cp);
411 spin_unlock(&udp_app_lock);
/*
 * Default timeouts per UDP state, in units of HZ: 5*60*HZ for
 * IP_VS_UDP_S_NORMAL and 2*HZ for the IP_VS_UDP_S_LAST slot.  udp_init()
 * installs this array as the timeout_table of the protocol.
 */
418 static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
419 [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
420 [IP_VS_UDP_S_LAST] = 2*HZ,
/*
 * Printable names for the UDP states, indexed by state number.  The
 * IP_VS_UDP_S_LAST slot carries a marker string that udp_state_name()
 * does not reach through its table lookup, because it range-checks
 * against IP_VS_UDP_S_LAST first.
 */
423 static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
424 [IP_VS_UDP_S_NORMAL] = "UDP",
425 [IP_VS_UDP_S_LAST] = "BUG!",
/*
 * udp_set_state_timeout - set the timeout of a UDP state selected by name.
 *
 * @pp:    protocol descriptor whose timeout_table is updated
 * @sname: state name, matched against udp_state_name_table
 * @to:    new timeout value, forwarded unchanged
 *
 * Thin wrapper: delegates to ip_vs_set_state_timeout() with
 * IP_VS_UDP_S_LAST as the table size and returns its result.
 */
430 udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
432 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
433 udp_state_name_table, sname, to);
/*
 * udp_state_name - map a UDP state number to a printable name.
 *
 * @state: state number
 *
 * States at or above IP_VS_UDP_S_LAST are handled separately (the value
 * returned for them is on a line missing from this listing).  Otherwise
 * the table entry is returned, or a question mark string when the entry
 * is NULL.
 *
 * NOTE(review): state is a signed int and only the upper bound is checked
 * in the visible lines; a negative value would index before the table.
 * Confirm that callers never pass one.
 */
436 static const char * udp_state_name(int state)
438 if (state >= IP_VS_UDP_S_LAST)
440 return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
/*
 * udp_state_transition - refresh the timeout of a UDP connection.
 *
 * @cp:        connection whose timeout is rewritten
 * @direction: not used by the visible statement
 * @skb:       not used by the visible statement
 * @pp:        protocol descriptor supplying the timeout table
 *
 * The only visible statement loads the IP_VS_UDP_S_NORMAL timeout into
 * cp->timeout.  No state variable is changed in the visible lines.
 *
 * NOTE(review): the return type and return value are on lines missing from
 * this listing.
 */
444 udp_state_transition(struct ip_vs_conn *cp, int direction,
445 const struct sk_buff *skb,
446 struct ip_vs_protocol *pp)
448 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
/*
 * udp_init - protocol initialisation hook.
 *
 * @pp: protocol descriptor being initialised
 *
 * Initialises the udp_apps hash table and points pp->timeout_table at the
 * udp_timeouts array.
 */
452 static void udp_init(struct ip_vs_protocol *pp)
454 IP_VS_INIT_HASH_TABLE(udp_apps);
455 pp->timeout_table = udp_timeouts;
/*
 * udp_exit - protocol teardown hook.
 *
 * @pp: protocol descriptor being torn down
 *
 * NOTE(review): the body is not visible in this listing; presumably it is
 * empty -- confirm against the full file.
 */
458 static void udp_exit(struct ip_vs_protocol *pp)
/*
 * Protocol descriptor that plugs UDP support into IPVS.  It wires the
 * scheduling, NAT, checksum, state and application-helper functions of
 * this file into the generic protocol interface, and reuses shared
 * helpers for connection lookup (ip_vs_conn_in_get_proto,
 * ip_vs_conn_out_get_proto) and packet debugging
 * (ip_vs_tcpudp_debug_packet).  No timeout_change hook is provided.
 *
 * NOTE(review): some initialiser lines are missing from this listing
 * (the embedded numbers skip), so this is not the full field list.
 */
463 struct ip_vs_protocol ip_vs_protocol_udp = {
465 .protocol = IPPROTO_UDP,
466 .num_states = IP_VS_UDP_S_LAST,
470 .conn_schedule = udp_conn_schedule,
471 .conn_in_get = ip_vs_conn_in_get_proto,
472 .conn_out_get = ip_vs_conn_out_get_proto,
473 .snat_handler = udp_snat_handler,
474 .dnat_handler = udp_dnat_handler,
475 .csum_check = udp_csum_check,
476 .state_transition = udp_state_transition,
477 .state_name = udp_state_name,
478 .register_app = udp_register_app,
479 .unregister_app = udp_unregister_app,
480 .app_conn_bind = udp_app_conn_bind,
481 .debug_packet = ip_vs_tcpudp_debug_packet,
482 .timeout_change = NULL,
483 .set_state_timeout = udp_set_state_timeout,