2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
43 #ifdef CONFIG_IP_VS_IPV6
45 #include <net/ip6_route.h>
47 #include <net/route.h>
49 #include <net/genetlink.h>
51 #include <asm/uaccess.h>
53 #include <net/ip_vs.h>
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
61 /* sysctl variables */
63 #ifdef CONFIG_IP_VS_DEBUG
64 static int sysctl_ip_vs_debug_level = 0;
/*
 * Accessor for the IPVS debug verbosity, used by the IP_VS_DBG* macros.
 * NOTE(review): this level is a single global, not per network namespace —
 * confirm that is intentional.
 */
66 int ip_vs_get_debug_level(void)
68 return sysctl_ip_vs_debug_level;
72 #ifdef CONFIG_IP_VS_IPV6
73 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/*
 * Returns non-zero when @addr routes to a loopback device in @net, i.e. the
 * address is local to this host.  It does a route lookup with an unspecified
 * source address and inspects the output device flags.
 * NOTE(review): the rt6_info obtained from ip6_route_output() does not appear
 * to be released in the visible code — verify against the full source that
 * there is no dst refcount leak here.
 */
74 static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
81 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
84 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
85 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
92 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
/*
 * Re-evaluates the three DoS-defense strategies (drop_entry, drop_packet,
 * secure_tcp) for one netns based on current memory pressure.  Each strategy
 * value of 3 means "always on"; value 1/2 toggle automatically on low memory.
 */
95 static void update_defense_level(struct netns_ipvs *ipvs)
/*
 * NOTE(review): old_secure_tcp is function-static and therefore shared by
 * every network namespace, while all other defense state is per-netns in
 * @ipvs.  Confirm whether this should live in struct netns_ipvs instead.
 */
98 static int old_secure_tcp = 0;
103 /* we only count free and buffered memory (in pages) */
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
/* nomem: below the admin-configured available-memory threshold (pages). */
111 nomem = (availmem < ipvs->sysctl_amemthresh);
116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
119 atomic_set(&ipvs->dropentry, 0);
123 atomic_set(&ipvs->dropentry, 1);
/* remember that mode 1 auto-escalated, so it can be undone later */
124 ipvs->sysctl_drop_entry = 2;
126 atomic_set(&ipvs->dropentry, 0);
131 atomic_set(&ipvs->dropentry, 1);
133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
138 atomic_set(&ipvs->dropentry, 1);
141 spin_unlock(&ipvs->dropentry_lock);
144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
/*
 * Drop rate scales with how far below the threshold we are; the divisor
 * is presumably positive because this path runs only when nomem
 * (availmem < amemthresh) — TODO confirm against the elided branch logic.
 */
151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
166 ipvs->sysctl_drop_packet = 1;
170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
173 spin_unlock(&ipvs->droppacket_lock);
176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
179 if (old_secure_tcp >= 2)
184 if (old_secure_tcp < 2)
186 ipvs->sysctl_secure_tcp = 2;
188 if (old_secure_tcp >= 2)
194 if (old_secure_tcp < 2)
197 if (old_secure_tcp >= 2)
199 ipvs->sysctl_secure_tcp = 1;
203 if (old_secure_tcp < 2)
207 old_secure_tcp = ipvs->sysctl_secure_tcp;
/* switch TCP protocol timeouts to/from the "secure" table when >1 */
209 ip_vs_protocol_timeout_change(ipvs,
210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
218 * Timer for checking the defense
220 #define DEFENSE_TIMER_PERIOD 1*HZ
/*
 * Periodic (once per second) delayed-work handler: refreshes the defense
 * levels for this netns, randomly drops connection entries when the
 * drop_entry defense is active, and re-arms itself.
 */
222 static void defense_work_handler(struct work_struct *work)
224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
227 update_defense_level(ipvs);
228 if (atomic_read(&ipvs->dropentry))
229 ip_vs_random_dropentry(ipvs->net);
/* self re-arming: runs until the work is cancelled at cleanup time */
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
/* Pin the ip_vs module while user-space holds a reference (e.g. sockopt). */
234 ip_vs_use_count_inc(void)
236 return try_module_get(THIS_MODULE);
/* Drop the module reference taken by ip_vs_use_count_inc(). */
240 ip_vs_use_count_dec(void)
242 module_put(THIS_MODULE);
247 * Hash table: for virtual service lookups
249 #define IP_VS_SVC_TAB_BITS 8
250 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
253 /* the service table hashed by <protocol, addr, port> */
254 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255 /* the service table hashed by fwmark */
256 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
260 * Returns hash value for virtual service
/*
 * Hash of <netns, protocol, address, port> into [0, IP_VS_SVC_TAB_SIZE).
 * IPv6 addresses are folded by XOR-ing the four 32-bit words; the netns
 * pointer is mixed in (shifted right to skip low always-zero bits).
 */
262 static inline unsigned
263 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264 const union nf_inet_addr *addr, __be16 port)
266 register unsigned porth = ntohs(port);
267 __be32 addr_fold = addr->ip;
269 #ifdef CONFIG_IP_VS_IPV6
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
274 addr_fold ^= ((size_t)net>>8);
276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
277 & IP_VS_SVC_TAB_MASK;
281 * Returns hash value of fwmark for virtual service lookup
/* Hash of <netns, fwmark> into the same table index range. */
283 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
290 * or in the ip_vs_svc_fwm_table by fwmark.
291 * Should be called with locked tables.
/*
 * A service lives in exactly one of the two tables: the fwmark table when
 * svc->fwmark != 0, otherwise the <proto,addr,port> table.  Refuses (with an
 * error log) to hash a service that is already flagged as hashed.
 */
293 static int ip_vs_svc_hash(struct ip_vs_service *svc)
297 if (svc->flags & IP_VS_SVC_F_HASHED) {
298 pr_err("%s(): request for already hashed, called from %pF\n",
299 __func__, __builtin_return_address(0));
303 if (svc->fwmark == 0) {
305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308 &svc->addr, svc->port);
309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
312 * Hash it by fwmark in svc_fwm_table
314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
318 svc->flags |= IP_VS_SVC_F_HASHED;
319 /* increase its refcnt because it is referenced by the svc table */
320 atomic_inc(&svc->refcnt);
326 * Unhashes a service from svc_table / svc_fwm_table.
327 * Should be called with locked tables.
/*
 * Inverse of ip_vs_svc_hash(): removes the service from whichever table it
 * was hashed into, clears the HASHED flag, and drops the table's refcnt.
 * Refuses (with an error log) to unhash a service that is not flagged.
 */
329 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
332 pr_err("%s(): request for unhash flagged, called from %pF\n",
333 __func__, __builtin_return_address(0));
337 if (svc->fwmark == 0) {
338 /* Remove it from the svc_table table */
339 list_del(&svc->s_list);
341 /* Remove it from the svc_fwm_table table */
342 list_del(&svc->f_list);
345 svc->flags &= ~IP_VS_SVC_F_HASHED;
346 atomic_dec(&svc->refcnt);
352 * Get service by {netns, proto,addr,port} in the service table.
/*
 * Lockless-looking bucket walk — callers presumably hold __ip_vs_svc_lock
 * for reading (TODO confirm; the lock is not visible in this fragment).
 * Returns the matching service or (in elided code) NULL.
 */
354 static inline struct ip_vs_service *
355 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
356 const union nf_inet_addr *vaddr, __be16 vport)
359 struct ip_vs_service *svc;
361 /* Check for "full" addressed entries */
362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
367 && (svc->port == vport)
368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
380 * Get service by {fwmark} in the service table.
/* Same contract as above, but keyed by <netns, af, fwmark>. */
382 static inline struct ip_vs_service *
383 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
386 struct ip_vs_service *svc;
388 /* Check for fwmark addressed entries */
389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
/*
 * Public lookup: find a virtual service and take a use reference (usecnt).
 * Lookup order: fwmark table first, then <proto,addr,port>; for TCP ports
 * that could be FTP data connections it falls back to the FTP control-port
 * entry, and finally to a catch-all port-zero entry if one exists.
 * The caller must release the reference with ip_vs_service_put().
 */
402 struct ip_vs_service *
403 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
404 const union nf_inet_addr *vaddr, __be16 vport)
406 struct ip_vs_service *svc;
407 struct netns_ipvs *ipvs = net_ipvs(net);
409 read_lock(&__ip_vs_svc_lock);
412 * Check the table hashed by fwmark first
414 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
419 * Check the table hashed by <protocol,addr,port>
420 * for "full" addressed entries
422 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
425 && protocol == IPPROTO_TCP
426 && atomic_read(&ipvs->ftpsvc_counter)
427 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
429 * Check if ftp service entry exists, the packet
430 * might belong to FTP data connections.
432 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT)\u003b
436 && atomic_read(&ipvs->nullsvc_counter)) {
438 * Check if the catch-all port (port zero) exists
440 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
/* taken under the read lock so the service cannot go away underneath us */
445 atomic_inc(&svc->usecnt);
446 read_unlock(&__ip_vs_svc_lock);
448 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
449 fwmark, ip_vs_proto_name(protocol),
450 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
451 svc ? "hit" : "not hit");
/* Bind @dest to @svc, taking a service reference for the dest. */
458 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
460 atomic_inc(&svc->refcnt);
/*
 * Drop the dest's reference on its service; if this was the last reference,
 * free the service's per-cpu stats (and, in elided code, the service itself).
 */
465 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
467 struct ip_vs_service *svc = dest->svc;
470 if (atomic_dec_and_test(&svc->refcnt)) {
471 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
473 IP_VS_DBG_ADDR(svc->af, &svc->addr),
474 ntohs(svc->port), atomic_read(&svc->usecnt));
475 free_percpu(svc->stats.cpustats);
482 * Returns hash value for real service
/* Hash of <af, addr, port> into the per-netns real-server table. */
484 static inline unsigned ip_vs_rs_hashkey(int af,
485 const union nf_inet_addr *addr,
488 register unsigned porth = ntohs(port);
489 __be32 addr_fold = addr->ip;
491 #ifdef CONFIG_IP_VS_IPV6
493 addr_fold = addr->ip6[0]^addr->ip6[1]^
494 addr->ip6[2]^addr->ip6[3];
497 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
502 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
503 * should be called with locked tables.
/* No-op (early return in elided code) if the dest is already hashed. */
505 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
509 if (!list_empty(&dest->d_list)) {
514 * Hash by proto,addr,port,
515 * which are the parameters of the real service.
517 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
519 list_add(&dest->d_list, &ipvs->rs_table[hash]);
525 * UNhashes ip_vs_dest from rs_table.
526 * should be called with locked tables.
/* Re-initializes d_list so list_empty() reports "not hashed" afterwards. */
528 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
531 * Remove it from the rs_table table.
533 if (!list_empty(&dest->d_list)) {
534 list_del(&dest->d_list);
535 INIT_LIST_HEAD(&dest->d_list);
542 * Lookup real service by <proto,addr,port> in the real service table.
/*
 * Walks one rs_table bucket under ipvs->rs_lock (read side) and returns the
 * first dest matching <af, daddr, dport, protocol>.  The elided return path
 * presumably yields NULL when nothing matches — TODO confirm.
 */
545 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
546 const union nf_inet_addr *daddr,
549 struct netns_ipvs *ipvs = net_ipvs(net);
551 struct ip_vs_dest *dest;
554 * Check for "full" addressed entries
555 * Return the first found entry
557 hash = ip_vs_rs_hashkey(af, daddr, dport);
559 read_lock(&ipvs->rs_lock);
560 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
562 && ip_vs_addr_equal(af, &dest->addr, daddr)
563 && (dest->port == dport)
564 && ((dest->protocol == protocol) ||
/* match found: unlock before returning the entry */
567 read_unlock(&ipvs->rs_lock);
571 read_unlock(&ipvs->rs_lock);
577 * Lookup destination by {addr,port} in the given service
/*
 * Linear scan of svc->destinations for a dest matching <af, daddr, dport>.
 * Callers presumably hold the service lock or are the only writer — TODO
 * confirm locking contract against the full source.
 */
579 static struct ip_vs_dest *
580 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
583 struct ip_vs_dest *dest;
586 * Find the destination for the given service
588 list_for_each_entry(dest, &svc->destinations, n_list) {
589 if ((dest->af == svc->af)
590 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
591 && (dest->port == dport)) {
601 * Find destination by {daddr,dport,vaddr,protocol}
602 * Created to be used in ip_vs_process_message() in
603 * the backup synchronization daemon. It finds the
604 * destination to be bound to the received connection
607 * ip_vs_lookup_real_service() looked promising, but
608 * seems not working as expected.
/*
 * Resolve a dest by <daddr,dport> within the virtual service identified by
 * <vaddr,vport,protocol,fwmark>.  Takes a reference on the returned dest
 * (caller must drop it) and releases the temporary service reference.
 */
610 struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
611 const union nf_inet_addr *daddr,
613 const union nf_inet_addr *vaddr,
614 __be16 vport, __u16 protocol, __u32 fwmark)
616 struct ip_vs_dest *dest;
617 struct ip_vs_service *svc;
619 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
622 dest = ip_vs_lookup_dest(svc, daddr, dport);
/* hold the dest beyond the service reference we are about to release */
624 atomic_inc(&dest->refcnt);
625 ip_vs_service_put(svc);
630 * Lookup dest by {svc,addr,port} in the destination trash.
631 * The destination trash is used to hold the destinations that are removed
632 * from the service table but are still referenced by some conn entries.
633 * The reason to add the destination trash is when the dest is temporary
634 * down (either by administrator or by monitor program), the dest can be
635 * picked back from the trash, the remaining connections to the dest can
636 * continue, and the counting information of the dest is also useful for
/*
 * While scanning, destinations whose refcnt has dropped to 1 (held only by
 * the trash itself) are opportunistically purged and freed.
 */
639 static struct ip_vs_dest *
640 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
643 struct ip_vs_dest *dest, *nxt;
644 struct netns_ipvs *ipvs = net_ipvs(svc->net);
647 * Find the destination in trash
649 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
650 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
653 IP_VS_DBG_ADDR(svc->af, &dest->addr),
655 atomic_read(&dest->refcnt));
/* dest must have belonged to this same virtual service (fwmark or triple) */
656 if (dest->af == svc->af &&
657 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
658 dest->port == dport &&
659 dest->vfwmark == svc->fwmark &&
660 dest->protocol == svc->protocol &&
662 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
663 dest->vport == svc->port))) {
669 * Try to purge the destination from trash if not referenced
671 if (atomic_read(&dest->refcnt) == 1) {
672 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
675 IP_VS_DBG_ADDR(svc->af, &dest->addr),
677 list_del(&dest->n_list);
678 ip_vs_dst_reset(dest);
679 __ip_vs_unbind_svc(dest);
680 free_percpu(dest->stats.cpustats);
690 * Clean up all the destinations in the trash
691 * Called by the ip_vs_control_cleanup()
693 * When the ip_vs_control_cleanup is activated by ipvs module exit,
694 * the service tables must have been flushed and all the connections
695 * are expired, and the refcnt of each destination in the trash must
696 * be 1, so we simply release them here.
/*
 * Free every destination left in this netns' trash list.  By the time this
 * runs (module/netns cleanup) each trash entry should hold exactly one
 * reference, so entries can be released unconditionally.
 */
698 static void ip_vs_trash_cleanup(struct net *net)
700 struct ip_vs_dest *dest, *nxt;
701 struct netns_ipvs *ipvs = net_ipvs(net);
703 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
704 list_del(&dest->n_list);
705 ip_vs_dst_reset(dest);
706 __ip_vs_unbind_svc(dest);
707 free_percpu(dest->stats.cpustats);
/*
 * Zero the user-visible counters of a stats block and reset its rate
 * estimator, under the stats lock to avoid racing concurrent updates.
 */
714 ip_vs_zero_stats(struct ip_vs_stats *stats)
716 spin_lock_bh(&stats->lock);
718 memset(&stats->ustats, 0, sizeof(stats->ustats));
719 ip_vs_zero_estimator(stats);
721 spin_unlock_bh(&stats->lock);
725 * Update a destination in the given service
/*
 * Common worker for add/edit of a destination: applies weight, connection
 * flags and thresholds from @udest, (re)binds the dest to @svc, resets its
 * cached route, and — when @add — links it into svc->destinations under the
 * service write lock.  Also registers a rate estimator for the dest stats.
 */
728 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
729 struct ip_vs_dest_user_kern *udest, int add)
731 struct netns_ipvs *ipvs = net_ipvs(svc->net);
734 /* set the weight and the flags */
735 atomic_set(&dest->weight, udest->weight);
736 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
737 conn_flags |= IP_VS_CONN_F_INACTIVE;
739 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
740 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
741 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
744 * Put the real service in rs_table if not present.
745 * For now only for NAT!
747 write_lock_bh(&ipvs->rs_lock);
748 ip_vs_rs_hash(ipvs, dest);
749 write_unlock_bh(&ipvs->rs_lock);
751 atomic_set(&dest->conn_flags, conn_flags);
753 /* bind the service */
755 __ip_vs_bind_svc(dest, svc);
/* rebinding to a different service: stats restart from zero */
757 if (dest->svc != svc) {
758 __ip_vs_unbind_svc(dest);
759 ip_vs_zero_stats(&dest->stats);
760 __ip_vs_bind_svc(dest, svc);
764 /* set the dest status flags */
765 dest->flags |= IP_VS_DEST_F_AVAILABLE;
/* raising (or clearing) the upper threshold un-overloads the dest */
767 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
768 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
769 dest->u_threshold = udest->u_threshold;
770 dest->l_threshold = udest->l_threshold;
/* invalidate any cached route so the new settings take effect */
772 spin_lock_bh(&dest->dst_lock);
773 ip_vs_dst_reset(dest);
774 spin_unlock_bh(&dest->dst_lock);
777 ip_vs_new_estimator(svc->net, &dest->stats);
779 write_lock_bh(&__ip_vs_svc_lock);
781 /* Wait until all other svc users go away */
782 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
785 list_add(&dest->n_list, &svc->destinations);
789 /* call the update_service, because server weight may be changed */
790 if (svc->scheduler->update_service)
791 svc->scheduler->update_service(svc);
793 write_unlock_bh(&__ip_vs_svc_lock);
798 * Create a destination for the given service
/*
 * Allocates and initializes a new ip_vs_dest from @udest, validates that the
 * destination address is usable (local or unicast), and links it into @svc
 * via __ip_vs_update_dest().  On success *dest_p receives the new dest.
 */
801 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
802 struct ip_vs_dest **dest_p)
804 struct ip_vs_dest *dest;
809 #ifdef CONFIG_IP_VS_IPV6
/* IPv6: reject non-unicast / link-local addresses unless locally owned */
810 if (svc->af == AF_INET6) {
811 atype = ipv6_addr_type(&udest->addr.in6);
812 if ((!(atype & IPV6_ADDR_UNICAST) ||
813 atype & IPV6_ADDR_LINKLOCAL) &&
814 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
819 atype = inet_addr_type(svc->net, udest->addr.ip);
820 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
824 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
826 pr_err("%s(): no memory.\n", __func__);
829 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
830 if (!dest->stats.cpustats) {
831 pr_err("%s() alloc_percpu failed\n", __func__);
/* record the virtual-service identity this dest belongs to */
836 dest->protocol = svc->protocol;
837 dest->vaddr = svc->addr;
838 dest->vport = svc->port;
839 dest->vfwmark = svc->fwmark;
840 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
841 dest->port = udest->port;
843 atomic_set(&dest->activeconns, 0);
844 atomic_set(&dest->inactconns, 0);
845 atomic_set(&dest->persistconns, 0);
/* initial reference owned by the caller / service list */
846 atomic_set(&dest->refcnt, 1);
848 INIT_LIST_HEAD(&dest->d_list);
849 spin_lock_init(&dest->dst_lock);
850 spin_lock_init(&dest->stats.lock);
851 __ip_vs_update_dest(svc, dest, udest, 1);
865 * Add a destination into an existing service
/*
 * User-request entry point: validates @udest (weight >= 0, sane thresholds),
 * rejects duplicates, resurrects a matching dest from the trash when one
 * exists, and otherwise allocates a fresh dest via ip_vs_new_dest().
 */
868 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
870 struct ip_vs_dest *dest;
871 union nf_inet_addr daddr;
872 __be16 dport = udest->port;
877 if (udest->weight < 0) {
878 pr_err("%s(): server weight less than zero\n", __func__);
882 if (udest->l_threshold > udest->u_threshold) {
883 pr_err("%s(): lower threshold is higher than upper threshold\n",
888 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
891 * Check if the dest already exists in the list
893 dest = ip_vs_lookup_dest(svc, &daddr, dport);
896 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
901 * Check if the dest already exists in the trash and
902 * is from the same service
904 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
907 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
908 "dest->refcnt=%d, service %u/%s:%u\n",
909 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
910 atomic_read(&dest->refcnt),
912 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
916 * Get the destination from the trash
918 list_del(&dest->n_list);
/* reuse the trashed dest: refresh its settings and relink it */
920 __ip_vs_update_dest(svc, dest, udest, 1);
924 * Allocate and initialize the dest structure
926 ret = ip_vs_new_dest(svc, udest, &dest);
935 * Edit a destination in the given service
/*
 * User-request entry point: same validation as ip_vs_add_dest(), but the
 * dest must already exist; its settings are refreshed in place (add == 0).
 */
938 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
940 struct ip_vs_dest *dest;
941 union nf_inet_addr daddr;
942 __be16 dport = udest->port;
946 if (udest->weight < 0) {
947 pr_err("%s(): server weight less than zero\n", __func__);
951 if (udest->l_threshold > udest->u_threshold) {
952 pr_err("%s(): lower threshold is higher than upper threshold\n",
957 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
960 * Lookup the destination list
962 dest = ip_vs_lookup_dest(svc, &daddr, dport);
965 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
969 __ip_vs_update_dest(svc, dest, udest, 0);
977 * Delete a destination (must be already unlinked from the service)
/*
 * Tears down a dest that is no longer on any service list: kills its rate
 * estimator, unhashes it from rs_table, then either frees it (refcnt hit 0)
 * or parks it in the trash so existing connections can keep using it.
 */
979 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
981 struct netns_ipvs *ipvs = net_ipvs(net);
983 ip_vs_kill_estimator(net, &dest->stats);
986 * Remove it from the d-linked list with the real services.
988 write_lock_bh(&ipvs->rs_lock);
989 ip_vs_rs_unhash(dest);
990 write_unlock_bh(&ipvs->rs_lock);
993 * Decrease the refcnt of the dest, and free the dest
994 * if nobody refers to it (refcnt=0). Otherwise, throw
995 * the destination into the trash.
997 if (atomic_dec_and_test(&dest->refcnt)) {
998 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1000 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1002 ip_vs_dst_reset(dest);
1003 /* simply decrease svc->refcnt here, let the caller check
1004 and release the service if nobody refers to it.
1005 Only user context can release destination and service,
1006 and only one user context can update virtual service at a
1007 time, so the operation here is OK */
1008 atomic_dec(&dest->svc->refcnt);
1009 free_percpu(dest->stats.cpustats);
1012 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1013 "dest->refcnt=%d\n",
1014 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1016 atomic_read(&dest->refcnt));
/* trash holds its own reference until the dest is purged or reused */
1017 list_add(&dest->n_list, &ipvs->dest_trash);
1018 atomic_inc(&dest->refcnt);
1024 * Unlink a destination from the given service
/*
 * Marks the dest unavailable and removes it from svc->destinations.
 * @svcupd selects whether the scheduler is notified of the change (skipped
 * during bulk teardown, where the whole service is going away).
 */
1026 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1027 struct ip_vs_dest *dest,
1030 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1033 * Remove it from the d-linked destination list.
1035 list_del(&dest->n_list);
1039 * Call the update_service function of its scheduler
1041 if (svcupd && svc->scheduler->update_service)
1042 svc->scheduler->update_service(svc);
1047 * Delete a destination server in the given service
/*
 * User-request entry point: finds the dest by <addr,port>, quiesces the
 * service under the write lock, unlinks the dest, then hands it to
 * __ip_vs_del_dest() for final release or trashing.
 */
1050 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1052 struct ip_vs_dest *dest;
1053 __be16 dport = udest->port;
1057 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1060 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1064 write_lock_bh(&__ip_vs_svc_lock);
1067 * Wait until all other svc users go away.
1069 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1072 * Unlink dest from the service
1074 __ip_vs_unlink_dest(svc, dest, 1);
1076 write_unlock_bh(&__ip_vs_svc_lock);
1079 * Delete the destination
1081 __ip_vs_del_dest(svc->net, dest);
1090 * Add a service into the service hash table
/*
 * Creates a new virtual service from the user request @u: resolves the
 * scheduler (and optional persistence engine) by name, allocates the
 * service and its per-cpu stats, binds scheduler/pe, updates the global
 * counters, and hashes the service into the lookup tables.  Error paths
 * (partially elided here) unwind in reverse order via goto labels.
 */
1093 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1094 struct ip_vs_service **svc_p)
1097 struct ip_vs_scheduler *sched = NULL;
1098 struct ip_vs_pe *pe = NULL;
1099 struct ip_vs_service *svc = NULL;
1100 struct netns_ipvs *ipvs = net_ipvs(net);
1102 /* increase the module use count */
1103 ip_vs_use_count_inc();
1105 /* Lookup the scheduler by 'u->sched_name' */
1106 sched = ip_vs_scheduler_get(u->sched_name);
1107 if (sched == NULL) {
1108 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1113 if (u->pe_name && *u->pe_name) {
1114 pe = ip_vs_pe_getbyname(u->pe_name);
1116 pr_info("persistence engine module ip_vs_pe_%s "
1117 "not found\n", u->pe_name);
1123 #ifdef CONFIG_IP_VS_IPV6
/* IPv6 netmask is a prefix length, must be 1..128 */
1124 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1130 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1132 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1136 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1137 if (!svc->stats.cpustats) {
1138 pr_err("%s() alloc_percpu failed\n", __func__);
1142 /* I'm the first user of the service */
1143 atomic_set(&svc->usecnt, 0);
1144 atomic_set(&svc->refcnt, 0);
1147 svc->protocol = u->protocol;
1148 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1149 svc->port = u->port;
1150 svc->fwmark = u->fwmark;
1151 svc->flags = u->flags;
1152 svc->timeout = u->timeout * HZ;
1153 svc->netmask = u->netmask;
1156 INIT_LIST_HEAD(&svc->destinations);
1157 rwlock_init(&svc->sched_lock);
1158 spin_lock_init(&svc->stats.lock);
1160 /* Bind the scheduler */
1161 ret = ip_vs_bind_scheduler(svc, sched);
1166 /* Bind the ct retriever */
1167 ip_vs_bind_pe(svc, pe);
1170 /* Update the virtual service counters */
1171 if (svc->port == FTPPORT)
1172 atomic_inc(&ipvs->ftpsvc_counter);
1173 else if (svc->port == 0)
1174 atomic_inc(&ipvs->nullsvc_counter);
1176 ip_vs_new_estimator(net, &svc->stats);
1178 /* Count only IPv4 services for old get/setsockopt interface */
1179 if (svc->af == AF_INET)
1180 ipvs->num_services++;
1182 /* Hash the service into the service table */
1183 write_lock_bh(&__ip_vs_svc_lock);
1184 ip_vs_svc_hash(svc);
1185 write_unlock_bh(&__ip_vs_svc_lock);
/* ---- error unwind (labels elided in this fragment) ---- */
1193 ip_vs_unbind_scheduler(svc);
1196 ip_vs_app_inc_put(svc->inc);
1199 if (svc->stats.cpustats)
1200 free_percpu(svc->stats.cpustats);
1203 ip_vs_scheduler_put(sched);
1206 /* decrease the module use count */
1207 ip_vs_use_count_dec();
1214 * Edit a service and bind it with a new scheduler
/*
 * Updates flags/timeout/netmask of an existing service and, when the
 * scheduler or persistence engine named in @u differs from the current one,
 * swaps it under the service write lock.  A failed rebind attempts to
 * restore the old scheduler (best effort — see comment below).
 */
1217 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1219 struct ip_vs_scheduler *sched, *old_sched;
1220 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1224 * Lookup the scheduler, by 'u->sched_name'
1226 sched = ip_vs_scheduler_get(u->sched_name);
1227 if (sched == NULL) {
1228 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1233 if (u->pe_name && *u->pe_name) {
1234 pe = ip_vs_pe_getbyname(u->pe_name);
1236 pr_info("persistence engine module ip_vs_pe_%s "
1237 "not found\n", u->pe_name);
1244 #ifdef CONFIG_IP_VS_IPV6
1245 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1251 write_lock_bh(&__ip_vs_svc_lock);
1254 * Wait until all other svc users go away.
1256 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1259 * Set the flags and timeout value
1261 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1262 svc->timeout = u->timeout * HZ;
1263 svc->netmask = u->netmask;
1265 old_sched = svc->scheduler;
1266 if (sched != old_sched) {
1268 * Unbind the old scheduler
1270 if ((ret = ip_vs_unbind_scheduler(svc))) {
1276 * Bind the new scheduler
1278 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1280 * If ip_vs_bind_scheduler fails, restore the old
1282 * The main reason of failure is out of memory.
1284 * The question is if the old scheduler can be
1285 * restored all the time. TODO: if it cannot be
1286 * restored some time, we must delete the service,
1287 * otherwise the system may crash.
1289 ip_vs_bind_scheduler(svc, old_sched);
1297 ip_vs_unbind_pe(svc);
1298 ip_vs_bind_pe(svc, pe);
1302 write_unlock_bh(&__ip_vs_svc_lock);
/* release the references replaced above (put outside the lock) */
1304 ip_vs_scheduler_put(old_sched);
1305 ip_vs_pe_put(old_pe);
1311 * Delete a service from the service list
1312 * - The service must be unlinked, unlocked and not referenced!
1313 * - We are called under _bh lock
1315 static void __ip_vs_del_service(struct ip_vs_service *svc)
1317 struct ip_vs_dest *dest, *nxt;
1318 struct ip_vs_scheduler *old_sched;
1319 struct ip_vs_pe *old_pe;
1320 struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* NOTE(review): looks like a debugging leftover — consider pr_debug/removal */
1322 pr_info("%s: enter\n", __func__);
1324 /* Count only IPv4 services for old get/setsockopt interface */
1325 if (svc->af == AF_INET)
1326 ipvs->num_services--;
1328 ip_vs_kill_estimator(svc->net, &svc->stats);
1330 /* Unbind scheduler */
1331 old_sched = svc->scheduler;
1332 ip_vs_unbind_scheduler(svc);
1333 ip_vs_scheduler_put(old_sched);
1335 /* Unbind persistence engine */
1337 ip_vs_unbind_pe(svc);
1338 ip_vs_pe_put(old_pe);
1340 /* Unbind app inc */
1342 ip_vs_app_inc_put(svc->inc);
1347 * Unlink the whole destination list
1349 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
/* svcupd=0: no point notifying the scheduler of a dying service */
1350 __ip_vs_unlink_dest(svc, dest, 0);
1351 __ip_vs_del_dest(svc->net, dest);
1355 * Update the virtual service counters
1357 if (svc->port == FTPPORT)
1358 atomic_dec(&ipvs->ftpsvc_counter);
1359 else if (svc->port == 0)
1360 atomic_dec(&ipvs->nullsvc_counter);
1363 * Free the service if nobody refers to it
1365 if (atomic_read(&svc->refcnt) == 0) {
1366 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1368 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1369 ntohs(svc->port), atomic_read(&svc->usecnt));
1370 free_percpu(svc->stats.cpustats);
1374 /* decrease the module use count */
1375 ip_vs_use_count_dec();
1379 * Unlink a service from list and try to delete it if its refcnt reached 0
/*
 * Unhashes the service under the write lock, waits for concurrent users to
 * drain (usecnt), then performs the final teardown while still locked.
 */
1381 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1384 * Unhash it from the service table
1386 write_lock_bh(&__ip_vs_svc_lock);
1388 ip_vs_svc_unhash(svc);
1391 * Wait until all the svc users go away.
1393 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1395 __ip_vs_del_service(svc);
1397 write_unlock_bh(&__ip_vs_svc_lock);
1401 * Delete a service from the service list
/* Thin user-request wrapper around ip_vs_unlink_service(). */
1403 static int ip_vs_del_service(struct ip_vs_service *svc)
1407 ip_vs_unlink_service(svc);
1414 * Flush all the virtual services
/*
 * Deletes every service belonging to @net from both hash tables.  Uses the
 * _safe iterator because each unlink removes the entry being visited.
 */
1416 static int ip_vs_flush(struct net *net)
1419 struct ip_vs_service *svc, *nxt;
1422 * Flush the service table hashed by <netns,protocol,addr,port>
1424 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1425 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1427 if (net_eq(svc->net, net))
1428 ip_vs_unlink_service(svc);
1433 * Flush the service table hashed by fwmark
1435 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1436 list_for_each_entry_safe(svc, nxt,
1437 &ip_vs_svc_fwm_table[idx], f_list) {
1438 if (net_eq(svc->net, net))
1439 ip_vs_unlink_service(svc);
1448 * Zero counters in a service or all services
/*
 * Resets the stats of one service and of each of its destinations, under
 * the service write lock so the destination list is stable.
 */
1450 static int ip_vs_zero_service(struct ip_vs_service *svc)
1452 struct ip_vs_dest *dest;
1454 write_lock_bh(&__ip_vs_svc_lock);
1455 list_for_each_entry(dest, &svc->destinations, n_list) {
1456 ip_vs_zero_stats(&dest->stats);
1458 ip_vs_zero_stats(&svc->stats);
1459 write_unlock_bh(&__ip_vs_svc_lock);
/*
 * Zeroes the stats of every service in @net (both hash tables) plus the
 * netns-wide aggregate counters.
 */
1463 static int ip_vs_zero_all(struct net *net)
1466 struct ip_vs_service *svc;
1468 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1469 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1470 if (net_eq(svc->net, net))
1471 ip_vs_zero_service(svc);
1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1477 if (net_eq(svc->net, net))
1478 ip_vs_zero_service(svc);
1482 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
/*
 * sysctl handler for the defense-mode knobs (drop_entry, drop_packet,
 * secure_tcp): accepts 0..3, restores the previous value on out-of-range
 * writes, and re-evaluates the defense level on every valid change.
 * NOTE(review): uses current->nsproxy->net_ns rather than the table's own
 * netns — confirm this is correct for writes from all contexts.
 */
1488 proc_do_defense_mode(ctl_table *table, int write,
1489 void __user *buffer, size_t *lenp, loff_t *ppos)
1491 struct net *net = current->nsproxy->net_ns;
1492 int *valp = table->data;
1496 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1497 if (write && (*valp != val)) {
1498 if ((*valp < 0) || (*valp > 3)) {
1499 /* Restore the correct value */
1502 update_defense_level(net_ipvs(net));
/*
 * sysctl handler for sync_threshold: a pair of ints where 0 <= valp[0] and
 * valp[0] < valp[1] must hold; an invalid write restores the saved pair.
 */
1510 proc_do_sync_threshold(ctl_table *table, int write,
1511 void __user *buffer, size_t *lenp, loff_t *ppos)
1513 int *valp = table->data;
1517 /* backup the value first */
1518 memcpy(val, valp, sizeof(val));
1520 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1521 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1522 /* Restore the correct value */
1523 memcpy(valp, val, sizeof(val));
/*
 * sysctl handler for sync_version: only 0 or 1 are accepted; a valid change
 * switches the sync daemon protocol version for the caller's netns.
 */
1529 proc_do_sync_mode(ctl_table *table, int write,
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1532 int *valp = table->data;
1536 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1537 if (write && (*valp != val)) {
1538 if ((*valp < 0) || (*valp > 1)) {
1539 /* Restore the correct value */
1542 struct net *net = current->nsproxy->net_ns;
1543 ip_vs_sync_switch_mode(net, val);
1550 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1551 * Do not change order or insert new entries without
1552 * align with netns init in __ip_vs_control_init()
/*
 * Table of IPVS sysctl entries under /proc/sys/net/ipv4/vs/.
 * Ordering matters: per-netns init code indexes this table by
 * position (see the warning comment above), so new entries must be
 * appended/aligned with __ip_vs_control_init().
 */
1555 static struct ctl_table vs_vars[] = {
1557 .procname = "amemthresh",
1558 .maxlen = sizeof(int),
1560 .proc_handler = proc_dointvec,
1563 .procname = "am_droprate",
1564 .maxlen = sizeof(int),
1566 .proc_handler = proc_dointvec,
/* defense-mode knobs share the range-checked handler above */
1569 .procname = "drop_entry",
1570 .maxlen = sizeof(int),
1572 .proc_handler = proc_do_defense_mode,
1575 .procname = "drop_packet",
1576 .maxlen = sizeof(int),
1578 .proc_handler = proc_do_defense_mode,
1580 #ifdef CONFIG_IP_VS_NFCT
1582 .procname = "conntrack",
1583 .maxlen = sizeof(int),
1585 .proc_handler = &proc_dointvec,
1589 .procname = "secure_tcp",
1590 .maxlen = sizeof(int),
1592 .proc_handler = proc_do_defense_mode,
1595 .procname = "snat_reroute",
1596 .maxlen = sizeof(int),
1598 .proc_handler = &proc_dointvec,
1601 .procname = "sync_version",
1602 .maxlen = sizeof(int),
1604 .proc_handler = &proc_do_sync_mode,
1607 .procname = "cache_bypass",
1608 .maxlen = sizeof(int),
1610 .proc_handler = proc_dointvec,
1613 .procname = "expire_nodest_conn",
1614 .maxlen = sizeof(int),
1616 .proc_handler = proc_dointvec,
1619 .procname = "expire_quiescent_template",
1620 .maxlen = sizeof(int),
1622 .proc_handler = proc_dointvec,
/* sync_threshold is an int pair, hence the struct-member sizeof */
1625 .procname = "sync_threshold",
1627 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1629 .proc_handler = proc_do_sync_threshold,
1632 .procname = "nat_icmp_send",
1633 .maxlen = sizeof(int),
1635 .proc_handler = proc_dointvec,
1637 #ifdef CONFIG_IP_VS_DEBUG
1639 .procname = "debug_level",
1640 .data = &sysctl_ip_vs_debug_level,
1641 .maxlen = sizeof(int),
1643 .proc_handler = proc_dointvec,
/* per-state DoS timeout knobs, all stored in jiffies */
1648 .procname = "timeout_established",
1649 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1650 .maxlen = sizeof(int),
1652 .proc_handler = proc_dointvec_jiffies,
1655 .procname = "timeout_synsent",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1657 .maxlen = sizeof(int),
1659 .proc_handler = proc_dointvec_jiffies,
1662 .procname = "timeout_synrecv",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1664 .maxlen = sizeof(int),
1666 .proc_handler = proc_dointvec_jiffies,
1669 .procname = "timeout_finwait",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1671 .maxlen = sizeof(int),
1673 .proc_handler = proc_dointvec_jiffies,
1676 .procname = "timeout_timewait",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1678 .maxlen = sizeof(int),
1680 .proc_handler = proc_dointvec_jiffies,
1683 .procname = "timeout_close",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1685 .maxlen = sizeof(int),
1687 .proc_handler = proc_dointvec_jiffies,
1690 .procname = "timeout_closewait",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1692 .maxlen = sizeof(int),
1694 .proc_handler = proc_dointvec_jiffies,
1697 .procname = "timeout_lastack",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1699 .maxlen = sizeof(int),
1701 .proc_handler = proc_dointvec_jiffies,
1704 .procname = "timeout_listen",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1706 .maxlen = sizeof(int),
1708 .proc_handler = proc_dointvec_jiffies,
1711 .procname = "timeout_synack",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1713 .maxlen = sizeof(int),
1715 .proc_handler = proc_dointvec_jiffies,
1718 .procname = "timeout_udp",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1720 .maxlen = sizeof(int),
1722 .proc_handler = proc_dointvec_jiffies,
1725 .procname = "timeout_icmp",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1727 .maxlen = sizeof(int),
1729 .proc_handler = proc_dointvec_jiffies,
/* sysctl path "net/ipv4/vs"; exported for other IPVS modules */
1735 const struct ctl_path net_vs_ctl_path[] = {
1736 { .procname = "net", },
1737 { .procname = "ipv4", },
1738 { .procname = "vs", },
1741 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1743 #ifdef CONFIG_PROC_FS
/* seq_file iterator state: which hash table (proto vs fwmark) and
 * bucket the walk is currently in.  NOTE(review): the struct header
 * line is elided in this view. */
1746 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1747 struct list_head *table;
1752 * Write the contents of the VS rule table to a PROCfs file.
1753 * (It is kept just for backward compatibility)
/*
 * Map a connection's forwarding-method flags to the short name shown
 * in the /proc listing.  NOTE(review): the return statements for each
 * case (and the default) are elided in this view.
 */
1755 static inline const char *ip_vs_fwd_name(unsigned flags)
1757 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1758 case IP_VS_CONN_F_LOCALNODE:
1760 case IP_VS_CONN_F_TUNNEL:
1762 case IP_VS_CONN_F_DROUTE:
1770 /* Get the Nth entry in the two lists */
/*
 * Position the seq_file iterator at entry 'pos', counting first
 * through the protocol-hashed table and then the fwmark-hashed one,
 * skipping services that belong to other network namespaces.
 * Records which table/bucket was reached in iter->table/bucket.
 */
1771 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1773 struct net *net = seq_file_net(seq);
1774 struct ip_vs_iter *iter = seq->private;
1776 struct ip_vs_service *svc;
1778 /* look in hash by protocol */
1779 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1780 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1781 if (net_eq(svc->net, net) && pos-- == 0) {
1782 iter->table = ip_vs_svc_table;
1789 /* keep looking in fwmark */
1790 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1791 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1792 if (net_eq(svc->net, net) && pos-- == 0) {
1793 iter->table = ip_vs_svc_fwm_table;
/*
 * seq_file ->start: take the service-table read lock (released in
 * ->stop) and return either the header token or the pos'th entry.
 */
1803 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1804 __acquires(__ip_vs_svc_lock)
1807 read_lock_bh(&__ip_vs_svc_lock);
1808 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file ->next: advance within the current bucket, then across
 * buckets of the protocol table, then fall through to the fwmark
 * table; returns NULL when both tables are exhausted.
 * NOTE(review): interior lines (pos increment, svc assignment,
 * returns inside the bucket loops) are elided in this view.
 */
1812 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1814 struct list_head *e;
1815 struct ip_vs_iter *iter;
1816 struct ip_vs_service *svc;
1819 if (v == SEQ_START_TOKEN)
1820 return ip_vs_info_array(seq,0);
1823 iter = seq->private;
1825 if (iter->table == ip_vs_svc_table) {
1826 /* next service in table hashed by protocol */
1827 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1828 return list_entry(e, struct ip_vs_service, s_list);
1831 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1832 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1838 iter->table = ip_vs_svc_fwm_table;
1843 /* next service in hashed by fwmark */
1844 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1845 return list_entry(e, struct ip_vs_service, f_list);
1848 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1849 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file ->stop: drop the read lock taken in ->start */
1857 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1858 __releases(__ip_vs_svc_lock)
1860 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * seq_file ->show: print the legacy /proc/net/ip_vs listing — a
 * version/header banner for SEQ_START_TOKEN, otherwise one line per
 * virtual service (proto/addr/port or FWM form, IPv6-aware) followed
 * by one indented line per real-server destination.
 * NOTE(review): several seq_printf first arguments and the IPv6
 * address expressions are elided in this view.
 */
1864 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1866 if (v == SEQ_START_TOKEN) {
1868 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1869 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1871 "Prot LocalAddress:Port Scheduler Flags\n");
1873 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1875 const struct ip_vs_service *svc = v;
1876 const struct ip_vs_iter *iter = seq->private;
1877 const struct ip_vs_dest *dest;
1879 if (iter->table == ip_vs_svc_table) {
1880 #ifdef CONFIG_IP_VS_IPV6
1881 if (svc->af == AF_INET6)
1882 seq_printf(seq, "%s [%pI6]:%04X %s ",
1883 ip_vs_proto_name(svc->protocol),
1886 svc->scheduler->name);
1889 seq_printf(seq, "%s %08X:%04X %s %s ",
1890 ip_vs_proto_name(svc->protocol),
1891 ntohl(svc->addr.ip),
1893 svc->scheduler->name,
1894 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
/* fwmark-hashed services print the mark instead of addr:port */
1896 seq_printf(seq, "FWM %08X %s %s",
1897 svc->fwmark, svc->scheduler->name,
1898 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1901 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1902 seq_printf(seq, "persistent %d %08X\n",
1904 ntohl(svc->netmask));
1906 seq_putc(seq, '\n');
/* one line per destination of this service */
1908 list_for_each_entry(dest, &svc->destinations, n_list) {
1909 #ifdef CONFIG_IP_VS_IPV6
1910 if (dest->af == AF_INET6)
1913 " %-7s %-6d %-10d %-10d\n",
1916 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1917 atomic_read(&dest->weight),
1918 atomic_read(&dest->activeconns),
1919 atomic_read(&dest->inactconns));
1924 "%-7s %-6d %-10d %-10d\n",
1925 ntohl(dest->addr.ip),
1927 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1928 atomic_read(&dest->weight),
1929 atomic_read(&dest->activeconns),
1930 atomic_read(&dest->inactconns));
/* seq_file operations for the /proc/net/ip_vs service listing */
1937 static const struct seq_operations ip_vs_info_seq_ops = {
1938 .start = ip_vs_info_seq_start,
1939 .next = ip_vs_info_seq_next,
1940 .stop = ip_vs_info_seq_stop,
1941 .show = ip_vs_info_seq_show,
/* open() hook: netns-aware seq_file with per-open ip_vs_iter state */
1944 static int ip_vs_info_open(struct inode *inode, struct file *file)
1946 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1947 sizeof(struct ip_vs_iter));
/* file_operations for /proc/net/ip_vs (.read line elided in view) */
1950 static const struct file_operations ip_vs_info_fops = {
1951 .owner = THIS_MODULE,
1952 .open = ip_vs_info_open,
1954 .llseek = seq_lseek,
1955 .release = seq_release_private,
1960 #ifdef CONFIG_PROC_FS
/*
 * single_open show routine for /proc/net/ip_vs_stats: dumps the
 * netns-global totals and the current rate estimates, all in hex,
 * under the stats spinlock so the snapshot is consistent.
 */
1961 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1963 struct net *net = seq_file_single_net(seq);
1964 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1966 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1968 " Total Incoming Outgoing Incoming Outgoing\n");
1970 " Conns Packets Packets Bytes Bytes\n");
1972 spin_lock_bh(&tot_stats->lock);
1973 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1974 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1975 (unsigned long long) tot_stats->ustats.inbytes,
1976 (unsigned long long) tot_stats->ustats.outbytes);
1978 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1980 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1981 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1982 tot_stats->ustats.cps,
1983 tot_stats->ustats.inpps,
1984 tot_stats->ustats.outpps,
1985 tot_stats->ustats.inbps,
1986 tot_stats->ustats.outbps);
1987 spin_unlock_bh(&tot_stats->lock);
/* open() hook for /proc/net/ip_vs_stats (netns-aware single_open) */
1992 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1994 return single_open_net(inode, file, ip_vs_stats_show);
/* file_operations for /proc/net/ip_vs_stats (.read elided in view) */
1997 static const struct file_operations ip_vs_stats_fops = {
1998 .owner = THIS_MODULE,
1999 .open = ip_vs_stats_seq_open,
2001 .llseek = seq_lseek,
2002 .release = single_release,
/*
 * show routine for /proc/net/ip_vs_stats_percpu: one line of raw
 * counters per possible CPU (read without the stats lock; per-CPU
 * counters are only written by their owning CPU), followed by the
 * locked netns totals and rate estimates, matching ip_vs_stats_show.
 */
2005 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2007 struct net *net = seq_file_single_net(seq);
2008 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2011 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2013 " Total Incoming Outgoing Incoming Outgoing\n");
2015 "CPU Conns Packets Packets Bytes Bytes\n");
2017 for_each_possible_cpu(i) {
2018 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2019 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2020 i, u->ustats.conns, u->ustats.inpkts,
2021 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2022 (__u64)u->ustats.outbytes);
2025 spin_lock_bh(&tot_stats->lock);
2026 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2027 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2028 tot_stats->ustats.outpkts,
2029 (unsigned long long) tot_stats->ustats.inbytes,
2030 (unsigned long long) tot_stats->ustats.outbytes);
2032 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2034 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2035 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2036 tot_stats->ustats.cps,
2037 tot_stats->ustats.inpps,
2038 tot_stats->ustats.outpps,
2039 tot_stats->ustats.inbps,
2040 tot_stats->ustats.outbps);
2041 spin_unlock_bh(&tot_stats->lock);
/* open() hook for /proc/net/ip_vs_stats_percpu */
2046 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2048 return single_open_net(inode, file, ip_vs_stats_percpu_show);
/* file_operations for the per-CPU stats file (.read elided in view) */
2051 static const struct file_operations ip_vs_stats_percpu_fops = {
2052 .owner = THIS_MODULE,
2053 .open = ip_vs_stats_percpu_seq_open,
2055 .llseek = seq_lseek,
2056 .release = single_release,
2061 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * Apply user-supplied TCP/TCP-FIN/UDP timeouts (seconds) to the
 * per-netns protocol timeout tables, converting to jiffies.  A zero
 * field means "leave that timeout unchanged".
 */
2063 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2065 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2066 struct ip_vs_proto_data *pd;
2069 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2074 #ifdef CONFIG_IP_VS_PROTO_TCP
2075 if (u->tcp_timeout) {
2076 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2077 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2078 = u->tcp_timeout * HZ;
2081 if (u->tcp_fin_timeout) {
2082 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2083 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2084 = u->tcp_fin_timeout * HZ;
2088 #ifdef CONFIG_IP_VS_PROTO_UDP
2089 if (u->udp_timeout) {
2090 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2091 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2092 = u->udp_timeout * HZ;
2099 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2100 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2101 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2102 sizeof(struct ip_vs_dest_user))
2103 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2104 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2105 #define MAX_ARG_LEN SVCDEST_ARG_LEN
/* Expected setsockopt argument length per SET command, indexed by
 * SET_CMDID(); do_ip_vs_set_ctl() rejects mismatched lengths. */
2107 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2108 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2109 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2110 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2112 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2117 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2118 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * Convert the legacy (IPv4-only) sockopt service struct into the
 * kernel-internal extended form.  Fields absent from the compat
 * struct (af, pe_name, ...) are left zeroed by the memset.
 */
2121 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2122 struct ip_vs_service_user *usvc_compat)
2124 memset(usvc, 0, sizeof(*usvc));
2127 usvc->protocol = usvc_compat->protocol;
2128 usvc->addr.ip = usvc_compat->addr;
2129 usvc->port = usvc_compat->port;
2130 usvc->fwmark = usvc_compat->fwmark;
2132 /* Deep copy of sched_name is not needed here */
2133 usvc->sched_name = usvc_compat->sched_name;
2135 usvc->flags = usvc_compat->flags;
2136 usvc->timeout = usvc_compat->timeout;
2137 usvc->netmask = usvc_compat->netmask;
/*
 * Convert the legacy (IPv4-only) sockopt destination struct into the
 * kernel-internal extended form; unset fields stay zeroed.
 */
2140 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2141 struct ip_vs_dest_user *udest_compat)
2143 memset(udest, 0, sizeof(*udest));
2145 udest->addr.ip = udest_compat->addr;
2146 udest->port = udest_compat->port;
2147 udest->conn_flags = udest_compat->conn_flags;
2148 udest->weight = udest_compat->weight;
2149 udest->u_threshold = udest_compat->u_threshold;
2150 udest->l_threshold = udest_compat->l_threshold;
/*
 * setsockopt() entry point for the legacy IPVS control interface.
 * Requires CAP_NET_ADMIN; validates cmd range and argument length,
 * copies the argument in, then dispatches under __ip_vs_mutex:
 * table-wide commands (FLUSH/TIMEOUT/STARTDAEMON/STOPDAEMON) first,
 * otherwise the compat structs are converted and the target service
 * is looked up before the per-service/per-dest switch.
 * The module use count is held across the whole operation.
 * NOTE(review): error-return lines and the switch's tail cases are
 * elided in this view.
 */
2154 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2156 struct net *net = sock_net(sk);
2158 unsigned char arg[MAX_ARG_LEN];
2159 struct ip_vs_service_user *usvc_compat;
2160 struct ip_vs_service_user_kern usvc;
2161 struct ip_vs_service *svc;
2162 struct ip_vs_dest_user *udest_compat;
2163 struct ip_vs_dest_user_kern udest;
2165 if (!capable(CAP_NET_ADMIN))
2168 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2170 if (len < 0 || len > MAX_ARG_LEN)
2172 if (len != set_arglen[SET_CMDID(cmd)]) {
2173 pr_err("set_ctl: len %u != %u\n",
2174 len, set_arglen[SET_CMDID(cmd)]);
2178 if (copy_from_user(arg, user, len) != 0)
2181 /* increase the module use count */
2182 ip_vs_use_count_inc();
2184 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2189 if (cmd == IP_VS_SO_SET_FLUSH) {
2190 /* Flush the virtual service */
2191 ret = ip_vs_flush(net);
2193 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2194 /* Set timeout values for (tcp tcpfin udp) */
2195 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2197 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2198 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2199 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2202 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2203 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2204 ret = stop_sync_thread(net, dm->state);
2208 usvc_compat = (struct ip_vs_service_user *)arg;
2209 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2211 /* We only use the new structs internally, so copy userspace compat
2212 * structs to extended internal versions */
2213 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2214 ip_vs_copy_udest_compat(&udest, udest_compat);
2216 if (cmd == IP_VS_SO_SET_ZERO) {
2217 /* if no service address is set, zero counters in all */
2218 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2219 ret = ip_vs_zero_all(net);
2224 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2225 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2226 usvc.protocol != IPPROTO_SCTP) {
2227 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2228 usvc.protocol, &usvc.addr.ip,
2229 ntohs(usvc.port), usvc.sched_name);
2234 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2235 if (usvc.fwmark == 0)
2236 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2237 &usvc.addr, usvc.port);
2239 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2241 if (cmd != IP_VS_SO_SET_ADD
2242 && (svc == NULL || svc->protocol != usvc.protocol)) {
2248 case IP_VS_SO_SET_ADD:
2252 ret = ip_vs_add_service(net, &usvc, &svc);
2254 case IP_VS_SO_SET_EDIT:
2255 ret = ip_vs_edit_service(svc, &usvc);
2257 case IP_VS_SO_SET_DEL:
2258 ret = ip_vs_del_service(svc);
2262 case IP_VS_SO_SET_ZERO:
2263 ret = ip_vs_zero_service(svc);
2265 case IP_VS_SO_SET_ADDDEST:
2266 ret = ip_vs_add_dest(svc, &udest);
2268 case IP_VS_SO_SET_EDITDEST:
2269 ret = ip_vs_edit_dest(svc, &udest);
2271 case IP_VS_SO_SET_DELDEST:
2272 ret = ip_vs_del_dest(svc, &udest);
2279 mutex_unlock(&__ip_vs_mutex);
2281 /* decrease the module use count */
2282 ip_vs_use_count_dec();
/* Snapshot src->ustats into the userspace stats struct under the
 * stats lock so the counters are mutually consistent. */
2289 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2291 spin_lock_bh(&src->lock);
2292 memcpy(dst, &src->ustats, sizeof(*dst));
2293 spin_unlock_bh(&src->lock);
/*
 * Fill a legacy (IPv4-only) service entry from the kernel service:
 * plain field copies plus timeout converted jiffies->seconds and a
 * locked stats snapshot.
 */
2297 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2299 dst->protocol = src->protocol;
2300 dst->addr = src->addr.ip;
2301 dst->port = src->port;
2302 dst->fwmark = src->fwmark;
2303 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2304 dst->flags = src->flags;
2305 dst->timeout = src->timeout / HZ;
2306 dst->netmask = src->netmask;
2307 dst->num_dests = src->num_dests;
2308 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * Copy up to get->num_services service entries to userspace, walking
 * both hash tables; only AF_INET services of the caller's netns are
 * exposed through this legacy interface.
 * NOTE(review): the 'count' declaration, count increment, and the
 * -EFAULT/return paths are elided in this view.
 */
2312 __ip_vs_get_service_entries(struct net *net,
2313 const struct ip_vs_get_services *get,
2314 struct ip_vs_get_services __user *uptr)
2317 struct ip_vs_service *svc;
2318 struct ip_vs_service_entry entry;
2321 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2322 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2323 /* Only expose IPv4 entries to old interface */
2324 if (svc->af != AF_INET || !net_eq(svc->net, net))
2327 if (count >= get->num_services)
2329 memset(&entry, 0, sizeof(entry));
2330 ip_vs_copy_service(&entry, svc);
2331 if (copy_to_user(&uptr->entrytable[count],
2332 &entry, sizeof(entry))) {
2340 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2341 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2342 /* Only expose IPv4 entries to old interface */
2343 if (svc->af != AF_INET || !net_eq(svc->net, net))
2346 if (count >= get->num_services)
2348 memset(&entry, 0, sizeof(entry));
2349 ip_vs_copy_service(&entry, svc);
2350 if (copy_to_user(&uptr->entrytable[count],
2351 &entry, sizeof(entry))) {
/*
 * Look up the requested service (fwmark or proto/addr/port, IPv4
 * only) and copy up to get->num_dests destination entries to
 * userspace, each with a locked stats snapshot.
 * NOTE(review): 'count' bookkeeping and error paths are elided.
 */
2363 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2364 struct ip_vs_get_dests __user *uptr)
2366 struct ip_vs_service *svc;
2367 union nf_inet_addr addr = { .ip = get->addr };
2371 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2373 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2378 struct ip_vs_dest *dest;
2379 struct ip_vs_dest_entry entry;
2381 list_for_each_entry(dest, &svc->destinations, n_list) {
2382 if (count >= get->num_dests)
2385 entry.addr = dest->addr.ip;
2386 entry.port = dest->port;
2387 entry.conn_flags = atomic_read(&dest->conn_flags);
2388 entry.weight = atomic_read(&dest->weight);
2389 entry.u_threshold = dest->u_threshold;
2390 entry.l_threshold = dest->l_threshold;
2391 entry.activeconns = atomic_read(&dest->activeconns);
2392 entry.inactconns = atomic_read(&dest->inactconns);
2393 entry.persistconns = atomic_read(&dest->persistconns);
2394 ip_vs_copy_stats(&entry.stats, &dest->stats);
2395 if (copy_to_user(&uptr->entrytable[count],
2396 &entry, sizeof(entry))) {
/*
 * Report the current per-netns TCP/TCP-FIN/UDP timeouts to the user
 * struct, converting jiffies back to seconds (inverse of
 * ip_vs_set_timeout()).
 */
2408 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2410 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2411 struct ip_vs_proto_data *pd;
2414 #ifdef CONFIG_IP_VS_PROTO_TCP
2415 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2416 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2417 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2419 #ifdef CONFIG_IP_VS_PROTO_UDP
2420 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2422 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2427 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2428 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2429 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2430 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2431 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2432 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2433 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
/* Minimum getsockopt argument length per GET command, indexed by
 * GET_CMDID(); do_ip_vs_get_ctl() rejects shorter buffers. */
2435 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2436 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2437 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2438 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2439 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2440 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2441 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2442 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * getsockopt() entry point for the legacy IPVS control interface.
 * Requires CAP_NET_ADMIN; validates cmd range and buffer length,
 * copies the request in, then serves each GET command under
 * __ip_vs_mutex: version string, global info, service/dest listings
 * (with an extra userspace-supplied size check), timeouts, and the
 * state of the master/backup sync daemons.
 * NOTE(review): several declarations, 'ret' handling, and some error
 * branches are elided in this view.
 */
2446 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2448 unsigned char arg[128];
2450 unsigned int copylen;
2451 struct net *net = sock_net(sk);
2452 struct netns_ipvs *ipvs = net_ipvs(net);
2455 if (!capable(CAP_NET_ADMIN))
2458 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2461 if (*len < get_arglen[GET_CMDID(cmd)]) {
2462 pr_err("get_ctl: len %u < %u\n",
2463 *len, get_arglen[GET_CMDID(cmd)]);
2467 copylen = get_arglen[GET_CMDID(cmd)];
2471 if (copy_from_user(arg, user, copylen) != 0)
2474 if (mutex_lock_interruptible(&__ip_vs_mutex))
2475 return -ERESTARTSYS;
2478 case IP_VS_SO_GET_VERSION:
2482 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2483 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2484 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2488 *len = strlen(buf)+1;
2492 case IP_VS_SO_GET_INFO:
2494 struct ip_vs_getinfo info;
2495 info.version = IP_VS_VERSION_CODE;
2496 info.size = ip_vs_conn_tab_size;
2497 info.num_services = ipvs->num_services;
2498 if (copy_to_user(user, &info, sizeof(info)) != 0)
2503 case IP_VS_SO_GET_SERVICES:
2505 struct ip_vs_get_services *get;
2508 get = (struct ip_vs_get_services *)arg;
2509 size = sizeof(*get) +
2510 sizeof(struct ip_vs_service_entry) * get->num_services;
2512 pr_err("length: %u != %u\n", *len, size);
2516 ret = __ip_vs_get_service_entries(net, get, user);
2520 case IP_VS_SO_GET_SERVICE:
2522 struct ip_vs_service_entry *entry;
2523 struct ip_vs_service *svc;
2524 union nf_inet_addr addr;
2526 entry = (struct ip_vs_service_entry *)arg;
2527 addr.ip = entry->addr;
2529 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2531 svc = __ip_vs_service_find(net, AF_INET,
2532 entry->protocol, &addr,
2535 ip_vs_copy_service(entry, svc);
2536 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2543 case IP_VS_SO_GET_DESTS:
2545 struct ip_vs_get_dests *get;
2548 get = (struct ip_vs_get_dests *)arg;
2549 size = sizeof(*get) +
2550 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2552 pr_err("length: %u != %u\n", *len, size);
2556 ret = __ip_vs_get_dest_entries(net, get, user);
2560 case IP_VS_SO_GET_TIMEOUT:
2562 struct ip_vs_timeout_user t;
2564 __ip_vs_get_timeouts(net, &t);
2565 if (copy_to_user(user, &t, sizeof(t)) != 0)
2570 case IP_VS_SO_GET_DAEMON:
2572 struct ip_vs_daemon_user d[2];
2574 memset(&d, 0, sizeof(d));
2575 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2576 d[0].state = IP_VS_STATE_MASTER;
2577 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2578 sizeof(d[0].mcast_ifn));
2579 d[0].syncid = ipvs->master_syncid;
2581 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2582 d[1].state = IP_VS_STATE_BACKUP;
2583 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2584 sizeof(d[1].mcast_ifn));
2585 d[1].syncid = ipvs->backup_syncid;
2587 if (copy_to_user(user, &d, sizeof(d)) != 0)
2597 mutex_unlock(&__ip_vs_mutex);
/* netfilter sockopt registration tying the IP_VS_SO_* command range
 * to the legacy set/get handlers above */
2602 static struct nf_sockopt_ops ip_vs_sockopts = {
2604 .set_optmin = IP_VS_BASE_CTL,
2605 .set_optmax = IP_VS_SO_SET_MAX+1,
2606 .set = do_ip_vs_set_ctl,
2607 .get_optmin = IP_VS_BASE_CTL,
2608 .get_optmax = IP_VS_SO_GET_MAX+1,
2609 .get = do_ip_vs_get_ctl,
2610 .owner = THIS_MODULE,
2614 * Generic Netlink interface
2617 /* IPVS genetlink family */
/* IPVS generic-netlink family; netnsok lets ipvsadm operate inside
 * non-initial network namespaces */
2618 static struct genl_family ip_vs_genl_family = {
2619 .id = GENL_ID_GENERATE,
2621 .name = IPVS_GENL_NAME,
2622 .version = IPVS_GENL_VERSION,
2623 .maxattr = IPVS_CMD_MAX,
2624 .netnsok = true, /* Allow ipvsadm to work across netns */
2627 /* Policy used for first-level command attributes */
/* Validation policy for top-level IPVS_CMD_ATTR_* attributes */
2628 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2629 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2630 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2631 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2632 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2633 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2634 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2637 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
/* Validation policy for nested IPVS_CMD_ATTR_DAEMON attributes */
2638 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2639 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2640 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2641 .len = IP_VS_IFNAME_MAXLEN },
2642 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2645 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
/* Validation policy for nested IPVS_CMD_ATTR_SERVICE attributes */
2646 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2647 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2648 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2649 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2650 .len = sizeof(union nf_inet_addr) },
2651 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2652 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2653 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2654 .len = IP_VS_SCHEDNAME_MAXLEN },
2655 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2656 .len = IP_VS_PENAME_MAXLEN },
2657 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2658 .len = sizeof(struct ip_vs_flags) },
2659 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2660 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2661 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2664 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
/* Validation policy for nested IPVS_CMD_ATTR_DEST attributes */
2665 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2666 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2667 .len = sizeof(union nf_inet_addr) },
2668 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2669 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2670 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2671 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2672 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2673 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2675 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2676 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * Emit a nested stats container (IPVS_STATS_ATTR_*) into the netlink
 * message, reading the counters under the stats lock.  On an
 * NLA_PUT failure the lock is dropped and the nest cancelled.
 * NOTE(review): the NULL-check on nla_nest_start and return values
 * are elided in this view.
 */
2679 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2680 struct ip_vs_stats *stats)
2682 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2686 spin_lock_bh(&stats->lock);
2688 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2689 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2690 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2691 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2692 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2694 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2696 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2697 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2699 spin_unlock_bh(&stats->lock);
2701 nla_nest_end(skb, nl_stats);
/* error path: unlock before cancelling the partially-built nest */
2706 spin_unlock_bh(&stats->lock);
2707 nla_nest_cancel(skb, nl_stats);
/*
 * Emit a nested IPVS_CMD_ATTR_SERVICE container describing one
 * virtual service: af, then either fwmark or proto/addr/port,
 * followed by scheduler, optional pe, flags, timeout, netmask and
 * the stats sub-nest.  Failed puts jump to nla_put_failure, which
 * cancels the nest.
 */
2711 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2712 struct ip_vs_service *svc)
2714 struct nlattr *nl_service;
2715 struct ip_vs_flags flags = { .flags = svc->flags,
2718 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2722 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2725 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2727 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2728 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2729 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2732 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2734 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2735 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2736 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2737 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2739 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2740 goto nla_put_failure;
2742 nla_nest_end(skb, nl_service);
2747 nla_nest_cancel(skb, nl_service);
/*
 * Emit one NLM_F_MULTI IPVS_CMD_NEW_SERVICE message for a service
 * during a dump; on fill failure the whole genl message is cancelled.
 */
2751 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2752 struct ip_vs_service *svc,
2753 struct netlink_callback *cb)
2757 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2758 &ip_vs_genl_family, NLM_F_MULTI,
2759 IPVS_CMD_NEW_SERVICE);
2763 if (ip_vs_genl_fill_service(skb, svc) < 0)
2764 goto nla_put_failure;
2766 return genlmsg_end(skb, hdr);
2769 genlmsg_cancel(skb, hdr);
/*
 * Netlink dump callback: walk both service hash tables under
 * __ip_vs_mutex, skipping entries before cb->args[0] (resume point)
 * and services of other netns; stops early when the skb fills up and
 * records progress for the next dump call.
 * NOTE(review): 'idx' declaration and cb->args update are elided.
 */
2773 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2774 struct netlink_callback *cb)
2777 int start = cb->args[0];
2778 struct ip_vs_service *svc;
2779 struct net *net = skb_sknet(skb);
2781 mutex_lock(&__ip_vs_mutex);
2782 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2783 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2784 if (++idx <= start || !net_eq(svc->net, net))
2786 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2788 goto nla_put_failure;
2793 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2794 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2795 if (++idx <= start || !net_eq(svc->net, net))
2797 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2799 goto nla_put_failure;
2805 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_SERVICE attribute into the internal
 * service struct.  Mandatory identity fields (af plus either fwmark
 * or proto/addr/port) are validated first; the matching existing
 * service (if any) is looked up into *ret_svc.  When 'full_entry' is
 * set, the editable fields (sched, flags, timeout, netmask, optional
 * pe) must also be present, with new flags merged over the existing
 * service's flags according to flags.mask.
 * NOTE(review): several return statements and the fwmark/address
 * branch conditions are elided in this view.
 */
2811 static int ip_vs_genl_parse_service(struct net *net,
2812 struct ip_vs_service_user_kern *usvc,
2813 struct nlattr *nla, int full_entry,
2814 struct ip_vs_service **ret_svc)
2816 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2817 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2818 struct ip_vs_service *svc;
2820 /* Parse mandatory identifying service fields first */
2822 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2825 nla_af = attrs[IPVS_SVC_ATTR_AF];
2826 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2827 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2828 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2829 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2831 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2834 memset(usvc, 0, sizeof(*usvc));
2836 usvc->af = nla_get_u16(nla_af);
2837 #ifdef CONFIG_IP_VS_IPV6
2838 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2840 if (usvc->af != AF_INET)
2842 return -EAFNOSUPPORT;
2845 usvc->protocol = IPPROTO_TCP;
2846 usvc->fwmark = nla_get_u32(nla_fwmark);
2848 usvc->protocol = nla_get_u16(nla_protocol);
2849 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2850 usvc->port = nla_get_u16(nla_port);
2855 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2857 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2858 &usvc->addr, usvc->port);
2861 /* If a full entry was requested, check for the additional fields */
2863 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2865 struct ip_vs_flags flags;
2867 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2868 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2869 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2870 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2871 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2873 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2876 nla_memcpy(&flags, nla_flags, sizeof(flags));
2878 /* prefill flags from service if it already exists */
2880 usvc->flags = svc->flags;
2882 /* set new flags from userland */
2883 usvc->flags = (usvc->flags & ~flags.mask) |
2884 (flags.flags & flags.mask);
2885 usvc->sched_name = nla_data(nla_sched);
2886 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2887 usvc->timeout = nla_get_u32(nla_timeout);
2888 usvc->netmask = nla_get_u32(nla_netmask);
2894 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2897 struct ip_vs_service_user_kern usvc;
2898 struct ip_vs_service *svc;
2901 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2902 return ret ? ERR_PTR(ret) : svc;
2905 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2907 struct nlattr *nl_dest;
2909 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2913 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2914 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2916 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2917 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2919 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2921 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2922 atomic_read(&dest->activeconns));
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2924 atomic_read(&dest->inactconns));
2925 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2926 atomic_read(&dest->persistconns));
2928 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2929 goto nla_put_failure;
2931 nla_nest_end(skb, nl_dest);
2936 nla_nest_cancel(skb, nl_dest);
2940 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2941 struct netlink_callback *cb)
2945 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2946 &ip_vs_genl_family, NLM_F_MULTI,
2951 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2952 goto nla_put_failure;
2954 return genlmsg_end(skb, hdr);
2957 genlmsg_cancel(skb, hdr);
2961 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2962 struct netlink_callback *cb)
2965 int start = cb->args[0];
2966 struct ip_vs_service *svc;
2967 struct ip_vs_dest *dest;
2968 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2969 struct net *net = skb_sknet(skb);
2971 mutex_lock(&__ip_vs_mutex);
2973 /* Try to find the service for which to dump destinations */
2974 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2975 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2979 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
2980 if (IS_ERR(svc) || svc == NULL)
2983 /* Dump the destinations */
2984 list_for_each_entry(dest, &svc->destinations, n_list) {
2987 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2989 goto nla_put_failure;
2997 mutex_unlock(&__ip_vs_mutex);
3002 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3003 struct nlattr *nla, int full_entry)
3005 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3006 struct nlattr *nla_addr, *nla_port;
3008 /* Parse mandatory identifying destination fields first */
3010 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3013 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3014 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3016 if (!(nla_addr && nla_port))
3019 memset(udest, 0, sizeof(*udest));
3021 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3022 udest->port = nla_get_u16(nla_port);
3024 /* If a full entry was requested, check for the additional fields */
3026 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3029 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3030 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3031 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3032 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3034 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3037 udest->conn_flags = nla_get_u32(nla_fwd)
3038 & IP_VS_CONN_F_FWD_MASK;
3039 udest->weight = nla_get_u32(nla_weight);
3040 udest->u_threshold = nla_get_u32(nla_u_thresh);
3041 udest->l_threshold = nla_get_u32(nla_l_thresh);
3047 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3048 const char *mcast_ifn, __be32 syncid)
3050 struct nlattr *nl_daemon;
3052 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3056 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3057 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3058 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3060 nla_nest_end(skb, nl_daemon);
3065 nla_nest_cancel(skb, nl_daemon);
3069 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3070 const char *mcast_ifn, __be32 syncid,
3071 struct netlink_callback *cb)
3074 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3075 &ip_vs_genl_family, NLM_F_MULTI,
3076 IPVS_CMD_NEW_DAEMON);
3080 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3081 goto nla_put_failure;
3083 return genlmsg_end(skb, hdr);
3086 genlmsg_cancel(skb, hdr);
3090 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3091 struct netlink_callback *cb)
3093 struct net *net = skb_net(skb);
3094 struct netns_ipvs *ipvs = net_ipvs(net);
3096 mutex_lock(&__ip_vs_mutex);
3097 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3098 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3099 ipvs->master_mcast_ifn,
3100 ipvs->master_syncid, cb) < 0)
3101 goto nla_put_failure;
3106 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3107 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3108 ipvs->backup_mcast_ifn,
3109 ipvs->backup_syncid, cb) < 0)
3110 goto nla_put_failure;
3116 mutex_unlock(&__ip_vs_mutex);
3121 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3123 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3124 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3125 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3128 return start_sync_thread(net,
3129 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3130 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3131 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3134 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3136 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3139 return stop_sync_thread(net,
3140 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3143 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3145 struct ip_vs_timeout_user t;
3147 __ip_vs_get_timeouts(net, &t);
3149 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3150 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3152 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3154 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3156 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3157 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3159 return ip_vs_set_timeout(net, &t);
3162 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3164 struct ip_vs_service *svc = NULL;
3165 struct ip_vs_service_user_kern usvc;
3166 struct ip_vs_dest_user_kern udest;
3168 int need_full_svc = 0, need_full_dest = 0;
3170 struct netns_ipvs *ipvs;
3172 net = skb_sknet(skb);
3173 ipvs = net_ipvs(net);
3174 cmd = info->genlhdr->cmd;
3176 mutex_lock(&__ip_vs_mutex);
3178 if (cmd == IPVS_CMD_FLUSH) {
3179 ret = ip_vs_flush(net);
3181 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3182 ret = ip_vs_genl_set_config(net, info->attrs);
3184 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3185 cmd == IPVS_CMD_DEL_DAEMON) {
3187 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3189 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3190 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3191 info->attrs[IPVS_CMD_ATTR_DAEMON],
3192 ip_vs_daemon_policy)) {
3197 if (cmd == IPVS_CMD_NEW_DAEMON)
3198 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3200 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3202 } else if (cmd == IPVS_CMD_ZERO &&
3203 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3204 ret = ip_vs_zero_all(net);
3208 /* All following commands require a service argument, so check if we
3209 * received a valid one. We need a full service specification when
3210 * adding / editing a service. Only identifying members otherwise. */
3211 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3214 ret = ip_vs_genl_parse_service(net, &usvc,
3215 info->attrs[IPVS_CMD_ATTR_SERVICE],
3216 need_full_svc, &svc);
3220 /* Unless we're adding a new service, the service must already exist */
3221 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3226 /* Destination commands require a valid destination argument. For
3227 * adding / editing a destination, we need a full destination
3229 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3230 cmd == IPVS_CMD_DEL_DEST) {
3231 if (cmd != IPVS_CMD_DEL_DEST)
3234 ret = ip_vs_genl_parse_dest(&udest,
3235 info->attrs[IPVS_CMD_ATTR_DEST],
3242 case IPVS_CMD_NEW_SERVICE:
3244 ret = ip_vs_add_service(net, &usvc, &svc);
3248 case IPVS_CMD_SET_SERVICE:
3249 ret = ip_vs_edit_service(svc, &usvc);
3251 case IPVS_CMD_DEL_SERVICE:
3252 ret = ip_vs_del_service(svc);
3253 /* do not use svc, it can be freed */
3255 case IPVS_CMD_NEW_DEST:
3256 ret = ip_vs_add_dest(svc, &udest);
3258 case IPVS_CMD_SET_DEST:
3259 ret = ip_vs_edit_dest(svc, &udest);
3261 case IPVS_CMD_DEL_DEST:
3262 ret = ip_vs_del_dest(svc, &udest);
3265 ret = ip_vs_zero_service(svc);
3272 mutex_unlock(&__ip_vs_mutex);
3277 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3279 struct sk_buff *msg;
3281 int ret, cmd, reply_cmd;
3283 struct netns_ipvs *ipvs;
3285 net = skb_sknet(skb);
3286 ipvs = net_ipvs(net);
3287 cmd = info->genlhdr->cmd;
3289 if (cmd == IPVS_CMD_GET_SERVICE)
3290 reply_cmd = IPVS_CMD_NEW_SERVICE;
3291 else if (cmd == IPVS_CMD_GET_INFO)
3292 reply_cmd = IPVS_CMD_SET_INFO;
3293 else if (cmd == IPVS_CMD_GET_CONFIG)
3294 reply_cmd = IPVS_CMD_SET_CONFIG;
3296 pr_err("unknown Generic Netlink command\n");
3300 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3304 mutex_lock(&__ip_vs_mutex);
3306 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3308 goto nla_put_failure;
3311 case IPVS_CMD_GET_SERVICE:
3313 struct ip_vs_service *svc;
3315 svc = ip_vs_genl_find_service(net,
3316 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3321 ret = ip_vs_genl_fill_service(msg, svc);
3323 goto nla_put_failure;
3332 case IPVS_CMD_GET_CONFIG:
3334 struct ip_vs_timeout_user t;
3336 __ip_vs_get_timeouts(net, &t);
3337 #ifdef CONFIG_IP_VS_PROTO_TCP
3338 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3339 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3342 #ifdef CONFIG_IP_VS_PROTO_UDP
3343 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3349 case IPVS_CMD_GET_INFO:
3350 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3351 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3352 ip_vs_conn_tab_size);
3356 genlmsg_end(msg, reply);
3357 ret = genlmsg_reply(msg, info);
3361 pr_err("not enough space in Netlink message\n");
3367 mutex_unlock(&__ip_vs_mutex);
3373 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3375 .cmd = IPVS_CMD_NEW_SERVICE,
3376 .flags = GENL_ADMIN_PERM,
3377 .policy = ip_vs_cmd_policy,
3378 .doit = ip_vs_genl_set_cmd,
3381 .cmd = IPVS_CMD_SET_SERVICE,
3382 .flags = GENL_ADMIN_PERM,
3383 .policy = ip_vs_cmd_policy,
3384 .doit = ip_vs_genl_set_cmd,
3387 .cmd = IPVS_CMD_DEL_SERVICE,
3388 .flags = GENL_ADMIN_PERM,
3389 .policy = ip_vs_cmd_policy,
3390 .doit = ip_vs_genl_set_cmd,
3393 .cmd = IPVS_CMD_GET_SERVICE,
3394 .flags = GENL_ADMIN_PERM,
3395 .doit = ip_vs_genl_get_cmd,
3396 .dumpit = ip_vs_genl_dump_services,
3397 .policy = ip_vs_cmd_policy,
3400 .cmd = IPVS_CMD_NEW_DEST,
3401 .flags = GENL_ADMIN_PERM,
3402 .policy = ip_vs_cmd_policy,
3403 .doit = ip_vs_genl_set_cmd,
3406 .cmd = IPVS_CMD_SET_DEST,
3407 .flags = GENL_ADMIN_PERM,
3408 .policy = ip_vs_cmd_policy,
3409 .doit = ip_vs_genl_set_cmd,
3412 .cmd = IPVS_CMD_DEL_DEST,
3413 .flags = GENL_ADMIN_PERM,
3414 .policy = ip_vs_cmd_policy,
3415 .doit = ip_vs_genl_set_cmd,
3418 .cmd = IPVS_CMD_GET_DEST,
3419 .flags = GENL_ADMIN_PERM,
3420 .policy = ip_vs_cmd_policy,
3421 .dumpit = ip_vs_genl_dump_dests,
3424 .cmd = IPVS_CMD_NEW_DAEMON,
3425 .flags = GENL_ADMIN_PERM,
3426 .policy = ip_vs_cmd_policy,
3427 .doit = ip_vs_genl_set_cmd,
3430 .cmd = IPVS_CMD_DEL_DAEMON,
3431 .flags = GENL_ADMIN_PERM,
3432 .policy = ip_vs_cmd_policy,
3433 .doit = ip_vs_genl_set_cmd,
3436 .cmd = IPVS_CMD_GET_DAEMON,
3437 .flags = GENL_ADMIN_PERM,
3438 .dumpit = ip_vs_genl_dump_daemons,
3441 .cmd = IPVS_CMD_SET_CONFIG,
3442 .flags = GENL_ADMIN_PERM,
3443 .policy = ip_vs_cmd_policy,
3444 .doit = ip_vs_genl_set_cmd,
3447 .cmd = IPVS_CMD_GET_CONFIG,
3448 .flags = GENL_ADMIN_PERM,
3449 .doit = ip_vs_genl_get_cmd,
3452 .cmd = IPVS_CMD_GET_INFO,
3453 .flags = GENL_ADMIN_PERM,
3454 .doit = ip_vs_genl_get_cmd,
3457 .cmd = IPVS_CMD_ZERO,
3458 .flags = GENL_ADMIN_PERM,
3459 .policy = ip_vs_cmd_policy,
3460 .doit = ip_vs_genl_set_cmd,
3463 .cmd = IPVS_CMD_FLUSH,
3464 .flags = GENL_ADMIN_PERM,
3465 .doit = ip_vs_genl_set_cmd,
3469 static int __init ip_vs_genl_register(void)
3471 return genl_register_family_with_ops(&ip_vs_genl_family,
3472 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3475 static void ip_vs_genl_unregister(void)
3477 genl_unregister_family(&ip_vs_genl_family);
/* End of Generic Netlink interface definitions */

/*
 * per netns init/exit func.
 */
3485 int __net_init __ip_vs_control_init(struct net *net)
3488 struct netns_ipvs *ipvs = net_ipvs(net);
3489 struct ctl_table *tbl;
3491 atomic_set(&ipvs->dropentry, 0);
3492 spin_lock_init(&ipvs->dropentry_lock);
3493 spin_lock_init(&ipvs->droppacket_lock);
3494 spin_lock_init(&ipvs->securetcp_lock);
3495 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3497 /* Initialize rs_table */
3498 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3499 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3501 INIT_LIST_HEAD(&ipvs->dest_trash);
3502 atomic_set(&ipvs->ftpsvc_counter, 0);
3503 atomic_set(&ipvs->nullsvc_counter, 0);
3506 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3507 if (ipvs->tot_stats == NULL) {
3508 pr_err("%s(): no memory.\n", __func__);
3511 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3512 if (!ipvs->cpustats) {
3513 pr_err("%s() alloc_percpu failed\n", __func__);
3516 spin_lock_init(&ipvs->tot_stats->lock);
3518 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3519 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3520 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3521 &ip_vs_stats_percpu_fops);
3523 if (!net_eq(net, &init_net)) {
3524 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3529 /* Initialize sysctl defaults */
3531 ipvs->sysctl_amemthresh = 1024;
3532 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3533 ipvs->sysctl_am_droprate = 10;
3534 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3535 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3536 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3537 #ifdef CONFIG_IP_VS_NFCT
3538 tbl[idx++].data = &ipvs->sysctl_conntrack;
3540 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3541 ipvs->sysctl_snat_reroute = 1;
3542 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3543 ipvs->sysctl_sync_ver = 1;
3544 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3545 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3546 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3547 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3548 ipvs->sysctl_sync_threshold[0] = 3;
3549 ipvs->sysctl_sync_threshold[1] = 50;
3550 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3551 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3552 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3555 #ifdef CONFIG_SYSCTL
3556 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3558 if (ipvs->sysctl_hdr == NULL) {
3559 if (!net_eq(net, &init_net))
3564 ip_vs_new_estimator(net, ipvs->tot_stats);
3565 ipvs->sysctl_tbl = tbl;
3566 /* Schedule defense work */
3567 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3568 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3572 free_percpu(ipvs->cpustats);
3574 kfree(ipvs->tot_stats);
3578 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3580 struct netns_ipvs *ipvs = net_ipvs(net);
3582 ip_vs_trash_cleanup(net);
3583 ip_vs_kill_estimator(net, ipvs->tot_stats);
3584 cancel_delayed_work_sync(&ipvs->defense_work);
3585 cancel_work_sync(&ipvs->defense_work.work);
3586 #ifdef CONFIG_SYSCTL
3587 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3589 proc_net_remove(net, "ip_vs_stats_percpu");
3590 proc_net_remove(net, "ip_vs_stats");
3591 proc_net_remove(net, "ip_vs");
3592 free_percpu(ipvs->cpustats);
3593 kfree(ipvs->tot_stats);
3596 static struct pernet_operations ipvs_control_ops = {
3597 .init = __ip_vs_control_init,
3598 .exit = __ip_vs_control_cleanup,
3601 int __init ip_vs_control_init(void)
3608 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3609 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3610 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3611 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3614 ret = register_pernet_subsys(&ipvs_control_ops);
3616 pr_err("cannot register namespace.\n");
3620 smp_wmb(); /* Do we really need it now ? */
3622 ret = nf_register_sockopt(&ip_vs_sockopts);
3624 pr_err("cannot register sockopt.\n");
3628 ret = ip_vs_genl_register();
3630 pr_err("cannot register Generic Netlink interface.\n");
3631 nf_unregister_sockopt(&ip_vs_sockopts);
3639 unregister_pernet_subsys(&ipvs_control_ops);
3645 void ip_vs_control_cleanup(void)
3648 unregister_pernet_subsys(&ipvs_control_ops);
3649 ip_vs_genl_unregister();
3650 nf_unregister_sockopt(&ip_vs_sockopts);