2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
/* Prefix all pr_*() output from this file with "IPVS: ". */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
43 #ifdef CONFIG_IP_VS_IPV6
45 #include <net/ip6_route.h>
47 #include <net/route.h>
49 #include <net/genetlink.h>
51 #include <asm/uaccess.h>
53 #include <net/ip_vs.h>
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
/* sysctl variables */

#ifdef CONFIG_IP_VS_DEBUG
/* Runtime debug verbosity, exposed via the "debug_level" sysctl below. */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug verbosity level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/*
 * Returns non-zero if @addr routes to a loopback device in @net,
 * i.e. the address is local to this host.
 */
static int __ip_vs_addr_is_local_v6(struct net *net,
				    const struct in6_addr *addr)
{
	struct rt6_info *rt;
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};

	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
		return 1;

	return 0;
}
#endif
92 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
95 static void update_defense_level(struct netns_ipvs *ipvs)
98 static int old_secure_tcp = 0;
103 /* we only count free and buffered memory (in pages) */
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
111 nomem = (availmem < ipvs->sysctl_amemthresh);
116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
119 atomic_set(&ipvs->dropentry, 0);
123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
126 atomic_set(&ipvs->dropentry, 0);
131 atomic_set(&ipvs->dropentry, 1);
133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
138 atomic_set(&ipvs->dropentry, 1);
141 spin_unlock(&ipvs->dropentry_lock);
144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
166 ipvs->sysctl_drop_packet = 1;
170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
173 spin_unlock(&ipvs->droppacket_lock);
176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
179 if (old_secure_tcp >= 2)
184 if (old_secure_tcp < 2)
186 ipvs->sysctl_secure_tcp = 2;
188 if (old_secure_tcp >= 2)
194 if (old_secure_tcp < 2)
197 if (old_secure_tcp >= 2)
199 ipvs->sysctl_secure_tcp = 1;
203 if (old_secure_tcp < 2)
207 old_secure_tcp = ipvs->sysctl_secure_tcp;
209 ip_vs_protocol_timeout_change(ipvs,
210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
218 * Timer for checking the defense
220 #define DEFENSE_TIMER_PERIOD 1*HZ
222 static void defense_work_handler(struct work_struct *work)
224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
227 update_defense_level(ipvs);
228 if (atomic_read(&ipvs->dropentry))
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
234 ip_vs_use_count_inc(void)
236 return try_module_get(THIS_MODULE);
240 ip_vs_use_count_dec(void)
242 module_put(THIS_MODULE);
247 * Hash table: for virtual service lookups
249 #define IP_VS_SVC_TAB_BITS 8
250 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
253 /* the service table hashed by <protocol, addr, port> */
254 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255 /* the service table hashed by fwmark */
256 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
260 * Returns hash value for virtual service
262 static inline unsigned
263 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264 const union nf_inet_addr *addr, __be16 port)
266 register unsigned porth = ntohs(port);
267 __be32 addr_fold = addr->ip;
269 #ifdef CONFIG_IP_VS_IPV6
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
274 addr_fold ^= ((size_t)net>>8);
276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
277 & IP_VS_SVC_TAB_MASK;
281 * Returns hash value of fwmark for virtual service lookup
283 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
290 * or in the ip_vs_svc_fwm_table by fwmark.
291 * Should be called with locked tables.
293 static int ip_vs_svc_hash(struct ip_vs_service *svc)
297 if (svc->flags & IP_VS_SVC_F_HASHED) {
298 pr_err("%s(): request for already hashed, called from %pF\n",
299 __func__, __builtin_return_address(0));
303 if (svc->fwmark == 0) {
305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308 &svc->addr, svc->port);
309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
312 * Hash it by fwmark in svc_fwm_table
314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
318 svc->flags |= IP_VS_SVC_F_HASHED;
319 /* increase its refcnt because it is referenced by the svc table */
320 atomic_inc(&svc->refcnt);
326 * Unhashes a service from svc_table / svc_fwm_table.
327 * Should be called with locked tables.
329 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
332 pr_err("%s(): request for unhash flagged, called from %pF\n",
333 __func__, __builtin_return_address(0));
337 if (svc->fwmark == 0) {
338 /* Remove it from the svc_table table */
339 list_del(&svc->s_list);
341 /* Remove it from the svc_fwm_table table */
342 list_del(&svc->f_list);
345 svc->flags &= ~IP_VS_SVC_F_HASHED;
346 atomic_dec(&svc->refcnt);
352 * Get service by {netns, proto,addr,port} in the service table.
354 static inline struct ip_vs_service *
355 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
356 const union nf_inet_addr *vaddr, __be16 vport)
359 struct ip_vs_service *svc;
361 /* Check for "full" addressed entries */
362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
367 && (svc->port == vport)
368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
380 * Get service by {fwmark} in the service table.
382 static inline struct ip_vs_service *
383 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
386 struct ip_vs_service *svc;
388 /* Check for fwmark addressed entries */
389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
402 struct ip_vs_service *
403 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
404 const union nf_inet_addr *vaddr, __be16 vport)
406 struct ip_vs_service *svc;
407 struct netns_ipvs *ipvs = net_ipvs(net);
409 read_lock(&__ip_vs_svc_lock);
412 * Check the table hashed by fwmark first
415 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
421 * Check the table hashed by <protocol,addr,port>
422 * for "full" addressed entries
424 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
427 && protocol == IPPROTO_TCP
428 && atomic_read(&ipvs->ftpsvc_counter)
429 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
431 * Check if ftp service entry exists, the packet
432 * might belong to FTP data connections.
434 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
438 && atomic_read(&ipvs->nullsvc_counter)) {
440 * Check if the catch-all port (port zero) exists
442 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
447 atomic_inc(&svc->usecnt);
448 read_unlock(&__ip_vs_svc_lock);
450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
451 fwmark, ip_vs_proto_name(protocol),
452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
453 svc ? "hit" : "not hit");
460 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
462 atomic_inc(&svc->refcnt);
467 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
469 struct ip_vs_service *svc = dest->svc;
472 if (atomic_dec_and_test(&svc->refcnt)) {
473 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
475 IP_VS_DBG_ADDR(svc->af, &svc->addr),
476 ntohs(svc->port), atomic_read(&svc->usecnt));
477 free_percpu(svc->stats.cpustats);
484 * Returns hash value for real service
486 static inline unsigned ip_vs_rs_hashkey(int af,
487 const union nf_inet_addr *addr,
490 register unsigned porth = ntohs(port);
491 __be32 addr_fold = addr->ip;
493 #ifdef CONFIG_IP_VS_IPV6
495 addr_fold = addr->ip6[0]^addr->ip6[1]^
496 addr->ip6[2]^addr->ip6[3];
499 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
504 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
505 * should be called with locked tables.
507 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
511 if (!list_empty(&dest->d_list)) {
516 * Hash by proto,addr,port,
517 * which are the parameters of the real service.
519 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
521 list_add(&dest->d_list, &ipvs->rs_table[hash]);
527 * UNhashes ip_vs_dest from rs_table.
528 * should be called with locked tables.
530 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
533 * Remove it from the rs_table table.
535 if (!list_empty(&dest->d_list)) {
536 list_del(&dest->d_list);
537 INIT_LIST_HEAD(&dest->d_list);
544 * Lookup real service by <proto,addr,port> in the real service table.
547 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
548 const union nf_inet_addr *daddr,
551 struct netns_ipvs *ipvs = net_ipvs(net);
553 struct ip_vs_dest *dest;
556 * Check for "full" addressed entries
557 * Return the first found entry
559 hash = ip_vs_rs_hashkey(af, daddr, dport);
561 read_lock(&ipvs->rs_lock);
562 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
564 && ip_vs_addr_equal(af, &dest->addr, daddr)
565 && (dest->port == dport)
566 && ((dest->protocol == protocol) ||
569 read_unlock(&ipvs->rs_lock);
573 read_unlock(&ipvs->rs_lock);
579 * Lookup destination by {addr,port} in the given service
581 static struct ip_vs_dest *
582 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
585 struct ip_vs_dest *dest;
588 * Find the destination for the given service
590 list_for_each_entry(dest, &svc->destinations, n_list) {
591 if ((dest->af == svc->af)
592 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
593 && (dest->port == dport)) {
603 * Find destination by {daddr,dport,vaddr,protocol}
604 * Cretaed to be used in ip_vs_process_message() in
605 * the backup synchronization daemon. It finds the
606 * destination to be bound to the received connection
609 * ip_vs_lookup_real_service() looked promissing, but
610 * seems not working as expected.
612 struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
613 const union nf_inet_addr *daddr,
615 const union nf_inet_addr *vaddr,
616 __be16 vport, __u16 protocol, __u32 fwmark)
618 struct ip_vs_dest *dest;
619 struct ip_vs_service *svc;
621 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
624 dest = ip_vs_lookup_dest(svc, daddr, dport);
626 atomic_inc(&dest->refcnt);
627 ip_vs_service_put(svc);
632 * Lookup dest by {svc,addr,port} in the destination trash.
633 * The destination trash is used to hold the destinations that are removed
634 * from the service table but are still referenced by some conn entries.
635 * The reason to add the destination trash is when the dest is temporary
636 * down (either by administrator or by monitor program), the dest can be
637 * picked back from the trash, the remaining connections to the dest can
638 * continue, and the counting information of the dest is also useful for
641 static struct ip_vs_dest *
642 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
645 struct ip_vs_dest *dest, *nxt;
646 struct netns_ipvs *ipvs = net_ipvs(svc->net);
649 * Find the destination in trash
651 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
652 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
655 IP_VS_DBG_ADDR(svc->af, &dest->addr),
657 atomic_read(&dest->refcnt));
658 if (dest->af == svc->af &&
659 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
660 dest->port == dport &&
661 dest->vfwmark == svc->fwmark &&
662 dest->protocol == svc->protocol &&
664 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
665 dest->vport == svc->port))) {
671 * Try to purge the destination from trash if not referenced
673 if (atomic_read(&dest->refcnt) == 1) {
674 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
677 IP_VS_DBG_ADDR(svc->af, &dest->addr),
679 list_del(&dest->n_list);
680 ip_vs_dst_reset(dest);
681 __ip_vs_unbind_svc(dest);
682 free_percpu(dest->stats.cpustats);
692 * Clean up all the destinations in the trash
693 * Called by the ip_vs_control_cleanup()
695 * When the ip_vs_control_clearup is activated by ipvs module exit,
696 * the service tables must have been flushed and all the connections
697 * are expired, and the refcnt of each destination in the trash must
698 * be 1, so we simply release them here.
700 static void ip_vs_trash_cleanup(struct net *net)
702 struct ip_vs_dest *dest, *nxt;
703 struct netns_ipvs *ipvs = net_ipvs(net);
705 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
706 list_del(&dest->n_list);
707 ip_vs_dst_reset(dest);
708 __ip_vs_unbind_svc(dest);
709 free_percpu(dest->stats.cpustats);
715 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
717 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
719 spin_lock_bh(&src->lock);
721 IP_VS_SHOW_STATS_COUNTER(conns);
722 IP_VS_SHOW_STATS_COUNTER(inpkts);
723 IP_VS_SHOW_STATS_COUNTER(outpkts);
724 IP_VS_SHOW_STATS_COUNTER(inbytes);
725 IP_VS_SHOW_STATS_COUNTER(outbytes);
727 ip_vs_read_estimator(dst, src);
729 spin_unlock_bh(&src->lock);
733 ip_vs_zero_stats(struct ip_vs_stats *stats)
735 spin_lock_bh(&stats->lock);
737 /* get current counters as zero point, rates are zeroed */
739 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
741 IP_VS_ZERO_STATS_COUNTER(conns);
742 IP_VS_ZERO_STATS_COUNTER(inpkts);
743 IP_VS_ZERO_STATS_COUNTER(outpkts);
744 IP_VS_ZERO_STATS_COUNTER(inbytes);
745 IP_VS_ZERO_STATS_COUNTER(outbytes);
747 ip_vs_zero_estimator(stats);
749 spin_unlock_bh(&stats->lock);
753 * Update a destination in the given service
756 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
757 struct ip_vs_dest_user_kern *udest, int add)
759 struct netns_ipvs *ipvs = net_ipvs(svc->net);
762 /* set the weight and the flags */
763 atomic_set(&dest->weight, udest->weight);
764 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
765 conn_flags |= IP_VS_CONN_F_INACTIVE;
767 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
768 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
769 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
772 * Put the real service in rs_table if not present.
773 * For now only for NAT!
775 write_lock_bh(&ipvs->rs_lock);
776 ip_vs_rs_hash(ipvs, dest);
777 write_unlock_bh(&ipvs->rs_lock);
779 atomic_set(&dest->conn_flags, conn_flags);
781 /* bind the service */
783 __ip_vs_bind_svc(dest, svc);
785 if (dest->svc != svc) {
786 __ip_vs_unbind_svc(dest);
787 ip_vs_zero_stats(&dest->stats);
788 __ip_vs_bind_svc(dest, svc);
792 /* set the dest status flags */
793 dest->flags |= IP_VS_DEST_F_AVAILABLE;
795 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
796 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
797 dest->u_threshold = udest->u_threshold;
798 dest->l_threshold = udest->l_threshold;
800 spin_lock(&dest->dst_lock);
801 ip_vs_dst_reset(dest);
802 spin_unlock(&dest->dst_lock);
805 ip_vs_new_estimator(svc->net, &dest->stats);
807 write_lock_bh(&__ip_vs_svc_lock);
809 /* Wait until all other svc users go away */
810 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
813 list_add(&dest->n_list, &svc->destinations);
817 /* call the update_service, because server weight may be changed */
818 if (svc->scheduler->update_service)
819 svc->scheduler->update_service(svc);
821 write_unlock_bh(&__ip_vs_svc_lock);
826 * Create a destination for the given service
829 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
830 struct ip_vs_dest **dest_p)
832 struct ip_vs_dest *dest;
837 #ifdef CONFIG_IP_VS_IPV6
838 if (svc->af == AF_INET6) {
839 atype = ipv6_addr_type(&udest->addr.in6);
840 if ((!(atype & IPV6_ADDR_UNICAST) ||
841 atype & IPV6_ADDR_LINKLOCAL) &&
842 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
847 atype = inet_addr_type(svc->net, udest->addr.ip);
848 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
852 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
854 pr_err("%s(): no memory.\n", __func__);
857 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
858 if (!dest->stats.cpustats) {
859 pr_err("%s() alloc_percpu failed\n", __func__);
864 dest->protocol = svc->protocol;
865 dest->vaddr = svc->addr;
866 dest->vport = svc->port;
867 dest->vfwmark = svc->fwmark;
868 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
869 dest->port = udest->port;
871 atomic_set(&dest->activeconns, 0);
872 atomic_set(&dest->inactconns, 0);
873 atomic_set(&dest->persistconns, 0);
874 atomic_set(&dest->refcnt, 1);
876 INIT_LIST_HEAD(&dest->d_list);
877 spin_lock_init(&dest->dst_lock);
878 spin_lock_init(&dest->stats.lock);
879 __ip_vs_update_dest(svc, dest, udest, 1);
893 * Add a destination into an existing service
896 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
898 struct ip_vs_dest *dest;
899 union nf_inet_addr daddr;
900 __be16 dport = udest->port;
905 if (udest->weight < 0) {
906 pr_err("%s(): server weight less than zero\n", __func__);
910 if (udest->l_threshold > udest->u_threshold) {
911 pr_err("%s(): lower threshold is higher than upper threshold\n",
916 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
919 * Check if the dest already exists in the list
921 dest = ip_vs_lookup_dest(svc, &daddr, dport);
924 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
929 * Check if the dest already exists in the trash and
930 * is from the same service
932 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
935 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
936 "dest->refcnt=%d, service %u/%s:%u\n",
937 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
938 atomic_read(&dest->refcnt),
940 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
944 * Get the destination from the trash
946 list_del(&dest->n_list);
948 __ip_vs_update_dest(svc, dest, udest, 1);
952 * Allocate and initialize the dest structure
954 ret = ip_vs_new_dest(svc, udest, &dest);
963 * Edit a destination in the given service
966 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
968 struct ip_vs_dest *dest;
969 union nf_inet_addr daddr;
970 __be16 dport = udest->port;
974 if (udest->weight < 0) {
975 pr_err("%s(): server weight less than zero\n", __func__);
979 if (udest->l_threshold > udest->u_threshold) {
980 pr_err("%s(): lower threshold is higher than upper threshold\n",
985 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
988 * Lookup the destination list
990 dest = ip_vs_lookup_dest(svc, &daddr, dport);
993 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
997 __ip_vs_update_dest(svc, dest, udest, 0);
1005 * Delete a destination (must be already unlinked from the service)
1007 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1009 struct netns_ipvs *ipvs = net_ipvs(net);
1011 ip_vs_kill_estimator(net, &dest->stats);
1014 * Remove it from the d-linked list with the real services.
1016 write_lock_bh(&ipvs->rs_lock);
1017 ip_vs_rs_unhash(dest);
1018 write_unlock_bh(&ipvs->rs_lock);
1021 * Decrease the refcnt of the dest, and free the dest
1022 * if nobody refers to it (refcnt=0). Otherwise, throw
1023 * the destination into the trash.
1025 if (atomic_dec_and_test(&dest->refcnt)) {
1026 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1028 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1030 ip_vs_dst_reset(dest);
1031 /* simply decrease svc->refcnt here, let the caller check
1032 and release the service if nobody refers to it.
1033 Only user context can release destination and service,
1034 and only one user context can update virtual service at a
1035 time, so the operation here is OK */
1036 atomic_dec(&dest->svc->refcnt);
1037 free_percpu(dest->stats.cpustats);
1040 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1041 "dest->refcnt=%d\n",
1042 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1044 atomic_read(&dest->refcnt));
1045 list_add(&dest->n_list, &ipvs->dest_trash);
1046 atomic_inc(&dest->refcnt);
1052 * Unlink a destination from the given service
1054 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1055 struct ip_vs_dest *dest,
1058 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1061 * Remove it from the d-linked destination list.
1063 list_del(&dest->n_list);
1067 * Call the update_service function of its scheduler
1069 if (svcupd && svc->scheduler->update_service)
1070 svc->scheduler->update_service(svc);
1075 * Delete a destination server in the given service
1078 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1080 struct ip_vs_dest *dest;
1081 __be16 dport = udest->port;
1085 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1088 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1092 write_lock_bh(&__ip_vs_svc_lock);
1095 * Wait until all other svc users go away.
1097 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1100 * Unlink dest from the service
1102 __ip_vs_unlink_dest(svc, dest, 1);
1104 write_unlock_bh(&__ip_vs_svc_lock);
1107 * Delete the destination
1109 __ip_vs_del_dest(svc->net, dest);
1118 * Add a service into the service hash table
1121 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1122 struct ip_vs_service **svc_p)
1125 struct ip_vs_scheduler *sched = NULL;
1126 struct ip_vs_pe *pe = NULL;
1127 struct ip_vs_service *svc = NULL;
1128 struct netns_ipvs *ipvs = net_ipvs(net);
1130 /* increase the module use count */
1131 ip_vs_use_count_inc();
1133 /* Lookup the scheduler by 'u->sched_name' */
1134 sched = ip_vs_scheduler_get(u->sched_name);
1135 if (sched == NULL) {
1136 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1141 if (u->pe_name && *u->pe_name) {
1142 pe = ip_vs_pe_getbyname(u->pe_name);
1144 pr_info("persistence engine module ip_vs_pe_%s "
1145 "not found\n", u->pe_name);
1151 #ifdef CONFIG_IP_VS_IPV6
1152 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1158 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1160 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1164 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1165 if (!svc->stats.cpustats) {
1166 pr_err("%s() alloc_percpu failed\n", __func__);
1170 /* I'm the first user of the service */
1171 atomic_set(&svc->usecnt, 0);
1172 atomic_set(&svc->refcnt, 0);
1175 svc->protocol = u->protocol;
1176 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1177 svc->port = u->port;
1178 svc->fwmark = u->fwmark;
1179 svc->flags = u->flags;
1180 svc->timeout = u->timeout * HZ;
1181 svc->netmask = u->netmask;
1184 INIT_LIST_HEAD(&svc->destinations);
1185 rwlock_init(&svc->sched_lock);
1186 spin_lock_init(&svc->stats.lock);
1188 /* Bind the scheduler */
1189 ret = ip_vs_bind_scheduler(svc, sched);
1194 /* Bind the ct retriever */
1195 ip_vs_bind_pe(svc, pe);
1198 /* Update the virtual service counters */
1199 if (svc->port == FTPPORT)
1200 atomic_inc(&ipvs->ftpsvc_counter);
1201 else if (svc->port == 0)
1202 atomic_inc(&ipvs->nullsvc_counter);
1204 ip_vs_new_estimator(net, &svc->stats);
1206 /* Count only IPv4 services for old get/setsockopt interface */
1207 if (svc->af == AF_INET)
1208 ipvs->num_services++;
1210 /* Hash the service into the service table */
1211 write_lock_bh(&__ip_vs_svc_lock);
1212 ip_vs_svc_hash(svc);
1213 write_unlock_bh(&__ip_vs_svc_lock);
1221 ip_vs_unbind_scheduler(svc);
1224 ip_vs_app_inc_put(svc->inc);
1227 if (svc->stats.cpustats)
1228 free_percpu(svc->stats.cpustats);
1231 ip_vs_scheduler_put(sched);
1234 /* decrease the module use count */
1235 ip_vs_use_count_dec();
1242 * Edit a service and bind it with a new scheduler
1245 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1247 struct ip_vs_scheduler *sched, *old_sched;
1248 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1252 * Lookup the scheduler, by 'u->sched_name'
1254 sched = ip_vs_scheduler_get(u->sched_name);
1255 if (sched == NULL) {
1256 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1261 if (u->pe_name && *u->pe_name) {
1262 pe = ip_vs_pe_getbyname(u->pe_name);
1264 pr_info("persistence engine module ip_vs_pe_%s "
1265 "not found\n", u->pe_name);
1272 #ifdef CONFIG_IP_VS_IPV6
1273 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1279 write_lock_bh(&__ip_vs_svc_lock);
1282 * Wait until all other svc users go away.
1284 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1287 * Set the flags and timeout value
1289 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1290 svc->timeout = u->timeout * HZ;
1291 svc->netmask = u->netmask;
1293 old_sched = svc->scheduler;
1294 if (sched != old_sched) {
1296 * Unbind the old scheduler
1298 if ((ret = ip_vs_unbind_scheduler(svc))) {
1304 * Bind the new scheduler
1306 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1308 * If ip_vs_bind_scheduler fails, restore the old
1310 * The main reason of failure is out of memory.
1312 * The question is if the old scheduler can be
1313 * restored all the time. TODO: if it cannot be
1314 * restored some time, we must delete the service,
1315 * otherwise the system may crash.
1317 ip_vs_bind_scheduler(svc, old_sched);
1325 ip_vs_unbind_pe(svc);
1326 ip_vs_bind_pe(svc, pe);
1330 write_unlock_bh(&__ip_vs_svc_lock);
1332 ip_vs_scheduler_put(old_sched);
1333 ip_vs_pe_put(old_pe);
1339 * Delete a service from the service list
1340 * - The service must be unlinked, unlocked and not referenced!
1341 * - We are called under _bh lock
1343 static void __ip_vs_del_service(struct ip_vs_service *svc)
1345 struct ip_vs_dest *dest, *nxt;
1346 struct ip_vs_scheduler *old_sched;
1347 struct ip_vs_pe *old_pe;
1348 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1350 pr_info("%s: enter\n", __func__);
1352 /* Count only IPv4 services for old get/setsockopt interface */
1353 if (svc->af == AF_INET)
1354 ipvs->num_services--;
1356 ip_vs_kill_estimator(svc->net, &svc->stats);
1358 /* Unbind scheduler */
1359 old_sched = svc->scheduler;
1360 ip_vs_unbind_scheduler(svc);
1361 ip_vs_scheduler_put(old_sched);
1363 /* Unbind persistence engine */
1365 ip_vs_unbind_pe(svc);
1366 ip_vs_pe_put(old_pe);
1368 /* Unbind app inc */
1370 ip_vs_app_inc_put(svc->inc);
1375 * Unlink the whole destination list
1377 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1378 __ip_vs_unlink_dest(svc, dest, 0);
1379 __ip_vs_del_dest(svc->net, dest);
1383 * Update the virtual service counters
1385 if (svc->port == FTPPORT)
1386 atomic_dec(&ipvs->ftpsvc_counter);
1387 else if (svc->port == 0)
1388 atomic_dec(&ipvs->nullsvc_counter);
1391 * Free the service if nobody refers to it
1393 if (atomic_read(&svc->refcnt) == 0) {
1394 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1396 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1397 ntohs(svc->port), atomic_read(&svc->usecnt));
1398 free_percpu(svc->stats.cpustats);
1402 /* decrease the module use count */
1403 ip_vs_use_count_dec();
1407 * Unlink a service from list and try to delete it if its refcnt reached 0
1409 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1412 * Unhash it from the service table
1414 write_lock_bh(&__ip_vs_svc_lock);
1416 ip_vs_svc_unhash(svc);
1419 * Wait until all the svc users go away.
1421 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1423 __ip_vs_del_service(svc);
1425 write_unlock_bh(&__ip_vs_svc_lock);
1429 * Delete a service from the service list
1431 static int ip_vs_del_service(struct ip_vs_service *svc)
1435 ip_vs_unlink_service(svc);
1442 * Flush all the virtual services
1444 static int ip_vs_flush(struct net *net)
1447 struct ip_vs_service *svc, *nxt;
1450 * Flush the service table hashed by <netns,protocol,addr,port>
1452 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1453 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1455 if (net_eq(svc->net, net))
1456 ip_vs_unlink_service(svc);
1461 * Flush the service table hashed by fwmark
1463 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1464 list_for_each_entry_safe(svc, nxt,
1465 &ip_vs_svc_fwm_table[idx], f_list) {
1466 if (net_eq(svc->net, net))
1467 ip_vs_unlink_service(svc);
1476 * Zero counters in a service or all services
1478 static int ip_vs_zero_service(struct ip_vs_service *svc)
1480 struct ip_vs_dest *dest;
1482 write_lock_bh(&__ip_vs_svc_lock);
1483 list_for_each_entry(dest, &svc->destinations, n_list) {
1484 ip_vs_zero_stats(&dest->stats);
1486 ip_vs_zero_stats(&svc->stats);
1487 write_unlock_bh(&__ip_vs_svc_lock);
1491 static int ip_vs_zero_all(struct net *net)
1494 struct ip_vs_service *svc;
1496 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1497 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1498 if (net_eq(svc->net, net))
1499 ip_vs_zero_service(svc);
1503 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1504 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1505 if (net_eq(svc->net, net))
1506 ip_vs_zero_service(svc);
1510 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1516 proc_do_defense_mode(ctl_table *table, int write,
1517 void __user *buffer, size_t *lenp, loff_t *ppos)
1519 struct net *net = current->nsproxy->net_ns;
1520 int *valp = table->data;
1524 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1525 if (write && (*valp != val)) {
1526 if ((*valp < 0) || (*valp > 3)) {
1527 /* Restore the correct value */
1530 update_defense_level(net_ipvs(net));
1538 proc_do_sync_threshold(ctl_table *table, int write,
1539 void __user *buffer, size_t *lenp, loff_t *ppos)
1541 int *valp = table->data;
1545 /* backup the value first */
1546 memcpy(val, valp, sizeof(val));
1548 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1549 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550 /* Restore the correct value */
1551 memcpy(valp, val, sizeof(val));
/*
 * proc_do_sync_mode - sysctl handler for sync_version (0 or 1).  An
 * out-of-range write restores the old value; a valid change is pushed to
 * the sync daemon via ip_vs_sync_switch_mode().
 * NOTE(review): excerpt elides some original lines; code kept byte-identical.
 */
1557 proc_do_sync_mode(ctl_table *table, int write,
1558 void __user *buffer, size_t *lenp, loff_t *ppos)
1560 int *valp = table->data;
1564 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565 if (write && (*valp != val)) {
/* only 0 (old sync protocol) and 1 (new) are accepted */
1566 if ((*valp < 0) || (*valp > 1)) {
1567 /* Restore the correct value */
1570 struct net *net = current->nsproxy->net_ns;
1571 ip_vs_sync_switch_mode(net, val);
1578 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1579 * Do not change order or insert new entries without
1580 * align with netns init in __ip_vs_control_init()
/*
 * vs_vars - template for the /proc/sys/net/ipv4/vs/ sysctl table.
 * Entry order must stay aligned with the per-netns copy made in
 * __ip_vs_control_init() (see comment above the table in the original).
 * NOTE(review): excerpt elides the .data/.mode fields and the closing
 * braces of each entry; code kept byte-identical.  Also note the
 * inconsistent use of "&proc_dointvec" vs plain "proc_dointvec" below —
 * harmless (function designator decays to pointer) but worth normalizing
 * when the full file is edited.
 */
1583 static struct ctl_table vs_vars[] = {
1585 .procname = "amemthresh",
1586 .maxlen = sizeof(int),
1588 .proc_handler = proc_dointvec,
1591 .procname = "am_droprate",
1592 .maxlen = sizeof(int),
1594 .proc_handler = proc_dointvec,
1597 .procname = "drop_entry",
1598 .maxlen = sizeof(int),
1600 .proc_handler = proc_do_defense_mode,
1603 .procname = "drop_packet",
1604 .maxlen = sizeof(int),
1606 .proc_handler = proc_do_defense_mode,
1608 #ifdef CONFIG_IP_VS_NFCT
1610 .procname = "conntrack",
1611 .maxlen = sizeof(int),
1613 .proc_handler = &proc_dointvec,
1617 .procname = "secure_tcp",
1618 .maxlen = sizeof(int),
1620 .proc_handler = proc_do_defense_mode,
1623 .procname = "snat_reroute",
1624 .maxlen = sizeof(int),
1626 .proc_handler = &proc_dointvec,
1629 .procname = "sync_version",
1630 .maxlen = sizeof(int),
1632 .proc_handler = &proc_do_sync_mode,
1635 .procname = "cache_bypass",
1636 .maxlen = sizeof(int),
1638 .proc_handler = proc_dointvec,
1641 .procname = "expire_nodest_conn",
1642 .maxlen = sizeof(int),
1644 .proc_handler = proc_dointvec,
1647 .procname = "expire_quiescent_template",
1648 .maxlen = sizeof(int),
1650 .proc_handler = proc_dointvec,
/* two-int vector; validated by proc_do_sync_threshold */
1653 .procname = "sync_threshold",
1655 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1657 .proc_handler = proc_do_sync_threshold,
1660 .procname = "nat_icmp_send",
1661 .maxlen = sizeof(int),
1663 .proc_handler = proc_dointvec,
1665 #ifdef CONFIG_IP_VS_DEBUG
1667 .procname = "debug_level",
1668 .data = &sysctl_ip_vs_debug_level,
1669 .maxlen = sizeof(int),
1671 .proc_handler = proc_dointvec,
/*
 * The timeout_* entries below point at the DoS-mode timeout table and
 * are stored/edited in jiffies (proc_dointvec_jiffies).
 */
1676 .procname = "timeout_established",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1678 .maxlen = sizeof(int),
1680 .proc_handler = proc_dointvec_jiffies,
1683 .procname = "timeout_synsent",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1685 .maxlen = sizeof(int),
1687 .proc_handler = proc_dointvec_jiffies,
1690 .procname = "timeout_synrecv",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1692 .maxlen = sizeof(int),
1694 .proc_handler = proc_dointvec_jiffies,
1697 .procname = "timeout_finwait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1699 .maxlen = sizeof(int),
1701 .proc_handler = proc_dointvec_jiffies,
1704 .procname = "timeout_timewait",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1706 .maxlen = sizeof(int),
1708 .proc_handler = proc_dointvec_jiffies,
1711 .procname = "timeout_close",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1713 .maxlen = sizeof(int),
1715 .proc_handler = proc_dointvec_jiffies,
1718 .procname = "timeout_closewait",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1720 .maxlen = sizeof(int),
1722 .proc_handler = proc_dointvec_jiffies,
1725 .procname = "timeout_lastack",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1727 .maxlen = sizeof(int),
1729 .proc_handler = proc_dointvec_jiffies,
1732 .procname = "timeout_listen",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1734 .maxlen = sizeof(int),
1736 .proc_handler = proc_dointvec_jiffies,
1739 .procname = "timeout_synack",
1740 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1741 .maxlen = sizeof(int),
1743 .proc_handler = proc_dointvec_jiffies,
1746 .procname = "timeout_udp",
1747 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1748 .maxlen = sizeof(int),
1750 .proc_handler = proc_dointvec_jiffies,
1753 .procname = "timeout_icmp",
1754 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1755 .maxlen = sizeof(int),
1757 .proc_handler = proc_dointvec_jiffies,
/*
 * net_vs_ctl_path - sysctl path "net/ipv4/vs" under which the vs_vars
 * table is registered; exported for other IPVS modules.
 * NOTE(review): excerpt elides the terminating { } entry.
 */
1763 const struct ctl_path net_vs_ctl_path[] = {
1764 { .procname = "net", },
1765 { .procname = "ipv4", },
1766 { .procname = "vs", },
1769 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1771 #ifdef CONFIG_PROC_FS
1774 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1775 struct list_head *table;
1780 * Write the contents of the VS rule table to a PROCfs file.
1781 * (It is kept just for backward compatibility)
/*
 * ip_vs_fwd_name - map a connection's forwarding-method flag bits to a
 * short human-readable name for /proc output.
 * NOTE(review): excerpt elides the returned string literals and the
 * default case; code kept byte-identical.
 */
1783 static inline const char *ip_vs_fwd_name(unsigned flags)
1785 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1786 case IP_VS_CONN_F_LOCALNODE:
1788 case IP_VS_CONN_F_TUNNEL:
1790 case IP_VS_CONN_F_DROUTE:
1798 /* Get the Nth entry in the two lists */
/* Get the Nth entry in the two lists */
/*
 * ip_vs_info_array - seq_file helper: find the service at position @pos,
 * scanning the protocol-hashed table first and the fwmark-hashed table
 * second, restricted to services of this seq's netns.  Records which
 * table (and, per the elided lines, which bucket) the iterator is in.
 * NOTE(review): excerpt elides return statements and the NULL fallthrough.
 */
1799 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1801 struct net *net = seq_file_net(seq);
1802 struct ip_vs_iter *iter = seq->private;
1804 struct ip_vs_service *svc;
1806 /* look in hash by protocol */
1807 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1808 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* pos-- == 0 selects the pos'th matching entry */
1809 if (net_eq(svc->net, net) && pos-- == 0) {
1810 iter->table = ip_vs_svc_table;
1817 /* keep looking in fwmark */
1818 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1819 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1820 if (net_eq(svc->net, net) && pos-- == 0) {
1821 iter->table = ip_vs_svc_fwm_table;
/*
 * ip_vs_info_seq_start - seq_file ->start: take the service-table read
 * lock (released in ->stop) and position the iterator; position 0 yields
 * SEQ_START_TOKEN so ->show prints the header first.
 */
1831 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1832 __acquires(__ip_vs_svc_lock)
1835 read_lock_bh(&__ip_vs_svc_lock);
1836 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * ip_vs_info_seq_next - seq_file ->next: advance to the next service,
 * first within the current bucket, then across buckets, then switching
 * from the protocol-hashed table to the fwmark-hashed table.
 * NOTE(review): excerpt elides several lines (pos increment, returns,
 * end-of-iteration NULL); code kept byte-identical.
 */
1840 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1842 struct list_head *e;
1843 struct ip_vs_iter *iter;
1844 struct ip_vs_service *svc;
1847 if (v == SEQ_START_TOKEN)
1848 return ip_vs_info_array(seq,0);
1851 iter = seq->private;
1853 if (iter->table == ip_vs_svc_table) {
1854 /* next service in table hashed by protocol */
1855 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1856 return list_entry(e, struct ip_vs_service, s_list);
/* current bucket exhausted: scan the remaining buckets */
1859 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1860 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
/* protocol table done: fall through to the fwmark table */
1866 iter->table = ip_vs_svc_fwm_table;
1871 /* next service in hashed by fwmark */
1872 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1873 return list_entry(e, struct ip_vs_service, f_list);
1876 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1877 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file ->stop: drop the read lock taken in ip_vs_info_seq_start(). */
1885 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1886 __releases(__ip_vs_svc_lock)
1888 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * ip_vs_info_seq_show - seq_file ->show for /proc/net/ip_vs: print the
 * header for SEQ_START_TOKEN, otherwise one service line (addr/port or
 * FWM form, scheduler, flags, persistence) followed by one line per
 * destination with forwarding method, weight and connection counts.
 * NOTE(review): excerpt elides format-string fragments, seq_printf
 * argument lines and returns; code kept byte-identical.
 */
1892 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1894 if (v == SEQ_START_TOKEN) {
1896 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1897 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1899 "Prot LocalAddress:Port Scheduler Flags\n");
1901 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1903 const struct ip_vs_service *svc = v;
1904 const struct ip_vs_iter *iter = seq->private;
1905 const struct ip_vs_dest *dest;
/* protocol-hashed services print proto+addr+port; fwmark ones print FWM */
1907 if (iter->table == ip_vs_svc_table) {
1908 #ifdef CONFIG_IP_VS_IPV6
1909 if (svc->af == AF_INET6)
1910 seq_printf(seq, "%s [%pI6]:%04X %s ",
1911 ip_vs_proto_name(svc->protocol),
1914 svc->scheduler->name);
1917 seq_printf(seq, "%s %08X:%04X %s %s ",
1918 ip_vs_proto_name(svc->protocol),
1919 ntohl(svc->addr.ip),
1921 svc->scheduler->name,
1922 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1924 seq_printf(seq, "FWM %08X %s %s",
1925 svc->fwmark, svc->scheduler->name,
1926 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1929 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1930 seq_printf(seq, "persistent %d %08X\n",
1932 ntohl(svc->netmask));
1934 seq_putc(seq, '\n');
/* one line per real server of this service */
1936 list_for_each_entry(dest, &svc->destinations, n_list) {
1937 #ifdef CONFIG_IP_VS_IPV6
1938 if (dest->af == AF_INET6)
1941 " %-7s %-6d %-10d %-10d\n",
1944 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1945 atomic_read(&dest->weight),
1946 atomic_read(&dest->activeconns),
1947 atomic_read(&dest->inactconns));
1952 "%-7s %-6d %-10d %-10d\n",
1953 ntohl(dest->addr.ip),
1955 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1956 atomic_read(&dest->weight),
1957 atomic_read(&dest->activeconns),
1958 atomic_read(&dest->inactconns));
/* seq_file operations for /proc/net/ip_vs. */
1965 static const struct seq_operations ip_vs_info_seq_ops = {
1966 .start = ip_vs_info_seq_start,
1967 .next = ip_vs_info_seq_next,
1968 .stop = ip_vs_info_seq_stop,
1969 .show = ip_vs_info_seq_show,
/* open: netns-aware seq_file with a per-open struct ip_vs_iter */
1972 static int ip_vs_info_open(struct inode *inode, struct file *file)
1974 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1975 sizeof(struct ip_vs_iter));
/* file_operations glue; .read (elided here) is presumably seq_read */
1978 static const struct file_operations ip_vs_info_fops = {
1979 .owner = THIS_MODULE,
1980 .open = ip_vs_info_open,
1982 .llseek = seq_lseek,
1983 .release = seq_release_private,
1988 #ifdef CONFIG_PROC_FS
/*
 * ip_vs_stats_show - single-shot /proc/net/ip_vs_stats: snapshot the
 * per-netns aggregate counters and print totals plus per-second rates.
 * NOTE(review): excerpt elides the seq_puts header calls and return.
 */
1989 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1991 struct net *net = seq_file_single_net(seq);
1992 struct ip_vs_stats_user show;
1994 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1996 " Total Incoming Outgoing Incoming Outgoing\n");
1998 " Conns Packets Packets Bytes Bytes\n");
/* copy into a userspace-layout struct so printing is race-free */
2000 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2001 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2002 show.inpkts, show.outpkts,
2003 (unsigned long long) show.inbytes,
2004 (unsigned long long) show.outbytes);
2006 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2008 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2009 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2010 show.cps, show.inpps, show.outpps,
2011 show.inbps, show.outbps);
/* open: netns-aware single_open wrapper for ip_vs_stats_show(). */
2016 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2018 return single_open_net(inode, file, ip_vs_stats_show);
/* file_operations for /proc/net/ip_vs_stats; .read elided in excerpt */
2021 static const struct file_operations ip_vs_stats_fops = {
2022 .owner = THIS_MODULE,
2023 .open = ip_vs_stats_seq_open,
2025 .llseek = seq_lseek,
2026 .release = single_release,
/*
 * ip_vs_stats_percpu_show - /proc/net/ip_vs_stats_percpu: dump one
 * counter line per possible CPU (64-bit byte counters read under the
 * u64_stats seqlock so a 32-bit reader never sees a torn value), then
 * the locked grand total and the estimator rates.
 * NOTE(review): excerpt elides seq_puts headers, some printf arguments
 * and the return; code kept byte-identical.
 */
2029 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2031 struct net *net = seq_file_single_net(seq);
2032 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2033 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2034 struct ip_vs_stats_user rates;
2037 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2039 " Total Incoming Outgoing Incoming Outgoing\n");
2041 "CPU Conns Packets Packets Bytes Bytes\n");
2043 for_each_possible_cpu(i) {
2044 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2046 __u64 inbytes, outbytes;
/* retry loop: re-read if a writer updated the counters meanwhile */
2049 start = u64_stats_fetch_begin_bh(&u->syncp);
2050 inbytes = u->ustats.inbytes;
2051 outbytes = u->ustats.outbytes;
2052 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2054 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2055 i, u->ustats.conns, u->ustats.inpkts,
2056 u->ustats.outpkts, (__u64)inbytes,
/* totals are protected by tot_stats->lock rather than the seqlock */
2060 spin_lock_bh(&tot_stats->lock);
2062 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2063 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2064 tot_stats->ustats.outpkts,
2065 (unsigned long long) tot_stats->ustats.inbytes,
2066 (unsigned long long) tot_stats->ustats.outbytes);
2068 ip_vs_read_estimator(&rates, tot_stats);
2070 spin_unlock_bh(&tot_stats->lock);
2072 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2074 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2075 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
/* open: netns-aware single_open wrapper for the per-CPU stats view. */
2085 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2087 return single_open_net(inode, file, ip_vs_stats_percpu_show);
/* file_operations for /proc/net/ip_vs_stats_percpu; .read elided */
2090 static const struct file_operations ip_vs_stats_percpu_fops = {
2091 .owner = THIS_MODULE,
2092 .open = ip_vs_stats_percpu_seq_open,
2094 .llseek = seq_lseek,
2095 .release = single_release,
2100 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * ip_vs_set_timeout - apply user-supplied TCP/TCP-FIN/UDP timeouts (in
 * seconds; converted to jiffies) to the per-netns protocol data.  A zero
 * field means "leave that timeout unchanged".
 * NOTE(review): excerpt elides the debug-macro arguments, #endif lines
 * and return; code kept byte-identical.
 */
2102 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2104 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2105 struct ip_vs_proto_data *pd;
2108 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2113 #ifdef CONFIG_IP_VS_PROTO_TCP
2114 if (u->tcp_timeout) {
2115 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2116 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2117 = u->tcp_timeout * HZ;
2120 if (u->tcp_fin_timeout) {
2121 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2122 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2123 = u->tcp_fin_timeout * HZ;
2127 #ifdef CONFIG_IP_VS_PROTO_UDP
2128 if (u->udp_timeout) {
2129 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2130 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2131 = u->udp_timeout * HZ;
/*
 * Argument-length table for the setsockopt (IP_VS_SO_SET_*) commands:
 * SET_CMDID rebases a command number to a table index, and set_arglen
 * gives the exact byte length do_ip_vs_set_ctl() requires for each
 * command's argument block.
 */
2138 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2139 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2140 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2141 sizeof(struct ip_vs_dest_user))
2142 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2143 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
/* largest of the above; sizes the on-stack arg buffer in set_ctl */
2144 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2146 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2147 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2148 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2149 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2150 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2151 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2152 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2153 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2154 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2156 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2157 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * ip_vs_copy_usvc_compat - convert the legacy sockopt service struct
 * into the kernel-internal extended form (zero-filled first, so fields
 * the compat struct lacks, e.g. af/pe_name, stay cleared).
 */
2160 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2161 struct ip_vs_service_user *usvc_compat)
2163 memset(usvc, 0, sizeof(*usvc));
2166 usvc->protocol = usvc_compat->protocol;
2167 usvc->addr.ip = usvc_compat->addr;
2168 usvc->port = usvc_compat->port;
2169 usvc->fwmark = usvc_compat->fwmark;
2171 /* Deep copy of sched_name is not needed here */
2172 usvc->sched_name = usvc_compat->sched_name;
2174 usvc->flags = usvc_compat->flags;
2175 usvc->timeout = usvc_compat->timeout;
2176 usvc->netmask = usvc_compat->netmask;
/*
 * ip_vs_copy_udest_compat - convert the legacy sockopt destination
 * struct into the kernel-internal extended form (zero-filled first).
 */
2179 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2180 struct ip_vs_dest_user *udest_compat)
2182 memset(udest, 0, sizeof(*udest));
2184 udest->addr.ip = udest_compat->addr;
2185 udest->port = udest_compat->port;
2186 udest->conn_flags = udest_compat->conn_flags;
2187 udest->weight = udest_compat->weight;
2188 udest->u_threshold = udest_compat->u_threshold;
2189 udest->l_threshold = udest_compat->l_threshold;
/*
 * do_ip_vs_set_ctl - setsockopt entry point for all IP_VS_SO_SET_*
 * commands.  Requires CAP_NET_ADMIN; validates cmd range and argument
 * length against set_arglen[], copies the argument from userspace, then
 * dispatches under __ip_vs_mutex: flush/timeout/daemon commands are
 * handled directly, all others resolve the target service (by
 * <proto,addr,port> or fwmark) and switch on the command.  Module use
 * count is bumped for the duration of the call.
 * NOTE(review): excerpt elides returns, gotos, labels and break
 * statements; code kept byte-identical.
 */
2193 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2195 struct net *net = sock_net(sk);
2197 unsigned char arg[MAX_ARG_LEN];
2198 struct ip_vs_service_user *usvc_compat;
2199 struct ip_vs_service_user_kern usvc;
2200 struct ip_vs_service *svc;
2201 struct ip_vs_dest_user *udest_compat;
2202 struct ip_vs_dest_user_kern udest;
2204 if (!capable(CAP_NET_ADMIN))
2207 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2209 if (len < 0 || len > MAX_ARG_LEN)
/* every command has a fixed argument size; reject anything else */
2211 if (len != set_arglen[SET_CMDID(cmd)]) {
2212 pr_err("set_ctl: len %u != %u\n",
2213 len, set_arglen[SET_CMDID(cmd)]);
2217 if (copy_from_user(arg, user, len) != 0)
2220 /* increase the module use count */
2221 ip_vs_use_count_inc();
/* interruptible: a signal while waiting aborts the sockopt */
2223 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2228 if (cmd == IP_VS_SO_SET_FLUSH) {
2229 /* Flush the virtual service */
2230 ret = ip_vs_flush(net);
2232 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2233 /* Set timeout values for (tcp tcpfin udp) */
2234 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2236 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2237 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2238 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2241 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2242 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2243 ret = stop_sync_thread(net, dm->state);
/* remaining commands carry a service (+ optional dest) argument */
2247 usvc_compat = (struct ip_vs_service_user *)arg;
2248 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2250 /* We only use the new structs internally, so copy userspace compat
2251 * structs to extended internal versions */
2252 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2253 ip_vs_copy_udest_compat(&udest, udest_compat);
2255 if (cmd == IP_VS_SO_SET_ZERO) {
2256 /* if no service address is set, zero counters in all */
2257 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2258 ret = ip_vs_zero_all(net);
2263 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2264 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2265 usvc.protocol != IPPROTO_SCTP) {
2266 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2267 usvc.protocol, &usvc.addr.ip,
2268 ntohs(usvc.port), usvc.sched_name);
2273 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2274 if (usvc.fwmark == 0)
2275 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2276 &usvc.addr, usvc.port);
2278 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
/* everything except ADD needs an existing, protocol-matching service */
2280 if (cmd != IP_VS_SO_SET_ADD
2281 && (svc == NULL || svc->protocol != usvc.protocol)) {
2287 case IP_VS_SO_SET_ADD:
2291 ret = ip_vs_add_service(net, &usvc, &svc);
2293 case IP_VS_SO_SET_EDIT:
2294 ret = ip_vs_edit_service(svc, &usvc);
2296 case IP_VS_SO_SET_DEL:
2297 ret = ip_vs_del_service(svc);
2301 case IP_VS_SO_SET_ZERO:
2302 ret = ip_vs_zero_service(svc);
2304 case IP_VS_SO_SET_ADDDEST:
2305 ret = ip_vs_add_dest(svc, &udest);
2307 case IP_VS_SO_SET_EDITDEST:
2308 ret = ip_vs_edit_dest(svc, &udest);
2310 case IP_VS_SO_SET_DELDEST:
2311 ret = ip_vs_del_dest(svc, &udest);
2318 mutex_unlock(&__ip_vs_mutex);
2320 /* decrease the module use count */
2321 ip_vs_use_count_dec();
/*
 * ip_vs_copy_service - fill the userspace-visible service entry @dst
 * from the in-kernel service @src (timeout converted jiffies->seconds,
 * scheduler name copied with truncation-safe strlcpy).
 */
2328 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2330 dst->protocol = src->protocol;
2331 dst->addr = src->addr.ip;
2332 dst->port = src->port;
2333 dst->fwmark = src->fwmark;
2334 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2335 dst->flags = src->flags;
2336 dst->timeout = src->timeout / HZ;
2337 dst->netmask = src->netmask;
2338 dst->num_dests = src->num_dests;
2339 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * __ip_vs_get_service_entries - copy up to get->num_services service
 * entries into the userspace buffer @uptr, walking both hash tables.
 * Only IPv4 services of this netns are exposed (legacy interface).
 * NOTE(review): excerpt elides the count++ / error-path / return lines;
 * code kept byte-identical.
 */
2343 __ip_vs_get_service_entries(struct net *net,
2344 const struct ip_vs_get_services *get,
2345 struct ip_vs_get_services __user *uptr)
2348 struct ip_vs_service *svc;
2349 struct ip_vs_service_entry entry;
2352 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2353 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2354 /* Only expose IPv4 entries to old interface */
2355 if (svc->af != AF_INET || !net_eq(svc->net, net))
2358 if (count >= get->num_services)
/* memset avoids leaking kernel stack padding to userspace */
2360 memset(&entry, 0, sizeof(entry));
2361 ip_vs_copy_service(&entry, svc);
2362 if (copy_to_user(&uptr->entrytable[count],
2363 &entry, sizeof(entry))) {
2371 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2372 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2373 /* Only expose IPv4 entries to old interface */
2374 if (svc->af != AF_INET || !net_eq(svc->net, net))
2377 if (count >= get->num_services)
2379 memset(&entry, 0, sizeof(entry));
2380 ip_vs_copy_service(&entry, svc);
2381 if (copy_to_user(&uptr->entrytable[count],
2382 &entry, sizeof(entry))) {
/*
 * __ip_vs_get_dest_entries - copy up to get->num_dests destination
 * entries of one service (found by fwmark or <proto,addr,port>, IPv4
 * only) into the userspace buffer @uptr.
 * NOTE(review): excerpt elides the entry.memset/count/return lines;
 * code kept byte-identical.
 */
2394 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2395 struct ip_vs_get_dests __user *uptr)
2397 struct ip_vs_service *svc;
2398 union nf_inet_addr addr = { .ip = get->addr };
/* fwmark takes precedence over the address triple, like elsewhere */
2402 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2404 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2409 struct ip_vs_dest *dest;
2410 struct ip_vs_dest_entry entry;
2412 list_for_each_entry(dest, &svc->destinations, n_list) {
2413 if (count >= get->num_dests)
2416 entry.addr = dest->addr.ip;
2417 entry.port = dest->port;
2418 entry.conn_flags = atomic_read(&dest->conn_flags);
2419 entry.weight = atomic_read(&dest->weight);
2420 entry.u_threshold = dest->u_threshold;
2421 entry.l_threshold = dest->l_threshold;
2422 entry.activeconns = atomic_read(&dest->activeconns);
2423 entry.inactconns = atomic_read(&dest->inactconns);
2424 entry.persistconns = atomic_read(&dest->persistconns);
2425 ip_vs_copy_stats(&entry.stats, &dest->stats);
2426 if (copy_to_user(&uptr->entrytable[count],
2427 &entry, sizeof(entry))) {
/*
 * __ip_vs_get_timeouts - report the current TCP established, TCP FIN-wait
 * and UDP timeouts to userspace, converted jiffies -> seconds.
 * NOTE(review): excerpt elides the #endif and memset lines; code kept
 * byte-identical.
 */
2439 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2441 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2442 struct ip_vs_proto_data *pd;
2445 #ifdef CONFIG_IP_VS_PROTO_TCP
2446 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2447 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2448 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2450 #ifdef CONFIG_IP_VS_PROTO_UDP
2451 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2453 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/*
 * Minimum argument lengths for the getsockopt (IP_VS_SO_GET_*) commands;
 * do_ip_vs_get_ctl() rejects any *len smaller than the table value.
 */
2458 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2459 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2460 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2461 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2462 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2463 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
/* master + backup daemon records are returned as a pair */
2464 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2466 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2467 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2468 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2469 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2470 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2471 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2472 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2473 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * do_ip_vs_get_ctl - getsockopt entry point for all IP_VS_SO_GET_*
 * commands.  Requires CAP_NET_ADMIN; validates cmd range and the
 * user-supplied length against get_arglen[], copies the request header
 * in, then serves each query under __ip_vs_mutex and copies the result
 * back to userspace.
 * NOTE(review): excerpt elides returns, gotos, break statements and some
 * declarations (e.g. the VERSION buf); code kept byte-identical.
 */
2477 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2479 unsigned char arg[128];
2481 unsigned int copylen;
2482 struct net *net = sock_net(sk);
2483 struct netns_ipvs *ipvs = net_ipvs(net);
2486 if (!capable(CAP_NET_ADMIN))
2489 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
/* caller's buffer must be at least the command's minimum size */
2492 if (*len < get_arglen[GET_CMDID(cmd)]) {
2493 pr_err("get_ctl: len %u < %u\n",
2494 *len, get_arglen[GET_CMDID(cmd)]);
2498 copylen = get_arglen[GET_CMDID(cmd)];
2502 if (copy_from_user(arg, user, copylen) != 0)
2505 if (mutex_lock_interruptible(&__ip_vs_mutex))
2506 return -ERESTARTSYS;
2509 case IP_VS_SO_GET_VERSION:
2513 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2514 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2515 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2519 *len = strlen(buf)+1;
2523 case IP_VS_SO_GET_INFO:
2525 struct ip_vs_getinfo info;
2526 info.version = IP_VS_VERSION_CODE;
2527 info.size = ip_vs_conn_tab_size;
2528 info.num_services = ipvs->num_services;
2529 if (copy_to_user(user, &info, sizeof(info)) != 0)
2534 case IP_VS_SO_GET_SERVICES:
2536 struct ip_vs_get_services *get;
2539 get = (struct ip_vs_get_services *)arg;
/* header plus one entry per requested service */
2540 size = sizeof(*get) +
2541 sizeof(struct ip_vs_service_entry) * get->num_services;
2543 pr_err("length: %u != %u\n", *len, size);
2547 ret = __ip_vs_get_service_entries(net, get, user);
2551 case IP_VS_SO_GET_SERVICE:
2553 struct ip_vs_service_entry *entry;
2554 struct ip_vs_service *svc;
2555 union nf_inet_addr addr;
2557 entry = (struct ip_vs_service_entry *)arg;
2558 addr.ip = entry->addr;
2560 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2562 svc = __ip_vs_service_find(net, AF_INET,
2563 entry->protocol, &addr,
2566 ip_vs_copy_service(entry, svc);
2567 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2574 case IP_VS_SO_GET_DESTS:
2576 struct ip_vs_get_dests *get;
2579 get = (struct ip_vs_get_dests *)arg;
2580 size = sizeof(*get) +
2581 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2583 pr_err("length: %u != %u\n", *len, size);
2587 ret = __ip_vs_get_dest_entries(net, get, user);
2591 case IP_VS_SO_GET_TIMEOUT:
2593 struct ip_vs_timeout_user t;
2595 __ip_vs_get_timeouts(net, &t);
2596 if (copy_to_user(user, &t, sizeof(t)) != 0)
2601 case IP_VS_SO_GET_DAEMON:
2603 struct ip_vs_daemon_user d[2];
/* zero both slots: an inactive daemon is reported all-zero */
2605 memset(&d, 0, sizeof(d));
2606 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2607 d[0].state = IP_VS_STATE_MASTER;
2608 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2609 sizeof(d[0].mcast_ifn));
2610 d[0].syncid = ipvs->master_syncid;
2612 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2613 d[1].state = IP_VS_STATE_BACKUP;
2614 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2615 sizeof(d[1].mcast_ifn));
2616 d[1].syncid = ipvs->backup_syncid;
2618 if (copy_to_user(user, &d, sizeof(d)) != 0)
2628 mutex_unlock(&__ip_vs_mutex);
/*
 * Netfilter sockopt registration tying the IP_VS_SO_SET_*/IP_VS_SO_GET_*
 * command ranges to the two handlers above.
 */
2633 static struct nf_sockopt_ops ip_vs_sockopts = {
2635 .set_optmin = IP_VS_BASE_CTL,
2636 .set_optmax = IP_VS_SO_SET_MAX+1,
2637 .set = do_ip_vs_set_ctl,
2638 .get_optmin = IP_VS_BASE_CTL,
2639 .get_optmax = IP_VS_SO_GET_MAX+1,
2640 .get = do_ip_vs_get_ctl,
2641 .owner = THIS_MODULE,
/*
 * Generic Netlink interface
 */
/* IPVS genetlink family; id auto-allocated, netns-aware for ipvsadm */
2649 static struct genl_family ip_vs_genl_family = {
2650 .id = GENL_ID_GENERATE,
2652 .name = IPVS_GENL_NAME,
2653 .version = IPVS_GENL_VERSION,
2654 .maxattr = IPVS_CMD_MAX,
2655 .netnsok = true, /* Make ipvsadm to work on netns */
/* Policy used for first-level command attributes */
2659 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2660 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2661 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2662 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2663 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2664 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2665 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2668 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2669 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2670 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2671 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2672 .len = IP_VS_IFNAME_MAXLEN },
2673 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2676 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2677 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2678 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2679 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
/* address is family-agnostic binary, sized for the larger (v6) form */
2680 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2681 .len = sizeof(union nf_inet_addr) },
2682 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2683 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2684 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2685 .len = IP_VS_SCHEDNAME_MAXLEN },
2686 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2687 .len = IP_VS_PENAME_MAXLEN },
2688 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2689 .len = sizeof(struct ip_vs_flags) },
2690 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2691 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2692 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2695 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2696 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2697 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2698 .len = sizeof(union nf_inet_addr) },
2699 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2700 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2701 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2702 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2703 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2704 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2705 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2706 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2707 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * ip_vs_genl_fill_stats - emit an ip_vs_stats snapshot as a nested
 * netlink attribute @container_type; on any NLA_PUT failure the nest is
 * cancelled so the message stays well-formed.
 * NOTE(review): excerpt elides the nest-start NULL check and returns;
 * code kept byte-identical.
 */
2710 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2711 struct ip_vs_stats *stats)
2713 struct ip_vs_stats_user ustats;
2714 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2718 ip_vs_copy_stats(&ustats, stats);
/* NLA_PUT_* macros jump to nla_put_failure when the skb runs out */
2720 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2721 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2722 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2723 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2724 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2725 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2726 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2731 nla_nest_end(skb, nl_stats);
2736 nla_nest_cancel(skb, nl_stats);
/*
 * ip_vs_genl_fill_service - emit one service as a nested
 * IPVS_CMD_ATTR_SERVICE attribute: identity (fwmark, or proto/addr/port),
 * scheduler, optional pe name, flags, timeout, netmask and stats.
 * Cancels the nest on failure.
 * NOTE(review): excerpt elides the fwmark/proto branch structure and
 * returns; code kept byte-identical.
 */
2740 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2741 struct ip_vs_service *svc)
2743 struct nlattr *nl_service;
2744 struct ip_vs_flags flags = { .flags = svc->flags,
2747 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2751 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2754 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2756 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2757 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2758 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2761 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2763 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2764 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
/* timeout exported in seconds, stored internally in jiffies */
2765 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2766 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2768 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2769 goto nla_put_failure;
2771 nla_nest_end(skb, nl_service);
2776 nla_nest_cancel(skb, nl_service);
/*
 * ip_vs_genl_dump_service - put one NLM_F_MULTI message (header +
 * nested service attribute) into the dump skb; cancels the message on
 * failure so the dump can be resumed.
 * NOTE(review): excerpt elides the hdr NULL check and error return.
 */
2780 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2781 struct ip_vs_service *svc,
2782 struct netlink_callback *cb)
2786 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2787 &ip_vs_genl_family, NLM_F_MULTI,
2788 IPVS_CMD_NEW_SERVICE);
2792 if (ip_vs_genl_fill_service(skb, svc) < 0)
2793 goto nla_put_failure;
2795 return genlmsg_end(skb, hdr);
2798 genlmsg_cancel(skb, hdr);
/*
 * ip_vs_genl_dump_services - netlink dump callback: walk both service
 * hash tables under __ip_vs_mutex, skipping entries already dumped
 * (cb->args[0]) and foreign-netns services, emitting one message per
 * service until the skb fills up.
 * NOTE(review): excerpt elides the idx bookkeeping and return; code
 * kept byte-identical.
 */
2802 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2803 struct netlink_callback *cb)
2806 int start = cb->args[0];
2807 struct ip_vs_service *svc;
2808 struct net *net = skb_sknet(skb);
2810 mutex_lock(&__ip_vs_mutex);
2811 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2812 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
/* resume where the previous dump pass stopped */
2813 if (++idx <= start || !net_eq(svc->net, net))
2815 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2817 goto nla_put_failure;
2822 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2823 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2824 if (++idx <= start || !net_eq(svc->net, net))
2826 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2828 goto nla_put_failure;
2834 mutex_unlock(&__ip_vs_mutex);
/*
 * ip_vs_genl_parse_service - parse a nested IPVS_CMD_ATTR_SERVICE
 * attribute into @usvc and look up the matching service into *ret_svc.
 * Mandatory identity is AF plus either fwmark or the proto/addr/port
 * triple; with @full_entry the scheduler/flags/timeout/netmask
 * attributes are also required and copied (flags merged against the
 * existing service's flags via the user-supplied mask).
 * NOTE(review): excerpt elides several returns, else branches and the
 * flags-size check; code kept byte-identical.
 */
2840 static int ip_vs_genl_parse_service(struct net *net,
2841 struct ip_vs_service_user_kern *usvc,
2842 struct nlattr *nla, int full_entry,
2843 struct ip_vs_service **ret_svc)
2845 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2846 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2847 struct ip_vs_service *svc;
2849 /* Parse mandatory identifying service fields first */
2851 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2854 nla_af = attrs[IPVS_SVC_ATTR_AF];
2855 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2856 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2857 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2858 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
/* need AF and either a fwmark or the full address triple */
2860 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2863 memset(usvc, 0, sizeof(*usvc));
2865 usvc->af = nla_get_u16(nla_af);
2866 #ifdef CONFIG_IP_VS_IPV6
2867 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2869 if (usvc->af != AF_INET)
2871 return -EAFNOSUPPORT;
/* fwmark services carry a dummy TCP protocol internally */
2874 usvc->protocol = IPPROTO_TCP;
2875 usvc->fwmark = nla_get_u32(nla_fwmark);
2877 usvc->protocol = nla_get_u16(nla_protocol);
2878 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2879 usvc->port = nla_get_u16(nla_port);
2884 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2886 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2887 &usvc->addr, usvc->port);
2890 /* If a full entry was requested, check for the additional fields */
2892 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2894 struct ip_vs_flags flags;
2896 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2897 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2898 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2899 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2900 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2902 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2905 nla_memcpy(&flags, nla_flags, sizeof(flags));
2907 /* prefill flags from service if it already exists */
2909 usvc->flags = svc->flags;
2911 /* set new flags from userland */
2912 usvc->flags = (usvc->flags & ~flags.mask) |
2913 (flags.flags & flags.mask);
/* sched/pe names point into the nlattr; no deep copy needed here */
2914 usvc->sched_name = nla_data(nla_sched);
2915 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2916 usvc->timeout = nla_get_u32(nla_timeout);
2917 usvc->netmask = nla_get_u32(nla_netmask);
/*
 * Resolve the service referenced by a nested service attribute via
 * ip_vs_genl_parse_service() without requiring a full entry.
 * Returns the service pointer (may be NULL when no match exists) or
 * an ERR_PTR on parse failure.
 */
2923 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2926 struct ip_vs_service_user_kern usvc;
2927 struct ip_vs_service *svc;
2930 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2931 return ret ? ERR_PTR(ret) : svc;
/*
 * Emit one nested IPVS_CMD_ATTR_DEST attribute describing @dest:
 * address, port, forwarding method, weight, thresholds, connection
 * counters and per-destination stats.  On attribute-space exhaustion
 * the nest is cancelled (the nla_put_failure label is elided from
 * this excerpt).
 */
2934 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2936 struct nlattr *nl_dest;
2938 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2942 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2943 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
/* conn_flags also carries non-forwarding bits; mask to the fwd method. */
2945 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2946 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2947 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2948 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2949 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2950 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2951 atomic_read(&dest->activeconns));
2952 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2953 atomic_read(&dest->inactconns));
2954 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2955 atomic_read(&dest->persistconns));
2957 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2958 goto nla_put_failure;
2960 nla_nest_end(skb, nl_dest);
2965 nla_nest_cancel(skb, nl_dest);
/*
 * Wrap ip_vs_genl_fill_dest() in an NLM_F_MULTI generic netlink
 * message for a dump reply; the message is cancelled on failure.
 */
2969 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2970 struct netlink_callback *cb)
2974 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2975 &ip_vs_genl_family, NLM_F_MULTI,
2980 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2981 goto nla_put_failure;
2983 return genlmsg_end(skb, hdr);
2986 genlmsg_cancel(skb, hdr);
/*
 * Netlink dump callback for IPVS_CMD_GET_DEST: re-parse the service
 * reference out of the original request, then emit one message per
 * destination of that service, resuming from cb->args[0].
 * NOTE(review): error/skip paths and closing braces are elided from
 * this excerpt.
 */
2990 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2991 struct netlink_callback *cb)
2994 int start = cb->args[0];
2995 struct ip_vs_service *svc;
2996 struct ip_vs_dest *dest;
2997 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2998 struct net *net = skb_sknet(skb);
3000 mutex_lock(&__ip_vs_mutex);
3002 /* Try to find the service for which to dump destinations */
3003 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3004 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3008 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3009 if (IS_ERR(svc) || svc == NULL)
3012 /* Dump the destinations */
3013 list_for_each_entry(dest, &svc->destinations, n_list) {
3016 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3018 goto nla_put_failure;
3026 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_DEST attribute into *udest.  The
 * address and port are always mandatory; when @full_entry is
 * non-zero the forwarding method, weight and upper/lower thresholds
 * are required as well.
 */
3031 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3032 struct nlattr *nla, int full_entry)
3034 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3035 struct nlattr *nla_addr, *nla_port;
3037 /* Parse mandatory identifying destination fields first */
3039 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3042 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3043 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3045 if (!(nla_addr && nla_port))
3048 memset(udest, 0, sizeof(*udest));
3050 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3051 udest->port = nla_get_u16(nla_port);
3053 /* If a full entry was requested, check for the additional fields */
3055 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3058 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3059 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3060 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3061 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3063 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
/* Only the forwarding-method bits of the flags word are accepted. */
3066 udest->conn_flags = nla_get_u32(nla_fwd)
3067 & IP_VS_CONN_F_FWD_MASK;
3068 udest->weight = nla_get_u32(nla_weight);
3069 udest->u_threshold = nla_get_u32(nla_u_thresh);
3070 udest->l_threshold = nla_get_u32(nla_l_thresh);
/*
 * Emit one nested IPVS_CMD_ATTR_DAEMON attribute carrying the sync
 * daemon state, its multicast interface name and sync id.  On
 * failure the nest is cancelled (label elided from this excerpt).
 */
3076 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3077 const char *mcast_ifn, __be32 syncid)
3079 struct nlattr *nl_daemon;
3081 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3085 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3086 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3087 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3089 nla_nest_end(skb, nl_daemon);
3094 nla_nest_cancel(skb, nl_daemon);
/*
 * Wrap ip_vs_genl_fill_daemon() in an NLM_F_MULTI NEW_DAEMON message
 * for a dump reply; the message is cancelled on failure.
 */
3098 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3099 const char *mcast_ifn, __be32 syncid,
3100 struct netlink_callback *cb)
3103 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3104 &ip_vs_genl_family, NLM_F_MULTI,
3105 IPVS_CMD_NEW_DAEMON);
3109 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3110 goto nla_put_failure;
3112 return genlmsg_end(skb, hdr);
3115 genlmsg_cancel(skb, hdr);
/*
 * Dump callback for IPVS_CMD_GET_DAEMON: report the master and/or
 * backup sync daemon state; cb->args[0]/[1] mark which of the two
 * has already been emitted across invocations.
 * NOTE(review): this handler derives the netns via skb_net(skb)
 * while the other dump handlers in this file use skb_sknet(skb) --
 * verify the intended namespace source.
 */
3119 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3120 struct netlink_callback *cb)
3122 struct net *net = skb_net(skb);
3123 struct netns_ipvs *ipvs = net_ipvs(net);
3125 mutex_lock(&__ip_vs_mutex);
3126 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3127 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3128 ipvs->master_mcast_ifn,
3129 ipvs->master_syncid, cb) < 0)
3130 goto nla_put_failure;
3135 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3136 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3137 ipvs->backup_mcast_ifn,
3138 ipvs->backup_syncid, cb) < 0)
3139 goto nla_put_failure;
3145 mutex_unlock(&__ip_vs_mutex);
/*
 * Start a sync daemon thread.  STATE, MCAST_IFN and SYNC_ID
 * attributes are all mandatory (the reject path is elided here).
 */
3150 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3152 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3153 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3154 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3157 return start_sync_thread(net,
3158 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3159 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3160 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
/* Stop the sync daemon selected by the mandatory STATE attribute. */
3163 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3165 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3168 return stop_sync_thread(net,
3169 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
/*
 * Apply protocol timeout settings: read the current timeouts,
 * overwrite only the fields for which an attribute is present, then
 * write the whole structure back.
 */
3172 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3174 struct ip_vs_timeout_user t;
3176 __ip_vs_get_timeouts(net, &t);
3178 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3179 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3181 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3183 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3185 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3186 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3188 return ip_vs_set_timeout(net, &t);
/*
 * Generic netlink "set" dispatcher: handles flush, timeout config,
 * sync daemon start/stop, zero, and service / destination
 * add / edit / delete commands, all serialized under __ip_vs_mutex.
 * NOTE(review): declarations, error labels, break statements and
 * some braces are elided from this excerpt; comments describe only
 * the visible code.
 */
3191 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3193 struct ip_vs_service *svc = NULL;
3194 struct ip_vs_service_user_kern usvc;
3195 struct ip_vs_dest_user_kern udest;
3197 int need_full_svc = 0, need_full_dest = 0;
3199 struct netns_ipvs *ipvs;
3201 net = skb_sknet(skb);
3202 ipvs = net_ipvs(net);
3203 cmd = info->genlhdr->cmd;
3205 mutex_lock(&__ip_vs_mutex);
/* Commands with no service argument are handled up front. */
3207 if (cmd == IPVS_CMD_FLUSH) {
3208 ret = ip_vs_flush(net);
3210 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3211 ret = ip_vs_genl_set_config(net, info->attrs);
3213 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3214 cmd == IPVS_CMD_DEL_DAEMON) {
3216 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3218 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3219 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3220 info->attrs[IPVS_CMD_ATTR_DAEMON],
3221 ip_vs_daemon_policy)) {
3226 if (cmd == IPVS_CMD_NEW_DAEMON)
3227 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3229 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3231 } else if (cmd == IPVS_CMD_ZERO &&
3232 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3233 ret = ip_vs_zero_all(net);
3237 /* All following commands require a service argument, so check if we
3238 * received a valid one. We need a full service specification when
3239 * adding / editing a service. Only identifying members otherwise. */
3240 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3243 ret = ip_vs_genl_parse_service(net, &usvc,
3244 info->attrs[IPVS_CMD_ATTR_SERVICE],
3245 need_full_svc, &svc)!
3249 /* Unless we're adding a new service, the service must already exist */
3250 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3255 /* Destination commands require a valid destination argument. For
3256 * adding / editing a destination, we need a full destination
3258 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3259 cmd == IPVS_CMD_DEL_DEST) {
3260 if (cmd != IPVS_CMD_DEL_DEST)
3263 ret = ip_vs_genl_parse_dest(&udest,
3264 info->attrs[IPVS_CMD_ATTR_DEST],
/* Final dispatch on the parsed service / destination. */
3271 case IPVS_CMD_NEW_SERVICE:
3273 ret = ip_vs_add_service(net, &usvc, &svc);
3277 case IPVS_CMD_SET_SERVICE:
3278 ret = ip_vs_edit_service(svc, &usvc);
3280 case IPVS_CMD_DEL_SERVICE:
3281 ret = ip_vs_del_service(svc);
3282 /* do not use svc, it can be freed */
3284 case IPVS_CMD_NEW_DEST:
3285 ret = ip_vs_add_dest(svc, &udest);
3287 case IPVS_CMD_SET_DEST:
3288 ret = ip_vs_edit_dest(svc, &udest);
3290 case IPVS_CMD_DEL_DEST:
3291 ret = ip_vs_del_dest(svc, &udest);
3294 ret = ip_vs_zero_service(svc);
3301 mutex_unlock(&__ip_vs_mutex);
/*
 * Generic netlink "get" dispatcher for GET_SERVICE / GET_INFO /
 * GET_CONFIG: allocates a reply skb, fills it per command and sends
 * it back under __ip_vs_mutex.
 * NOTE(review): error labels, some braces and break statements are
 * elided from this excerpt.
 */
3306 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3308 struct sk_buff *msg;
3310 int ret, cmd, reply_cmd;
3312 struct netns_ipvs *ipvs;
3314 net = skb_sknet(skb);
3315 ipvs = net_ipvs(net);
3316 cmd = info->genlhdr->cmd;
/* Map each query command to the reply command it answers with. */
3318 if (cmd == IPVS_CMD_GET_SERVICE)
3319 reply_cmd = IPVS_CMD_NEW_SERVICE;
3320 else if (cmd == IPVS_CMD_GET_INFO)
3321 reply_cmd = IPVS_CMD_SET_INFO;
3322 else if (cmd == IPVS_CMD_GET_CONFIG)
3323 reply_cmd = IPVS_CMD_SET_CONFIG;
3325 pr_err("unknown Generic Netlink command\n");
3329 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3333 mutex_lock(&__ip_vs_mutex);
3335 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3337 goto nla_put_failure;
3340 case IPVS_CMD_GET_SERVICE:
3342 struct ip_vs_service *svc;
3344 svc = ip_vs_genl_find_service(net,
3345 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3350 ret = ip_vs_genl_fill_service(msg, svc);
3352 goto nla_put_failure;
3361 case IPVS_CMD_GET_CONFIG:
3363 struct ip_vs_timeout_user t;
3365 __ip_vs_get_timeouts(net, &t);
/* Timeout attributes are only present for compiled-in protocols. */
3366 #ifdef CONFIG_IP_VS_PROTO_TCP
3367 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3368 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3371 #ifdef CONFIG_IP_VS_PROTO_UDP
3372 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3378 case IPVS_CMD_GET_INFO:
3379 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3380 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3381 ip_vs_conn_tab_size);
3385 genlmsg_end(msg, reply);
3386 ret = genlmsg_reply(msg, info);
3390 pr_err("not enough space in Netlink message\n");
3396 mutex_unlock(&__ip_vs_mutex);
/*
 * Table of generic netlink operations for the IPVS family.  All
 * mutating commands route through ip_vs_genl_set_cmd(), queries go
 * through ip_vs_genl_get_cmd() or a dedicated dump callback, and
 * every operation requires CAP_NET_ADMIN (GENL_ADMIN_PERM).
 */
3402 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3404 .cmd = IPVS_CMD_NEW_SERVICE,
3405 .flags = GENL_ADMIN_PERM,
3406 .policy = ip_vs_cmd_policy,
3407 .doit = ip_vs_genl_set_cmd,
3410 .cmd = IPVS_CMD_SET_SERVICE,
3411 .flags = GENL_ADMIN_PERM,
3412 .policy = ip_vs_cmd_policy,
3413 .doit = ip_vs_genl_set_cmd,
3416 .cmd = IPVS_CMD_DEL_SERVICE,
3417 .flags = GENL_ADMIN_PERM,
3418 .policy = ip_vs_cmd_policy,
3419 .doit = ip_vs_genl_set_cmd,
3422 .cmd = IPVS_CMD_GET_SERVICE,
3423 .flags = GENL_ADMIN_PERM,
3424 .doit = ip_vs_genl_get_cmd,
3425 .dumpit = ip_vs_genl_dump_services,
3426 .policy = ip_vs_cmd_policy,
3429 .cmd = IPVS_CMD_NEW_DEST,
3430 .flags = GENL_ADMIN_PERM,
3431 .policy = ip_vs_cmd_policy,
3432 .doit = ip_vs_genl_set_cmd,
3435 .cmd = IPVS_CMD_SET_DEST,
3436 .flags = GENL_ADMIN_PERM,
3437 .policy = ip_vs_cmd_policy,
3438 .doit = ip_vs_genl_set_cmd,
3441 .cmd = IPVS_CMD_DEL_DEST,
3442 .flags = GENL_ADMIN_PERM,
3443 .policy = ip_vs_cmd_policy,
3444 .doit = ip_vs_genl_set_cmd,
3447 .cmd = IPVS_CMD_GET_DEST,
3448 .flags = GENL_ADMIN_PERM,
3449 .policy = ip_vs_cmd_policy,
3450 .dumpit = ip_vs_genl_dump_dests,
3453 .cmd = IPVS_CMD_NEW_DAEMON,
3454 .flags = GENL_ADMIN_PERM,
3455 .policy = ip_vs_cmd_policy,
3456 .doit = ip_vs_genl_set_cmd,
3459 .cmd = IPVS_CMD_DEL_DAEMON,
3460 .flags = GENL_ADMIN_PERM,
3461 .policy = ip_vs_cmd_policy,
3462 .doit = ip_vs_genl_set_cmd,
3465 .cmd = IPVS_CMD_GET_DAEMON,
3466 .flags = GENL_ADMIN_PERM,
3467 .dumpit = ip_vs_genl_dump_daemons,
3470 .cmd = IPVS_CMD_SET_CONFIG,
3471 .flags = GENL_ADMIN_PERM,
3472 .policy = ip_vs_cmd_policy,
3473 .doit = ip_vs_genl_set_cmd,
3476 .cmd = IPVS_CMD_GET_CONFIG,
3477 .flags = GENL_ADMIN_PERM,
3478 .doit = ip_vs_genl_get_cmd,
3481 .cmd = IPVS_CMD_GET_INFO,
3482 .flags = GENL_ADMIN_PERM,
3483 .doit = ip_vs_genl_get_cmd,
3486 .cmd = IPVS_CMD_ZERO,
3487 .flags = GENL_ADMIN_PERM,
3488 .policy = ip_vs_cmd_policy,
3489 .doit = ip_vs_genl_set_cmd,
3492 .cmd = IPVS_CMD_FLUSH,
3493 .flags = GENL_ADMIN_PERM,
3494 .doit = ip_vs_genl_set_cmd,
/* Register the IPVS generic netlink family with its operation table. */
3498 static int __init ip_vs_genl_register(void)
3500 return genl_register_family_with_ops(&ip_vs_genl_family,
3501 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
/* Unregister the IPVS generic netlink family (ops go with it). */
3504 static void ip_vs_genl_unregister(void)
3506 genl_unregister_family(&ip_vs_genl_family);
3509 /* End of Generic Netlink interface definitions */
3512 * per netns init/exit func.
/*
 * Per-netns init: initialize locks and hash tables, allocate the
 * per-cpu stats area, create the /proc entries, set sysctl defaults
 * (cloning the table for non-init netns) and register the sysctl
 * table, then kick off the periodic defense work.
 * NOTE(review): several error-path lines are elided from this
 * excerpt; presumably they free tbl / the proc entries on failure --
 * confirm against the full file.
 */
3514 int __net_init __ip_vs_control_init(struct net *net)
3517 struct netns_ipvs *ipvs = net_ipvs(net);
3518 struct ctl_table *tbl;
3520 atomic_set(&ipvs->dropentry, 0);
3521 spin_lock_init(&ipvs->dropentry_lock);
3522 spin_lock_init(&ipvs->droppacket_lock);
3523 spin_lock_init(&ipvs->securetcp_lock);
3524 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3526 /* Initialize rs_table */
3527 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3528 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3530 INIT_LIST_HEAD(&ipvs->dest_trash);
3531 atomic_set(&ipvs->ftpsvc_counter, 0);
3532 atomic_set(&ipvs->nullsvc_counter, 0);
/* Per-cpu totals; freed on the (elided) error path below. */
3535 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3536 if (!ipvs->tot_stats.cpustats) {
3537 pr_err("%s() alloc_percpu failed\n", __func__);
3540 spin_lock_init(&ipvs->tot_stats.lock);
3542 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3543 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3544 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3545 &ip_vs_stats_percpu_fops);
/* Non-init netns get their own writable copy of the sysctl table. */
3547 if (!net_eq(net, &init_net)) {
3548 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3553 /* Initialize sysctl defaults */
3555 ipvs->sysctl_amemthresh = 1024;
3556 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3557 ipvs->sysctl_am_droprate = 10;
3558 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3559 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3560 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3561 #ifdef CONFIG_IP_VS_NFCT
3562 tbl[idx++].data = &ipvs->sysctl_conntrack;
3564 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3565 ipvs->sysctl_snat_reroute = 1;
3566 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3567 ipvs->sysctl_sync_ver = 1;
3568 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3569 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3570 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3571 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
/* sync_threshold is a two-element array: threshold and period. */
3572 ipvs->sysctl_sync_threshold[0] = 3;
3573 ipvs->sysctl_sync_threshold[1] = 50;
3574 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3575 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3576 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3579 #ifdef CONFIG_SYSCTL
3580 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3582 if (ipvs->sysctl_hdr == NULL) {
3583 if (!net_eq(net, &init_net))
3588 ip_vs_new_estimator(net, &ipvs->tot_stats);
3589 ipvs->sysctl_tbl = tbl;
3590 /* Schedule defense work */
3591 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3592 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3596 free_percpu(ipvs->tot_stats.cpustats);
/*
 * Per-netns teardown, mirroring __ip_vs_control_init(): drain the
 * dest trash, kill the stats estimator, stop the defense work,
 * unregister sysctls and /proc entries, free per-cpu stats.
 */
3601 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3603 struct netns_ipvs *ipvs = net_ipvs(net);
3605 ip_vs_trash_cleanup(net);
3606 ip_vs_kill_estimator(net, &ipvs->tot_stats);
/* NOTE(review): cancel_delayed_work_sync() already cancels the
 * underlying work item; the following cancel_work_sync() on the same
 * work looks redundant -- confirm before removing. */
3607 cancel_delayed_work_sync(&ipvs->defense_work);
3608 cancel_work_sync(&ipvs->defense_work.work);
3609 #ifdef CONFIG_SYSCTL
3610 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3612 proc_net_remove(net, "ip_vs_stats_percpu");
3613 proc_net_remove(net, "ip_vs_stats");
3614 proc_net_remove(net, "ip_vs");
3615 free_percpu(ipvs->tot_stats.cpustats);
/* pernet hooks tying IPVS control init/cleanup to netns lifetime. */
3618 static struct pernet_operations ipvs_control_ops = {
3619 .init = __ip_vs_control_init,
3620 .exit = __ip_vs_control_cleanup,
/*
 * Module init for the control interface: set up the service hash
 * tables, then register in order: pernet subsystem, sockopt
 * interface, generic netlink family.  Later failures unwind the
 * earlier registrations (part of the unwind path is elided from
 * this excerpt).
 */
3623 int __init ip_vs_control_init(void)
3630 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3631 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3632 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3633 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3636 ret = register_pernet_subsys(&ipvs_control_ops);
3638 pr_err("cannot register namespace.\n");
3642 smp_wmb(); /* Do we really need it now ? */
3644 ret = nf_register_sockopt(&ip_vs_sockopts);
3646 pr_err("cannot register sockopt.\n");
3650 ret = ip_vs_genl_register();
3652 pr_err("cannot register Generic Netlink interface.\n");
/* genl registration failed: roll back the sockopt registration. */
3653 nf_unregister_sockopt(&ip_vs_sockopts);
3661 unregister_pernet_subsys(&ipvs_control_ops);
/*
 * Module exit: unregister the control interfaces.
 * NOTE(review): init registers pernet -> sockopt -> genl; teardown
 * here starts with the pernet subsystem rather than strict reverse
 * order -- verify this ordering is intended.
 */
3667 void ip_vs_control_cleanup(void)
3670 unregister_pernet_subsys(&ipvs_control_ops);
3671 ip_vs_genl_unregister();
3672 nf_unregister_sockopt(&ip_vs_sockopts);