Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes...
[pandora-kernel.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
/*
 * Mutex serializing the IPVS sockopt handlers.  A sleeping lock is used
 * because [gs]etsockopt may sleep.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock guarding the virtual service tables */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; read through ip_vs_get_debug_level(). */
static int sysctl_ip_vs_debug_level;

/* Return the current IPVS debug level. */
int ip_vs_get_debug_level(void)
{
        return sysctl_ip_vs_debug_level;
}
70 #endif
71
72 #ifdef CONFIG_IP_VS_IPV6
73 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
74 static int __ip_vs_addr_is_local_v6(struct net *net,
75                                     const struct in6_addr *addr)
76 {
77         struct rt6_info *rt;
78         struct flowi6 fl6 = {
79                 .daddr = *addr,
80         };
81
82         rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
83         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
84                 return 1;
85
86         return 0;
87 }
88 #endif
89
90 #ifdef CONFIG_SYSCTL
91 /*
92  *      update_defense_level is called from keventd and from sysctl,
93  *      so it needs to protect itself from softirqs
94  */
95 static void update_defense_level(struct netns_ipvs *ipvs)
96 {
97         struct sysinfo i;
98         static int old_secure_tcp = 0;
99         int availmem;
100         int nomem;
101         int to_change = -1;
102
103         /* we only count free and buffered memory (in pages) */
104         si_meminfo(&i);
105         availmem = i.freeram + i.bufferram;
106         /* however in linux 2.5 the i.bufferram is total page cache size,
107            we need adjust it */
108         /* si_swapinfo(&i); */
109         /* availmem = availmem - (i.totalswap - i.freeswap); */
110
111         nomem = (availmem < ipvs->sysctl_amemthresh);
112
113         local_bh_disable();
114
115         /* drop_entry */
116         spin_lock(&ipvs->dropentry_lock);
117         switch (ipvs->sysctl_drop_entry) {
118         case 0:
119                 atomic_set(&ipvs->dropentry, 0);
120                 break;
121         case 1:
122                 if (nomem) {
123                         atomic_set(&ipvs->dropentry, 1);
124                         ipvs->sysctl_drop_entry = 2;
125                 } else {
126                         atomic_set(&ipvs->dropentry, 0);
127                 }
128                 break;
129         case 2:
130                 if (nomem) {
131                         atomic_set(&ipvs->dropentry, 1);
132                 } else {
133                         atomic_set(&ipvs->dropentry, 0);
134                         ipvs->sysctl_drop_entry = 1;
135                 };
136                 break;
137         case 3:
138                 atomic_set(&ipvs->dropentry, 1);
139                 break;
140         }
141         spin_unlock(&ipvs->dropentry_lock);
142
143         /* drop_packet */
144         spin_lock(&ipvs->droppacket_lock);
145         switch (ipvs->sysctl_drop_packet) {
146         case 0:
147                 ipvs->drop_rate = 0;
148                 break;
149         case 1:
150                 if (nomem) {
151                         ipvs->drop_rate = ipvs->drop_counter
152                                 = ipvs->sysctl_amemthresh /
153                                 (ipvs->sysctl_amemthresh-availmem);
154                         ipvs->sysctl_drop_packet = 2;
155                 } else {
156                         ipvs->drop_rate = 0;
157                 }
158                 break;
159         case 2:
160                 if (nomem) {
161                         ipvs->drop_rate = ipvs->drop_counter
162                                 = ipvs->sysctl_amemthresh /
163                                 (ipvs->sysctl_amemthresh-availmem);
164                 } else {
165                         ipvs->drop_rate = 0;
166                         ipvs->sysctl_drop_packet = 1;
167                 }
168                 break;
169         case 3:
170                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
171                 break;
172         }
173         spin_unlock(&ipvs->droppacket_lock);
174
175         /* secure_tcp */
176         spin_lock(&ipvs->securetcp_lock);
177         switch (ipvs->sysctl_secure_tcp) {
178         case 0:
179                 if (old_secure_tcp >= 2)
180                         to_change = 0;
181                 break;
182         case 1:
183                 if (nomem) {
184                         if (old_secure_tcp < 2)
185                                 to_change = 1;
186                         ipvs->sysctl_secure_tcp = 2;
187                 } else {
188                         if (old_secure_tcp >= 2)
189                                 to_change = 0;
190                 }
191                 break;
192         case 2:
193                 if (nomem) {
194                         if (old_secure_tcp < 2)
195                                 to_change = 1;
196                 } else {
197                         if (old_secure_tcp >= 2)
198                                 to_change = 0;
199                         ipvs->sysctl_secure_tcp = 1;
200                 }
201                 break;
202         case 3:
203                 if (old_secure_tcp < 2)
204                         to_change = 1;
205                 break;
206         }
207         old_secure_tcp = ipvs->sysctl_secure_tcp;
208         if (to_change >= 0)
209                 ip_vs_protocol_timeout_change(ipvs,
210                                               ipvs->sysctl_secure_tcp > 1);
211         spin_unlock(&ipvs->securetcp_lock);
212
213         local_bh_enable();
214 }
215
216
217 /*
218  *      Timer for checking the defense
219  */
220 #define DEFENSE_TIMER_PERIOD    1*HZ
221
222 static void defense_work_handler(struct work_struct *work)
223 {
224         struct netns_ipvs *ipvs =
225                 container_of(work, struct netns_ipvs, defense_work.work);
226
227         update_defense_level(ipvs);
228         if (atomic_read(&ipvs->dropentry))
229                 ip_vs_random_dropentry(ipvs->net);
230         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
231 }
232 #endif
233
234 int
235 ip_vs_use_count_inc(void)
236 {
237         return try_module_get(THIS_MODULE);
238 }
239
240 void
241 ip_vs_use_count_dec(void)
242 {
243         module_put(THIS_MODULE);
244 }
245
246
/*
 *      Hash table: for virtual service lookups
 *
 *      Both tables below have IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
 *      buckets; IP_VS_SVC_TAB_MASK reduces a hash value to a bucket index.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
258
259
260 /*
261  *      Returns hash value for virtual service
262  */
263 static inline unsigned
264 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
265                   const union nf_inet_addr *addr, __be16 port)
266 {
267         register unsigned porth = ntohs(port);
268         __be32 addr_fold = addr->ip;
269
270 #ifdef CONFIG_IP_VS_IPV6
271         if (af == AF_INET6)
272                 addr_fold = addr->ip6[0]^addr->ip6[1]^
273                             addr->ip6[2]^addr->ip6[3];
274 #endif
275         addr_fold ^= ((size_t)net>>8);
276
277         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
278                 & IP_VS_SVC_TAB_MASK;
279 }
280
281 /*
282  *      Returns hash value of fwmark for virtual service lookup
283  */
284 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
285 {
286         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
287 }
288
289 /*
290  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
291  *      or in the ip_vs_svc_fwm_table by fwmark.
292  *      Should be called with locked tables.
293  */
294 static int ip_vs_svc_hash(struct ip_vs_service *svc)
295 {
296         unsigned hash;
297
298         if (svc->flags & IP_VS_SVC_F_HASHED) {
299                 pr_err("%s(): request for already hashed, called from %pF\n",
300                        __func__, __builtin_return_address(0));
301                 return 0;
302         }
303
304         if (svc->fwmark == 0) {
305                 /*
306                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
307                  */
308                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
309                                          &svc->addr, svc->port);
310                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
311         } else {
312                 /*
313                  *  Hash it by fwmark in svc_fwm_table
314                  */
315                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
316                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
317         }
318
319         svc->flags |= IP_VS_SVC_F_HASHED;
320         /* increase its refcnt because it is referenced by the svc table */
321         atomic_inc(&svc->refcnt);
322         return 1;
323 }
324
325
326 /*
327  *      Unhashes a service from svc_table / svc_fwm_table.
328  *      Should be called with locked tables.
329  */
330 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
331 {
332         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
333                 pr_err("%s(): request for unhash flagged, called from %pF\n",
334                        __func__, __builtin_return_address(0));
335                 return 0;
336         }
337
338         if (svc->fwmark == 0) {
339                 /* Remove it from the svc_table table */
340                 list_del(&svc->s_list);
341         } else {
342                 /* Remove it from the svc_fwm_table table */
343                 list_del(&svc->f_list);
344         }
345
346         svc->flags &= ~IP_VS_SVC_F_HASHED;
347         atomic_dec(&svc->refcnt);
348         return 1;
349 }
350
351
352 /*
353  *      Get service by {netns, proto,addr,port} in the service table.
354  */
355 static inline struct ip_vs_service *
356 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
357                      const union nf_inet_addr *vaddr, __be16 vport)
358 {
359         unsigned hash;
360         struct ip_vs_service *svc;
361
362         /* Check for "full" addressed entries */
363         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
364
365         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
366                 if ((svc->af == af)
367                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
368                     && (svc->port == vport)
369                     && (svc->protocol == protocol)
370                     && net_eq(svc->net, net)) {
371                         /* HIT */
372                         return svc;
373                 }
374         }
375
376         return NULL;
377 }
378
379
380 /*
381  *      Get service by {fwmark} in the service table.
382  */
383 static inline struct ip_vs_service *
384 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
385 {
386         unsigned hash;
387         struct ip_vs_service *svc;
388
389         /* Check for fwmark addressed entries */
390         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
391
392         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
393                 if (svc->fwmark == fwmark && svc->af == af
394                     && net_eq(svc->net, net)) {
395                         /* HIT */
396                         return svc;
397                 }
398         }
399
400         return NULL;
401 }
402
403 struct ip_vs_service *
404 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
405                   const union nf_inet_addr *vaddr, __be16 vport)
406 {
407         struct ip_vs_service *svc;
408         struct netns_ipvs *ipvs = net_ipvs(net);
409
410         read_lock(&__ip_vs_svc_lock);
411
412         /*
413          *      Check the table hashed by fwmark first
414          */
415         if (fwmark) {
416                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
417                 if (svc)
418                         goto out;
419         }
420
421         /*
422          *      Check the table hashed by <protocol,addr,port>
423          *      for "full" addressed entries
424          */
425         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
426
427         if (svc == NULL
428             && protocol == IPPROTO_TCP
429             && atomic_read(&ipvs->ftpsvc_counter)
430             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
431                 /*
432                  * Check if ftp service entry exists, the packet
433                  * might belong to FTP data connections.
434                  */
435                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
436         }
437
438         if (svc == NULL
439             && atomic_read(&ipvs->nullsvc_counter)) {
440                 /*
441                  * Check if the catch-all port (port zero) exists
442                  */
443                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
444         }
445
446   out:
447         if (svc)
448                 atomic_inc(&svc->usecnt);
449         read_unlock(&__ip_vs_svc_lock);
450
451         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
452                       fwmark, ip_vs_proto_name(protocol),
453                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
454                       svc ? "hit" : "not hit");
455
456         return svc;
457 }
458
459
460 static inline void
461 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
462 {
463         atomic_inc(&svc->refcnt);
464         dest->svc = svc;
465 }
466
467 static void
468 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
469 {
470         struct ip_vs_service *svc = dest->svc;
471
472         dest->svc = NULL;
473         if (atomic_dec_and_test(&svc->refcnt)) {
474                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
475                               svc->fwmark,
476                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
477                               ntohs(svc->port), atomic_read(&svc->usecnt));
478                 free_percpu(svc->stats.cpustats);
479                 kfree(svc);
480         }
481 }
482
483
484 /*
485  *      Returns hash value for real service
486  */
487 static inline unsigned ip_vs_rs_hashkey(int af,
488                                             const union nf_inet_addr *addr,
489                                             __be16 port)
490 {
491         register unsigned porth = ntohs(port);
492         __be32 addr_fold = addr->ip;
493
494 #ifdef CONFIG_IP_VS_IPV6
495         if (af == AF_INET6)
496                 addr_fold = addr->ip6[0]^addr->ip6[1]^
497                             addr->ip6[2]^addr->ip6[3];
498 #endif
499
500         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
501                 & IP_VS_RTAB_MASK;
502 }
503
504 /*
505  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
506  *      should be called with locked tables.
507  */
508 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
509 {
510         unsigned hash;
511
512         if (!list_empty(&dest->d_list)) {
513                 return 0;
514         }
515
516         /*
517          *      Hash by proto,addr,port,
518          *      which are the parameters of the real service.
519          */
520         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
521
522         list_add(&dest->d_list, &ipvs->rs_table[hash]);
523
524         return 1;
525 }
526
527 /*
528  *      UNhashes ip_vs_dest from rs_table.
529  *      should be called with locked tables.
530  */
531 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
532 {
533         /*
534          * Remove it from the rs_table table.
535          */
536         if (!list_empty(&dest->d_list)) {
537                 list_del(&dest->d_list);
538                 INIT_LIST_HEAD(&dest->d_list);
539         }
540
541         return 1;
542 }
543
544 /*
545  *      Lookup real service by <proto,addr,port> in the real service table.
546  */
547 struct ip_vs_dest *
548 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
549                           const union nf_inet_addr *daddr,
550                           __be16 dport)
551 {
552         struct netns_ipvs *ipvs = net_ipvs(net);
553         unsigned hash;
554         struct ip_vs_dest *dest;
555
556         /*
557          *      Check for "full" addressed entries
558          *      Return the first found entry
559          */
560         hash = ip_vs_rs_hashkey(af, daddr, dport);
561
562         read_lock(&ipvs->rs_lock);
563         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
564                 if ((dest->af == af)
565                     && ip_vs_addr_equal(af, &dest->addr, daddr)
566                     && (dest->port == dport)
567                     && ((dest->protocol == protocol) ||
568                         dest->vfwmark)) {
569                         /* HIT */
570                         read_unlock(&ipvs->rs_lock);
571                         return dest;
572                 }
573         }
574         read_unlock(&ipvs->rs_lock);
575
576         return NULL;
577 }
578
579 /*
580  *      Lookup destination by {addr,port} in the given service
581  */
582 static struct ip_vs_dest *
583 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
584                   __be16 dport)
585 {
586         struct ip_vs_dest *dest;
587
588         /*
589          * Find the destination for the given service
590          */
591         list_for_each_entry(dest, &svc->destinations, n_list) {
592                 if ((dest->af == svc->af)
593                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
594                     && (dest->port == dport)) {
595                         /* HIT */
596                         return dest;
597                 }
598         }
599
600         return NULL;
601 }
602
603 /*
604  * Find destination by {daddr,dport,vaddr,protocol}
605  * Cretaed to be used in ip_vs_process_message() in
606  * the backup synchronization daemon. It finds the
607  * destination to be bound to the received connection
608  * on the backup.
609  *
610  * ip_vs_lookup_real_service() looked promissing, but
611  * seems not working as expected.
612  */
613 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
614                                    const union nf_inet_addr *daddr,
615                                    __be16 dport,
616                                    const union nf_inet_addr *vaddr,
617                                    __be16 vport, __u16 protocol, __u32 fwmark)
618 {
619         struct ip_vs_dest *dest;
620         struct ip_vs_service *svc;
621
622         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
623         if (!svc)
624                 return NULL;
625         dest = ip_vs_lookup_dest(svc, daddr, dport);
626         if (dest)
627                 atomic_inc(&dest->refcnt);
628         ip_vs_service_put(svc);
629         return dest;
630 }
631
632 /*
633  *  Lookup dest by {svc,addr,port} in the destination trash.
634  *  The destination trash is used to hold the destinations that are removed
635  *  from the service table but are still referenced by some conn entries.
636  *  The reason to add the destination trash is when the dest is temporary
637  *  down (either by administrator or by monitor program), the dest can be
638  *  picked back from the trash, the remaining connections to the dest can
639  *  continue, and the counting information of the dest is also useful for
640  *  scheduling.
641  */
642 static struct ip_vs_dest *
643 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
644                      __be16 dport)
645 {
646         struct ip_vs_dest *dest, *nxt;
647         struct netns_ipvs *ipvs = net_ipvs(svc->net);
648
649         /*
650          * Find the destination in trash
651          */
652         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
653                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
654                               "dest->refcnt=%d\n",
655                               dest->vfwmark,
656                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
657                               ntohs(dest->port),
658                               atomic_read(&dest->refcnt));
659                 if (dest->af == svc->af &&
660                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
661                     dest->port == dport &&
662                     dest->vfwmark == svc->fwmark &&
663                     dest->protocol == svc->protocol &&
664                     (svc->fwmark ||
665                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
666                       dest->vport == svc->port))) {
667                         /* HIT */
668                         return dest;
669                 }
670
671                 /*
672                  * Try to purge the destination from trash if not referenced
673                  */
674                 if (atomic_read(&dest->refcnt) == 1) {
675                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
676                                       "from trash\n",
677                                       dest->vfwmark,
678                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
679                                       ntohs(dest->port));
680                         list_del(&dest->n_list);
681                         ip_vs_dst_reset(dest);
682                         __ip_vs_unbind_svc(dest);
683                         free_percpu(dest->stats.cpustats);
684                         kfree(dest);
685                 }
686         }
687
688         return NULL;
689 }
690
691
692 /*
693  *  Clean up all the destinations in the trash
694  *  Called by the ip_vs_control_cleanup()
695  *
696  *  When the ip_vs_control_clearup is activated by ipvs module exit,
697  *  the service tables must have been flushed and all the connections
698  *  are expired, and the refcnt of each destination in the trash must
699  *  be 1, so we simply release them here.
700  */
701 static void ip_vs_trash_cleanup(struct net *net)
702 {
703         struct ip_vs_dest *dest, *nxt;
704         struct netns_ipvs *ipvs = net_ipvs(net);
705
706         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
707                 list_del(&dest->n_list);
708                 ip_vs_dst_reset(dest);
709                 __ip_vs_unbind_svc(dest);
710                 free_percpu(dest->stats.cpustats);
711                 kfree(dest);
712         }
713 }
714
715 static void
716 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
717 {
718 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
719
720         spin_lock_bh(&src->lock);
721
722         IP_VS_SHOW_STATS_COUNTER(conns);
723         IP_VS_SHOW_STATS_COUNTER(inpkts);
724         IP_VS_SHOW_STATS_COUNTER(outpkts);
725         IP_VS_SHOW_STATS_COUNTER(inbytes);
726         IP_VS_SHOW_STATS_COUNTER(outbytes);
727
728         ip_vs_read_estimator(dst, src);
729
730         spin_unlock_bh(&src->lock);
731 }
732
733 static void
734 ip_vs_zero_stats(struct ip_vs_stats *stats)
735 {
736         spin_lock_bh(&stats->lock);
737
738         /* get current counters as zero point, rates are zeroed */
739
740 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
741
742         IP_VS_ZERO_STATS_COUNTER(conns);
743         IP_VS_ZERO_STATS_COUNTER(inpkts);
744         IP_VS_ZERO_STATS_COUNTER(outpkts);
745         IP_VS_ZERO_STATS_COUNTER(inbytes);
746         IP_VS_ZERO_STATS_COUNTER(outbytes);
747
748         ip_vs_zero_estimator(stats);
749
750         spin_unlock_bh(&stats->lock);
751 }
752
753 /*
754  *      Update a destination in the given service
755  */
756 static void
757 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
758                     struct ip_vs_dest_user_kern *udest, int add)
759 {
760         struct netns_ipvs *ipvs = net_ipvs(svc->net);
761         int conn_flags;
762
763         /* set the weight and the flags */
764         atomic_set(&dest->weight, udest->weight);
765         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
766         conn_flags |= IP_VS_CONN_F_INACTIVE;
767
768         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
769         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
770                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
771         } else {
772                 /*
773                  *    Put the real service in rs_table if not present.
774                  *    For now only for NAT!
775                  */
776                 write_lock_bh(&ipvs->rs_lock);
777                 ip_vs_rs_hash(ipvs, dest);
778                 write_unlock_bh(&ipvs->rs_lock);
779         }
780         atomic_set(&dest->conn_flags, conn_flags);
781
782         /* bind the service */
783         if (!dest->svc) {
784                 __ip_vs_bind_svc(dest, svc);
785         } else {
786                 if (dest->svc != svc) {
787                         __ip_vs_unbind_svc(dest);
788                         ip_vs_zero_stats(&dest->stats);
789                         __ip_vs_bind_svc(dest, svc);
790                 }
791         }
792
793         /* set the dest status flags */
794         dest->flags |= IP_VS_DEST_F_AVAILABLE;
795
796         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
797                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
798         dest->u_threshold = udest->u_threshold;
799         dest->l_threshold = udest->l_threshold;
800
801         spin_lock_bh(&dest->dst_lock);
802         ip_vs_dst_reset(dest);
803         spin_unlock_bh(&dest->dst_lock);
804
805         if (add)
806                 ip_vs_start_estimator(svc->net, &dest->stats);
807
808         write_lock_bh(&__ip_vs_svc_lock);
809
810         /* Wait until all other svc users go away */
811         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
812
813         if (add) {
814                 list_add(&dest->n_list, &svc->destinations);
815                 svc->num_dests++;
816         }
817
818         /* call the update_service, because server weight may be changed */
819         if (svc->scheduler->update_service)
820                 svc->scheduler->update_service(svc);
821
822         write_unlock_bh(&__ip_vs_svc_lock);
823 }
824
825
826 /*
827  *      Create a destination for the given service
828  */
829 static int
830 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
831                struct ip_vs_dest **dest_p)
832 {
833         struct ip_vs_dest *dest;
834         unsigned atype;
835
836         EnterFunction(2);
837
838 #ifdef CONFIG_IP_VS_IPV6
839         if (svc->af == AF_INET6) {
840                 atype = ipv6_addr_type(&udest->addr.in6);
841                 if ((!(atype & IPV6_ADDR_UNICAST) ||
842                         atype & IPV6_ADDR_LINKLOCAL) &&
843                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
844                         return -EINVAL;
845         } else
846 #endif
847         {
848                 atype = inet_addr_type(svc->net, udest->addr.ip);
849                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
850                         return -EINVAL;
851         }
852
853         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
854         if (dest == NULL) {
855                 pr_err("%s(): no memory.\n", __func__);
856                 return -ENOMEM;
857         }
858         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
859         if (!dest->stats.cpustats) {
860                 pr_err("%s() alloc_percpu failed\n", __func__);
861                 goto err_alloc;
862         }
863
864         dest->af = svc->af;
865         dest->protocol = svc->protocol;
866         dest->vaddr = svc->addr;
867         dest->vport = svc->port;
868         dest->vfwmark = svc->fwmark;
869         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
870         dest->port = udest->port;
871
872         atomic_set(&dest->activeconns, 0);
873         atomic_set(&dest->inactconns, 0);
874         atomic_set(&dest->persistconns, 0);
875         atomic_set(&dest->refcnt, 1);
876
877         INIT_LIST_HEAD(&dest->d_list);
878         spin_lock_init(&dest->dst_lock);
879         spin_lock_init(&dest->stats.lock);
880         __ip_vs_update_dest(svc, dest, udest, 1);
881
882         *dest_p = dest;
883
884         LeaveFunction(2);
885         return 0;
886
887 err_alloc:
888         kfree(dest);
889         return -ENOMEM;
890 }
891
892
893 /*
894  *      Add a destination into an existing service
895  */
896 static int
897 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
898 {
899         struct ip_vs_dest *dest;
900         union nf_inet_addr daddr;
901         __be16 dport = udest->port;
902         int ret;
903
904         EnterFunction(2);
905
906         if (udest->weight < 0) {
907                 pr_err("%s(): server weight less than zero\n", __func__);
908                 return -ERANGE;
909         }
910
911         if (udest->l_threshold > udest->u_threshold) {
912                 pr_err("%s(): lower threshold is higher than upper threshold\n",
913                         __func__);
914                 return -ERANGE;
915         }
916
917         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
918
919         /*
920          * Check if the dest already exists in the list
921          */
922         dest = ip_vs_lookup_dest(svc, &daddr, dport);
923
924         if (dest != NULL) {
925                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
926                 return -EEXIST;
927         }
928
929         /*
930          * Check if the dest already exists in the trash and
931          * is from the same service
932          */
933         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
934
935         if (dest != NULL) {
936                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
937                               "dest->refcnt=%d, service %u/%s:%u\n",
938                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
939                               atomic_read(&dest->refcnt),
940                               dest->vfwmark,
941                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
942                               ntohs(dest->vport));
943
944                 /*
945                  * Get the destination from the trash
946                  */
947                 list_del(&dest->n_list);
948
949                 __ip_vs_update_dest(svc, dest, udest, 1);
950                 ret = 0;
951         } else {
952                 /*
953                  * Allocate and initialize the dest structure
954                  */
955                 ret = ip_vs_new_dest(svc, udest, &dest);
956         }
957         LeaveFunction(2);
958
959         return ret;
960 }
961
962
963 /*
964  *      Edit a destination in the given service
965  */
966 static int
967 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
968 {
969         struct ip_vs_dest *dest;
970         union nf_inet_addr daddr;
971         __be16 dport = udest->port;
972
973         EnterFunction(2);
974
975         if (udest->weight < 0) {
976                 pr_err("%s(): server weight less than zero\n", __func__);
977                 return -ERANGE;
978         }
979
980         if (udest->l_threshold > udest->u_threshold) {
981                 pr_err("%s(): lower threshold is higher than upper threshold\n",
982                         __func__);
983                 return -ERANGE;
984         }
985
986         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
987
988         /*
989          *  Lookup the destination list
990          */
991         dest = ip_vs_lookup_dest(svc, &daddr, dport);
992
993         if (dest == NULL) {
994                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
995                 return -ENOENT;
996         }
997
998         __ip_vs_update_dest(svc, dest, udest, 0);
999         LeaveFunction(2);
1000
1001         return 0;
1002 }
1003
1004
1005 /*
1006  *      Delete a destination (must be already unlinked from the service)
1007  */
1008 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1009 {
1010         struct netns_ipvs *ipvs = net_ipvs(net);
1011
1012         ip_vs_stop_estimator(net, &dest->stats);
1013
1014         /*
1015          *  Remove it from the d-linked list with the real services.
1016          */
1017         write_lock_bh(&ipvs->rs_lock);
1018         ip_vs_rs_unhash(dest);
1019         write_unlock_bh(&ipvs->rs_lock);
1020
1021         /*
1022          *  Decrease the refcnt of the dest, and free the dest
1023          *  if nobody refers to it (refcnt=0). Otherwise, throw
1024          *  the destination into the trash.
1025          */
1026         if (atomic_dec_and_test(&dest->refcnt)) {
1027                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1028                               dest->vfwmark,
1029                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1030                               ntohs(dest->port));
1031                 ip_vs_dst_reset(dest);
1032                 /* simply decrease svc->refcnt here, let the caller check
1033                    and release the service if nobody refers to it.
1034                    Only user context can release destination and service,
1035                    and only one user context can update virtual service at a
1036                    time, so the operation here is OK */
1037                 atomic_dec(&dest->svc->refcnt);
1038                 free_percpu(dest->stats.cpustats);
1039                 kfree(dest);
1040         } else {
1041                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042                               "dest->refcnt=%d\n",
1043                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1044                               ntohs(dest->port),
1045                               atomic_read(&dest->refcnt));
1046                 list_add(&dest->n_list, &ipvs->dest_trash);
1047                 atomic_inc(&dest->refcnt);
1048         }
1049 }
1050
1051
1052 /*
1053  *      Unlink a destination from the given service
1054  */
1055 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056                                 struct ip_vs_dest *dest,
1057                                 int svcupd)
1058 {
1059         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1060
1061         /*
1062          *  Remove it from the d-linked destination list.
1063          */
1064         list_del(&dest->n_list);
1065         svc->num_dests--;
1066
1067         /*
1068          *  Call the update_service function of its scheduler
1069          */
1070         if (svcupd && svc->scheduler->update_service)
1071                         svc->scheduler->update_service(svc);
1072 }
1073
1074
1075 /*
1076  *      Delete a destination server in the given service
1077  */
1078 static int
1079 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1080 {
1081         struct ip_vs_dest *dest;
1082         __be16 dport = udest->port;
1083
1084         EnterFunction(2);
1085
1086         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1087
1088         if (dest == NULL) {
1089                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1090                 return -ENOENT;
1091         }
1092
1093         write_lock_bh(&__ip_vs_svc_lock);
1094
1095         /*
1096          *      Wait until all other svc users go away.
1097          */
1098         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1099
1100         /*
1101          *      Unlink dest from the service
1102          */
1103         __ip_vs_unlink_dest(svc, dest, 1);
1104
1105         write_unlock_bh(&__ip_vs_svc_lock);
1106
1107         /*
1108          *      Delete the destination
1109          */
1110         __ip_vs_del_dest(svc->net, dest);
1111
1112         LeaveFunction(2);
1113
1114         return 0;
1115 }
1116
1117
1118 /*
1119  *      Add a service into the service hash table
1120  */
1121 static int
1122 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1123                   struct ip_vs_service **svc_p)
1124 {
1125         int ret = 0;
1126         struct ip_vs_scheduler *sched = NULL;
1127         struct ip_vs_pe *pe = NULL;
1128         struct ip_vs_service *svc = NULL;
1129         struct netns_ipvs *ipvs = net_ipvs(net);
1130
1131         /* increase the module use count */
1132         ip_vs_use_count_inc();
1133
1134         /* Lookup the scheduler by 'u->sched_name' */
1135         sched = ip_vs_scheduler_get(u->sched_name);
1136         if (sched == NULL) {
1137                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1138                 ret = -ENOENT;
1139                 goto out_err;
1140         }
1141
1142         if (u->pe_name && *u->pe_name) {
1143                 pe = ip_vs_pe_getbyname(u->pe_name);
1144                 if (pe == NULL) {
1145                         pr_info("persistence engine module ip_vs_pe_%s "
1146                                 "not found\n", u->pe_name);
1147                         ret = -ENOENT;
1148                         goto out_err;
1149                 }
1150         }
1151
1152 #ifdef CONFIG_IP_VS_IPV6
1153         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1154                 ret = -EINVAL;
1155                 goto out_err;
1156         }
1157 #endif
1158
1159         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1160         if (svc == NULL) {
1161                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1162                 ret = -ENOMEM;
1163                 goto out_err;
1164         }
1165         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1166         if (!svc->stats.cpustats) {
1167                 pr_err("%s() alloc_percpu failed\n", __func__);
1168                 goto out_err;
1169         }
1170
1171         /* I'm the first user of the service */
1172         atomic_set(&svc->usecnt, 0);
1173         atomic_set(&svc->refcnt, 0);
1174
1175         svc->af = u->af;
1176         svc->protocol = u->protocol;
1177         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1178         svc->port = u->port;
1179         svc->fwmark = u->fwmark;
1180         svc->flags = u->flags;
1181         svc->timeout = u->timeout * HZ;
1182         svc->netmask = u->netmask;
1183         svc->net = net;
1184
1185         INIT_LIST_HEAD(&svc->destinations);
1186         rwlock_init(&svc->sched_lock);
1187         spin_lock_init(&svc->stats.lock);
1188
1189         /* Bind the scheduler */
1190         ret = ip_vs_bind_scheduler(svc, sched);
1191         if (ret)
1192                 goto out_err;
1193         sched = NULL;
1194
1195         /* Bind the ct retriever */
1196         ip_vs_bind_pe(svc, pe);
1197         pe = NULL;
1198
1199         /* Update the virtual service counters */
1200         if (svc->port == FTPPORT)
1201                 atomic_inc(&ipvs->ftpsvc_counter);
1202         else if (svc->port == 0)
1203                 atomic_inc(&ipvs->nullsvc_counter);
1204
1205         ip_vs_start_estimator(net, &svc->stats);
1206
1207         /* Count only IPv4 services for old get/setsockopt interface */
1208         if (svc->af == AF_INET)
1209                 ipvs->num_services++;
1210
1211         /* Hash the service into the service table */
1212         write_lock_bh(&__ip_vs_svc_lock);
1213         ip_vs_svc_hash(svc);
1214         write_unlock_bh(&__ip_vs_svc_lock);
1215
1216         *svc_p = svc;
1217         return 0;
1218
1219
1220  out_err:
1221         if (svc != NULL) {
1222                 ip_vs_unbind_scheduler(svc);
1223                 if (svc->inc) {
1224                         local_bh_disable();
1225                         ip_vs_app_inc_put(svc->inc);
1226                         local_bh_enable();
1227                 }
1228                 if (svc->stats.cpustats)
1229                         free_percpu(svc->stats.cpustats);
1230                 kfree(svc);
1231         }
1232         ip_vs_scheduler_put(sched);
1233         ip_vs_pe_put(pe);
1234
1235         /* decrease the module use count */
1236         ip_vs_use_count_dec();
1237
1238         return ret;
1239 }
1240
1241
1242 /*
1243  *      Edit a service and bind it with a new scheduler
1244  */
1245 static int
1246 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1247 {
1248         struct ip_vs_scheduler *sched, *old_sched;
1249         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1250         int ret = 0;
1251
1252         /*
1253          * Lookup the scheduler, by 'u->sched_name'
1254          */
1255         sched = ip_vs_scheduler_get(u->sched_name);
1256         if (sched == NULL) {
1257                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1258                 return -ENOENT;
1259         }
1260         old_sched = sched;
1261
1262         if (u->pe_name && *u->pe_name) {
1263                 pe = ip_vs_pe_getbyname(u->pe_name);
1264                 if (pe == NULL) {
1265                         pr_info("persistence engine module ip_vs_pe_%s "
1266                                 "not found\n", u->pe_name);
1267                         ret = -ENOENT;
1268                         goto out;
1269                 }
1270                 old_pe = pe;
1271         }
1272
1273 #ifdef CONFIG_IP_VS_IPV6
1274         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1275                 ret = -EINVAL;
1276                 goto out;
1277         }
1278 #endif
1279
1280         write_lock_bh(&__ip_vs_svc_lock);
1281
1282         /*
1283          * Wait until all other svc users go away.
1284          */
1285         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1286
1287         /*
1288          * Set the flags and timeout value
1289          */
1290         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1291         svc->timeout = u->timeout * HZ;
1292         svc->netmask = u->netmask;
1293
1294         old_sched = svc->scheduler;
1295         if (sched != old_sched) {
1296                 /*
1297                  * Unbind the old scheduler
1298                  */
1299                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1300                         old_sched = sched;
1301                         goto out_unlock;
1302                 }
1303
1304                 /*
1305                  * Bind the new scheduler
1306                  */
1307                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1308                         /*
1309                          * If ip_vs_bind_scheduler fails, restore the old
1310                          * scheduler.
1311                          * The main reason of failure is out of memory.
1312                          *
1313                          * The question is if the old scheduler can be
1314                          * restored all the time. TODO: if it cannot be
1315                          * restored some time, we must delete the service,
1316                          * otherwise the system may crash.
1317                          */
1318                         ip_vs_bind_scheduler(svc, old_sched);
1319                         old_sched = sched;
1320                         goto out_unlock;
1321                 }
1322         }
1323
1324         old_pe = svc->pe;
1325         if (pe != old_pe) {
1326                 ip_vs_unbind_pe(svc);
1327                 ip_vs_bind_pe(svc, pe);
1328         }
1329
1330   out_unlock:
1331         write_unlock_bh(&__ip_vs_svc_lock);
1332   out:
1333         ip_vs_scheduler_put(old_sched);
1334         ip_vs_pe_put(old_pe);
1335         return ret;
1336 }
1337
1338
1339 /*
1340  *      Delete a service from the service list
1341  *      - The service must be unlinked, unlocked and not referenced!
1342  *      - We are called under _bh lock
1343  */
1344 static void __ip_vs_del_service(struct ip_vs_service *svc)
1345 {
1346         struct ip_vs_dest *dest, *nxt;
1347         struct ip_vs_scheduler *old_sched;
1348         struct ip_vs_pe *old_pe;
1349         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1350
1351         pr_info("%s: enter\n", __func__);
1352
1353         /* Count only IPv4 services for old get/setsockopt interface */
1354         if (svc->af == AF_INET)
1355                 ipvs->num_services--;
1356
1357         ip_vs_stop_estimator(svc->net, &svc->stats);
1358
1359         /* Unbind scheduler */
1360         old_sched = svc->scheduler;
1361         ip_vs_unbind_scheduler(svc);
1362         ip_vs_scheduler_put(old_sched);
1363
1364         /* Unbind persistence engine */
1365         old_pe = svc->pe;
1366         ip_vs_unbind_pe(svc);
1367         ip_vs_pe_put(old_pe);
1368
1369         /* Unbind app inc */
1370         if (svc->inc) {
1371                 ip_vs_app_inc_put(svc->inc);
1372                 svc->inc = NULL;
1373         }
1374
1375         /*
1376          *    Unlink the whole destination list
1377          */
1378         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1379                 __ip_vs_unlink_dest(svc, dest, 0);
1380                 __ip_vs_del_dest(svc->net, dest);
1381         }
1382
1383         /*
1384          *    Update the virtual service counters
1385          */
1386         if (svc->port == FTPPORT)
1387                 atomic_dec(&ipvs->ftpsvc_counter);
1388         else if (svc->port == 0)
1389                 atomic_dec(&ipvs->nullsvc_counter);
1390
1391         /*
1392          *    Free the service if nobody refers to it
1393          */
1394         if (atomic_read(&svc->refcnt) == 0) {
1395                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1396                               svc->fwmark,
1397                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1398                               ntohs(svc->port), atomic_read(&svc->usecnt));
1399                 free_percpu(svc->stats.cpustats);
1400                 kfree(svc);
1401         }
1402
1403         /* decrease the module use count */
1404         ip_vs_use_count_dec();
1405 }
1406
1407 /*
1408  * Unlink a service from list and try to delete it if its refcnt reached 0
1409  */
1410 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1411 {
1412         /*
1413          * Unhash it from the service table
1414          */
1415         write_lock_bh(&__ip_vs_svc_lock);
1416
1417         ip_vs_svc_unhash(svc);
1418
1419         /*
1420          * Wait until all the svc users go away.
1421          */
1422         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1423
1424         __ip_vs_del_service(svc);
1425
1426         write_unlock_bh(&__ip_vs_svc_lock);
1427 }
1428
1429 /*
1430  *      Delete a service from the service list
1431  */
1432 static int ip_vs_del_service(struct ip_vs_service *svc)
1433 {
1434         if (svc == NULL)
1435                 return -EEXIST;
1436         ip_vs_unlink_service(svc);
1437
1438         return 0;
1439 }
1440
1441
1442 /*
1443  *      Flush all the virtual services
1444  */
1445 static int ip_vs_flush(struct net *net)
1446 {
1447         int idx;
1448         struct ip_vs_service *svc, *nxt;
1449
1450         /*
1451          * Flush the service table hashed by <netns,protocol,addr,port>
1452          */
1453         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1454                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1455                                          s_list) {
1456                         if (net_eq(svc->net, net))
1457                                 ip_vs_unlink_service(svc);
1458                 }
1459         }
1460
1461         /*
1462          * Flush the service table hashed by fwmark
1463          */
1464         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1465                 list_for_each_entry_safe(svc, nxt,
1466                                          &ip_vs_svc_fwm_table[idx], f_list) {
1467                         if (net_eq(svc->net, net))
1468                                 ip_vs_unlink_service(svc);
1469                 }
1470         }
1471
1472         return 0;
1473 }
1474
1475
1476 /*
1477  *      Zero counters in a service or all services
1478  */
1479 static int ip_vs_zero_service(struct ip_vs_service *svc)
1480 {
1481         struct ip_vs_dest *dest;
1482
1483         write_lock_bh(&__ip_vs_svc_lock);
1484         list_for_each_entry(dest, &svc->destinations, n_list) {
1485                 ip_vs_zero_stats(&dest->stats);
1486         }
1487         ip_vs_zero_stats(&svc->stats);
1488         write_unlock_bh(&__ip_vs_svc_lock);
1489         return 0;
1490 }
1491
1492 static int ip_vs_zero_all(struct net *net)
1493 {
1494         int idx;
1495         struct ip_vs_service *svc;
1496
1497         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1498                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1499                         if (net_eq(svc->net, net))
1500                                 ip_vs_zero_service(svc);
1501                 }
1502         }
1503
1504         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1505                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1506                         if (net_eq(svc->net, net))
1507                                 ip_vs_zero_service(svc);
1508                 }
1509         }
1510
1511         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1512         return 0;
1513 }
1514
1515 #ifdef CONFIG_SYSCTL
1516 static int
1517 proc_do_defense_mode(ctl_table *table, int write,
1518                      void __user *buffer, size_t *lenp, loff_t *ppos)
1519 {
1520         struct net *net = current->nsproxy->net_ns;
1521         int *valp = table->data;
1522         int val = *valp;
1523         int rc;
1524
1525         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1526         if (write && (*valp != val)) {
1527                 if ((*valp < 0) || (*valp > 3)) {
1528                         /* Restore the correct value */
1529                         *valp = val;
1530                 } else {
1531                         update_defense_level(net_ipvs(net));
1532                 }
1533         }
1534         return rc;
1535 }
1536
1537 static int
1538 proc_do_sync_threshold(ctl_table *table, int write,
1539                        void __user *buffer, size_t *lenp, loff_t *ppos)
1540 {
1541         int *valp = table->data;
1542         int val[2];
1543         int rc;
1544
1545         /* backup the value first */
1546         memcpy(val, valp, sizeof(val));
1547
1548         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1549         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550                 /* Restore the correct value */
1551                 memcpy(valp, val, sizeof(val));
1552         }
1553         return rc;
1554 }
1555
1556 static int
1557 proc_do_sync_mode(ctl_table *table, int write,
1558                      void __user *buffer, size_t *lenp, loff_t *ppos)
1559 {
1560         int *valp = table->data;
1561         int val = *valp;
1562         int rc;
1563
1564         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565         if (write && (*valp != val)) {
1566                 if ((*valp < 0) || (*valp > 1)) {
1567                         /* Restore the correct value */
1568                         *valp = val;
1569                 } else {
1570                         struct net *net = current->nsproxy->net_ns;
1571                         ip_vs_sync_switch_mode(net, val);
1572                 }
1573         }
1574         return rc;
1575 }
1576
1577 /*
1578  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1579  *      Do not change order or insert new entries without
1580  *      align with netns init in __ip_vs_control_init()
1581  */
1582
1583 static struct ctl_table vs_vars[] = {
1584         {
1585                 .procname       = "amemthresh",
1586                 .maxlen         = sizeof(int),
1587                 .mode           = 0644,
1588                 .proc_handler   = proc_dointvec,
1589         },
1590         {
1591                 .procname       = "am_droprate",
1592                 .maxlen         = sizeof(int),
1593                 .mode           = 0644,
1594                 .proc_handler   = proc_dointvec,
1595         },
1596         {
1597                 .procname       = "drop_entry",
1598                 .maxlen         = sizeof(int),
1599                 .mode           = 0644,
1600                 .proc_handler   = proc_do_defense_mode,
1601         },
1602         {
1603                 .procname       = "drop_packet",
1604                 .maxlen         = sizeof(int),
1605                 .mode           = 0644,
1606                 .proc_handler   = proc_do_defense_mode,
1607         },
1608 #ifdef CONFIG_IP_VS_NFCT
1609         {
1610                 .procname       = "conntrack",
1611                 .maxlen         = sizeof(int),
1612                 .mode           = 0644,
1613                 .proc_handler   = &proc_dointvec,
1614         },
1615 #endif
1616         {
1617                 .procname       = "secure_tcp",
1618                 .maxlen         = sizeof(int),
1619                 .mode           = 0644,
1620                 .proc_handler   = proc_do_defense_mode,
1621         },
1622         {
1623                 .procname       = "snat_reroute",
1624                 .maxlen         = sizeof(int),
1625                 .mode           = 0644,
1626                 .proc_handler   = &proc_dointvec,
1627         },
1628         {
1629                 .procname       = "sync_version",
1630                 .maxlen         = sizeof(int),
1631                 .mode           = 0644,
1632                 .proc_handler   = &proc_do_sync_mode,
1633         },
1634         {
1635                 .procname       = "cache_bypass",
1636                 .maxlen         = sizeof(int),
1637                 .mode           = 0644,
1638                 .proc_handler   = proc_dointvec,
1639         },
1640         {
1641                 .procname       = "expire_nodest_conn",
1642                 .maxlen         = sizeof(int),
1643                 .mode           = 0644,
1644                 .proc_handler   = proc_dointvec,
1645         },
1646         {
1647                 .procname       = "expire_quiescent_template",
1648                 .maxlen         = sizeof(int),
1649                 .mode           = 0644,
1650                 .proc_handler   = proc_dointvec,
1651         },
1652         {
1653                 .procname       = "sync_threshold",
1654                 .maxlen         =
1655                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1656                 .mode           = 0644,
1657                 .proc_handler   = proc_do_sync_threshold,
1658         },
1659         {
1660                 .procname       = "nat_icmp_send",
1661                 .maxlen         = sizeof(int),
1662                 .mode           = 0644,
1663                 .proc_handler   = proc_dointvec,
1664         },
1665 #ifdef CONFIG_IP_VS_DEBUG
1666         {
1667                 .procname       = "debug_level",
1668                 .data           = &sysctl_ip_vs_debug_level,
1669                 .maxlen         = sizeof(int),
1670                 .mode           = 0644,
1671                 .proc_handler   = proc_dointvec,
1672         },
1673 #endif
1674 #if 0
1675         {
1676                 .procname       = "timeout_established",
1677                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1678                 .maxlen         = sizeof(int),
1679                 .mode           = 0644,
1680                 .proc_handler   = proc_dointvec_jiffies,
1681         },
1682         {
1683                 .procname       = "timeout_synsent",
1684                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1685                 .maxlen         = sizeof(int),
1686                 .mode           = 0644,
1687                 .proc_handler   = proc_dointvec_jiffies,
1688         },
1689         {
1690                 .procname       = "timeout_synrecv",
1691                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1692                 .maxlen         = sizeof(int),
1693                 .mode           = 0644,
1694                 .proc_handler   = proc_dointvec_jiffies,
1695         },
1696         {
1697                 .procname       = "timeout_finwait",
1698                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1699                 .maxlen         = sizeof(int),
1700                 .mode           = 0644,
1701                 .proc_handler   = proc_dointvec_jiffies,
1702         },
1703         {
1704                 .procname       = "timeout_timewait",
1705                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1706                 .maxlen         = sizeof(int),
1707                 .mode           = 0644,
1708                 .proc_handler   = proc_dointvec_jiffies,
1709         },
1710         {
1711                 .procname       = "timeout_close",
1712                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1713                 .maxlen         = sizeof(int),
1714                 .mode           = 0644,
1715                 .proc_handler   = proc_dointvec_jiffies,
1716         },
1717         {
1718                 .procname       = "timeout_closewait",
1719                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1720                 .maxlen         = sizeof(int),
1721                 .mode           = 0644,
1722                 .proc_handler   = proc_dointvec_jiffies,
1723         },
1724         {
1725                 .procname       = "timeout_lastack",
1726                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1727                 .maxlen         = sizeof(int),
1728                 .mode           = 0644,
1729                 .proc_handler   = proc_dointvec_jiffies,
1730         },
1731         {
1732                 .procname       = "timeout_listen",
1733                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1734                 .maxlen         = sizeof(int),
1735                 .mode           = 0644,
1736                 .proc_handler   = proc_dointvec_jiffies,
1737         },
1738         {
1739                 .procname       = "timeout_synack",
1740                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1741                 .maxlen         = sizeof(int),
1742                 .mode           = 0644,
1743                 .proc_handler   = proc_dointvec_jiffies,
1744         },
1745         {
1746                 .procname       = "timeout_udp",
1747                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1748                 .maxlen         = sizeof(int),
1749                 .mode           = 0644,
1750                 .proc_handler   = proc_dointvec_jiffies,
1751         },
1752         {
1753                 .procname       = "timeout_icmp",
1754                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1755                 .maxlen         = sizeof(int),
1756                 .mode           = 0644,
1757                 .proc_handler   = proc_dointvec_jiffies,
1758         },
1759 #endif
1760         { }
1761 };
1762
1763 const struct ctl_path net_vs_ctl_path[] = {
1764         { .procname = "net", },
1765         { .procname = "ipv4", },
1766         { .procname = "vs", },
1767         { }
1768 };
1769 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1770 #endif
1771
1772 #ifdef CONFIG_PROC_FS
1773
1774 struct ip_vs_iter {
1775         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1776         struct list_head *table;
1777         int bucket;
1778 };
1779
1780 /*
1781  *      Write the contents of the VS rule table to a PROCfs file.
1782  *      (It is kept just for backward compatibility)
1783  */
1784 static inline const char *ip_vs_fwd_name(unsigned flags)
1785 {
1786         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1787         case IP_VS_CONN_F_LOCALNODE:
1788                 return "Local";
1789         case IP_VS_CONN_F_TUNNEL:
1790                 return "Tunnel";
1791         case IP_VS_CONN_F_DROUTE:
1792                 return "Route";
1793         default:
1794                 return "Masq";
1795         }
1796 }
1797
1798
1799 /* Get the Nth entry in the two lists */
1800 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1801 {
1802         struct net *net = seq_file_net(seq);
1803         struct ip_vs_iter *iter = seq->private;
1804         int idx;
1805         struct ip_vs_service *svc;
1806
1807         /* look in hash by protocol */
1808         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1809                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1810                         if (net_eq(svc->net, net) && pos-- == 0) {
1811                                 iter->table = ip_vs_svc_table;
1812                                 iter->bucket = idx;
1813                                 return svc;
1814                         }
1815                 }
1816         }
1817
1818         /* keep looking in fwmark */
1819         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1820                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1821                         if (net_eq(svc->net, net) && pos-- == 0) {
1822                                 iter->table = ip_vs_svc_fwm_table;
1823                                 iter->bucket = idx;
1824                                 return svc;
1825                         }
1826                 }
1827         }
1828
1829         return NULL;
1830 }
1831
1832 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1833 __acquires(__ip_vs_svc_lock)
1834 {
1835
1836         read_lock_bh(&__ip_vs_svc_lock);
1837         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1838 }
1839
1840
1841 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1842 {
1843         struct list_head *e;
1844         struct ip_vs_iter *iter;
1845         struct ip_vs_service *svc;
1846
1847         ++*pos;
1848         if (v == SEQ_START_TOKEN)
1849                 return ip_vs_info_array(seq,0);
1850
1851         svc = v;
1852         iter = seq->private;
1853
1854         if (iter->table == ip_vs_svc_table) {
1855                 /* next service in table hashed by protocol */
1856                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1857                         return list_entry(e, struct ip_vs_service, s_list);
1858
1859
1860                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1861                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1862                                             s_list) {
1863                                 return svc;
1864                         }
1865                 }
1866
1867                 iter->table = ip_vs_svc_fwm_table;
1868                 iter->bucket = -1;
1869                 goto scan_fwmark;
1870         }
1871
1872         /* next service in hashed by fwmark */
1873         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1874                 return list_entry(e, struct ip_vs_service, f_list);
1875
1876  scan_fwmark:
1877         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1878                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1879                                     f_list)
1880                         return svc;
1881         }
1882
1883         return NULL;
1884 }
1885
/*
 * seq_file ->stop: drop the read lock taken by ip_vs_info_seq_start().
 */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1891
1892
1893 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1894 {
1895         if (v == SEQ_START_TOKEN) {
1896                 seq_printf(seq,
1897                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1898                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1899                 seq_puts(seq,
1900                          "Prot LocalAddress:Port Scheduler Flags\n");
1901                 seq_puts(seq,
1902                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1903         } else {
1904                 const struct ip_vs_service *svc = v;
1905                 const struct ip_vs_iter *iter = seq->private;
1906                 const struct ip_vs_dest *dest;
1907
1908                 if (iter->table == ip_vs_svc_table) {
1909 #ifdef CONFIG_IP_VS_IPV6
1910                         if (svc->af == AF_INET6)
1911                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1912                                            ip_vs_proto_name(svc->protocol),
1913                                            &svc->addr.in6,
1914                                            ntohs(svc->port),
1915                                            svc->scheduler->name);
1916                         else
1917 #endif
1918                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1919                                            ip_vs_proto_name(svc->protocol),
1920                                            ntohl(svc->addr.ip),
1921                                            ntohs(svc->port),
1922                                            svc->scheduler->name,
1923                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1924                 } else {
1925                         seq_printf(seq, "FWM  %08X %s %s",
1926                                    svc->fwmark, svc->scheduler->name,
1927                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1928                 }
1929
1930                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1931                         seq_printf(seq, "persistent %d %08X\n",
1932                                 svc->timeout,
1933                                 ntohl(svc->netmask));
1934                 else
1935                         seq_putc(seq, '\n');
1936
1937                 list_for_each_entry(dest, &svc->destinations, n_list) {
1938 #ifdef CONFIG_IP_VS_IPV6
1939                         if (dest->af == AF_INET6)
1940                                 seq_printf(seq,
1941                                            "  -> [%pI6]:%04X"
1942                                            "      %-7s %-6d %-10d %-10d\n",
1943                                            &dest->addr.in6,
1944                                            ntohs(dest->port),
1945                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1946                                            atomic_read(&dest->weight),
1947                                            atomic_read(&dest->activeconns),
1948                                            atomic_read(&dest->inactconns));
1949                         else
1950 #endif
1951                                 seq_printf(seq,
1952                                            "  -> %08X:%04X      "
1953                                            "%-7s %-6d %-10d %-10d\n",
1954                                            ntohl(dest->addr.ip),
1955                                            ntohs(dest->port),
1956                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1957                                            atomic_read(&dest->weight),
1958                                            atomic_read(&dest->activeconns),
1959                                            atomic_read(&dest->inactconns));
1960
1961                 }
1962         }
1963         return 0;
1964 }
1965
/* seq_file callbacks that walk and print the virtual service table. */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
1972
/*
 * open() handler of the service-table proc file: sets up a seq_file via
 * seq_open_net() whose private area is sized for a struct ip_vs_iter.
 * Returns whatever seq_open_net() returns.
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
1978
1979 static const struct file_operations ip_vs_info_fops = {
1980         .owner   = THIS_MODULE,
1981         .open    = ip_vs_info_open,
1982         .read    = seq_read,
1983         .llseek  = seq_lseek,
1984         .release = seq_release_private,
1985 };
1986
1987 #endif
1988
1989 #ifdef CONFIG_PROC_FS
1990 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1991 {
1992         struct net *net = seq_file_single_net(seq);
1993         struct ip_vs_stats_user show;
1994
1995 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1996         seq_puts(seq,
1997                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1998         seq_printf(seq,
1999                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2000
2001         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2002         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2003                    show.inpkts, show.outpkts,
2004                    (unsigned long long) show.inbytes,
2005                    (unsigned long long) show.outbytes);
2006
2007 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2008         seq_puts(seq,
2009                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2010         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2011                         show.cps, show.inpps, show.outpps,
2012                         show.inbps, show.outbps);
2013
2014         return 0;
2015 }
2016
/*
 * open() handler of the statistics proc file: a single-record seq_file
 * set up through single_open_net() and rendered by ip_vs_stats_show().
 */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_show);
}
2021
2022 static const struct file_operations ip_vs_stats_fops = {
2023         .owner = THIS_MODULE,
2024         .open = ip_vs_stats_seq_open,
2025         .read = seq_read,
2026         .llseek = seq_lseek,
2027         .release = single_release,
2028 };
2029
2030 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2031 {
2032         struct net *net = seq_file_single_net(seq);
2033         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2034         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2035         struct ip_vs_stats_user rates;
2036         int i;
2037
2038 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2039         seq_puts(seq,
2040                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2041         seq_printf(seq,
2042                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2043
2044         for_each_possible_cpu(i) {
2045                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2046                 unsigned int start;
2047                 __u64 inbytes, outbytes;
2048
2049                 do {
2050                         start = u64_stats_fetch_begin_bh(&u->syncp);
2051                         inbytes = u->ustats.inbytes;
2052                         outbytes = u->ustats.outbytes;
2053                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2054
2055                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2056                            i, u->ustats.conns, u->ustats.inpkts,
2057                            u->ustats.outpkts, (__u64)inbytes,
2058                            (__u64)outbytes);
2059         }
2060
2061         spin_lock_bh(&tot_stats->lock);
2062
2063         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2064                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2065                    tot_stats->ustats.outpkts,
2066                    (unsigned long long) tot_stats->ustats.inbytes,
2067                    (unsigned long long) tot_stats->ustats.outbytes);
2068
2069         ip_vs_read_estimator(&rates, tot_stats);
2070
2071         spin_unlock_bh(&tot_stats->lock);
2072
2073 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2074         seq_puts(seq,
2075                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2076         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2077                         rates.cps,
2078                         rates.inpps,
2079                         rates.outpps,
2080                         rates.inbps,
2081                         rates.outbps);
2082
2083         return 0;
2084 }
2085
/*
 * open() handler of the per-CPU statistics proc file: a single-record
 * seq_file set up through single_open_net() and rendered by
 * ip_vs_stats_percpu_show().
 */
static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_percpu_show);
}
2090
2091 static const struct file_operations ip_vs_stats_percpu_fops = {
2092         .owner = THIS_MODULE,
2093         .open = ip_vs_stats_percpu_seq_open,
2094         .read = seq_read,
2095         .llseek = seq_lseek,
2096         .release = single_release,
2097 };
2098 #endif
2099
2100 /*
2101  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2102  */
2103 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2104 {
2105 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2106         struct ip_vs_proto_data *pd;
2107 #endif
2108
2109         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2110                   u->tcp_timeout,
2111                   u->tcp_fin_timeout,
2112                   u->udp_timeout);
2113
2114 #ifdef CONFIG_IP_VS_PROTO_TCP
2115         if (u->tcp_timeout) {
2116                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2117                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2118                         = u->tcp_timeout * HZ;
2119         }
2120
2121         if (u->tcp_fin_timeout) {
2122                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2123                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2124                         = u->tcp_fin_timeout * HZ;
2125         }
2126 #endif
2127
2128 #ifdef CONFIG_IP_VS_PROTO_UDP
2129         if (u->udp_timeout) {
2130                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2131                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2132                         = u->udp_timeout * HZ;
2133         }
2134 #endif
2135         return 0;
2136 }
2137
2138
/*
 * Argument sizes of the IP_VS_SO_SET_* socket options.
 *
 * SET_CMDID() turns a command number into an index into set_arglen[].
 * Its argument is parenthesized so that an expression argument (for
 * example a conditional) is subtracted as a whole.
 * MAX_ARG_LEN is the largest argument any set command carries.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2146
/*
 * Exact argument length of every set command, indexed by SET_CMDID(cmd).
 * do_ip_vs_set_ctl() rejects a request whose length differs from the
 * entry for its command.
 */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
2160
2161 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2162                                   struct ip_vs_service_user *usvc_compat)
2163 {
2164         memset(usvc, 0, sizeof(*usvc));
2165
2166         usvc->af                = AF_INET;
2167         usvc->protocol          = usvc_compat->protocol;
2168         usvc->addr.ip           = usvc_compat->addr;
2169         usvc->port              = usvc_compat->port;
2170         usvc->fwmark            = usvc_compat->fwmark;
2171
2172         /* Deep copy of sched_name is not needed here */
2173         usvc->sched_name        = usvc_compat->sched_name;
2174
2175         usvc->flags             = usvc_compat->flags;
2176         usvc->timeout           = usvc_compat->timeout;
2177         usvc->netmask           = usvc_compat->netmask;
2178 }
2179
2180 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2181                                    struct ip_vs_dest_user *udest_compat)
2182 {
2183         memset(udest, 0, sizeof(*udest));
2184
2185         udest->addr.ip          = udest_compat->addr;
2186         udest->port             = udest_compat->port;
2187         udest->conn_flags       = udest_compat->conn_flags;
2188         udest->weight           = udest_compat->weight;
2189         udest->u_threshold      = udest_compat->u_threshold;
2190         udest->l_threshold      = udest_compat->l_threshold;
2191 }
2192
2193 static int
2194 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2195 {
2196         struct net *net = sock_net(sk);
2197         int ret;
2198         unsigned char arg[MAX_ARG_LEN];
2199         struct ip_vs_service_user *usvc_compat;
2200         struct ip_vs_service_user_kern usvc;
2201         struct ip_vs_service *svc;
2202         struct ip_vs_dest_user *udest_compat;
2203         struct ip_vs_dest_user_kern udest;
2204
2205         if (!capable(CAP_NET_ADMIN))
2206                 return -EPERM;
2207
2208         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2209                 return -EINVAL;
2210         if (len < 0 || len >  MAX_ARG_LEN)
2211                 return -EINVAL;
2212         if (len != set_arglen[SET_CMDID(cmd)]) {
2213                 pr_err("set_ctl: len %u != %u\n",
2214                        len, set_arglen[SET_CMDID(cmd)]);
2215                 return -EINVAL;
2216         }
2217
2218         if (copy_from_user(arg, user, len) != 0)
2219                 return -EFAULT;
2220
2221         /* increase the module use count */
2222         ip_vs_use_count_inc();
2223
2224         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2225                 ret = -ERESTARTSYS;
2226                 goto out_dec;
2227         }
2228
2229         if (cmd == IP_VS_SO_SET_FLUSH) {
2230                 /* Flush the virtual service */
2231                 ret = ip_vs_flush(net);
2232                 goto out_unlock;
2233         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2234                 /* Set timeout values for (tcp tcpfin udp) */
2235                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2236                 goto out_unlock;
2237         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2238                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2239                 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2240                                         dm->syncid);
2241                 goto out_unlock;
2242         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2243                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2244                 ret = stop_sync_thread(net, dm->state);
2245                 goto out_unlock;
2246         }
2247
2248         usvc_compat = (struct ip_vs_service_user *)arg;
2249         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2250
2251         /* We only use the new structs internally, so copy userspace compat
2252          * structs to extended internal versions */
2253         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2254         ip_vs_copy_udest_compat(&udest, udest_compat);
2255
2256         if (cmd == IP_VS_SO_SET_ZERO) {
2257                 /* if no service address is set, zero counters in all */
2258                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2259                         ret = ip_vs_zero_all(net);
2260                         goto out_unlock;
2261                 }
2262         }
2263
2264         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2265         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2266             usvc.protocol != IPPROTO_SCTP) {
2267                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2268                        usvc.protocol, &usvc.addr.ip,
2269                        ntohs(usvc.port), usvc.sched_name);
2270                 ret = -EFAULT;
2271                 goto out_unlock;
2272         }
2273
2274         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2275         if (usvc.fwmark == 0)
2276                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2277                                            &usvc.addr, usvc.port);
2278         else
2279                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2280
2281         if (cmd != IP_VS_SO_SET_ADD
2282             && (svc == NULL || svc->protocol != usvc.protocol)) {
2283                 ret = -ESRCH;
2284                 goto out_unlock;
2285         }
2286
2287         switch (cmd) {
2288         case IP_VS_SO_SET_ADD:
2289                 if (svc != NULL)
2290                         ret = -EEXIST;
2291                 else
2292                         ret = ip_vs_add_service(net, &usvc, &svc);
2293                 break;
2294         case IP_VS_SO_SET_EDIT:
2295                 ret = ip_vs_edit_service(svc, &usvc);
2296                 break;
2297         case IP_VS_SO_SET_DEL:
2298                 ret = ip_vs_del_service(svc);
2299                 if (!ret)
2300                         goto out_unlock;
2301                 break;
2302         case IP_VS_SO_SET_ZERO:
2303                 ret = ip_vs_zero_service(svc);
2304                 break;
2305         case IP_VS_SO_SET_ADDDEST:
2306                 ret = ip_vs_add_dest(svc, &udest);
2307                 break;
2308         case IP_VS_SO_SET_EDITDEST:
2309                 ret = ip_vs_edit_dest(svc, &udest);
2310                 break;
2311         case IP_VS_SO_SET_DELDEST:
2312                 ret = ip_vs_del_dest(svc, &udest);
2313                 break;
2314         default:
2315                 ret = -EINVAL;
2316         }
2317
2318   out_unlock:
2319         mutex_unlock(&__ip_vs_mutex);
2320   out_dec:
2321         /* decrease the module use count */
2322         ip_vs_use_count_dec();
2323
2324         return ret;
2325 }
2326
2327
2328 static void
2329 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2330 {
2331         dst->protocol = src->protocol;
2332         dst->addr = src->addr.ip;
2333         dst->port = src->port;
2334         dst->fwmark = src->fwmark;
2335         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2336         dst->flags = src->flags;
2337         dst->timeout = src->timeout / HZ;
2338         dst->netmask = src->netmask;
2339         dst->num_dests = src->num_dests;
2340         ip_vs_copy_stats(&dst->stats, &src->stats);
2341 }
2342
2343 static inline int
2344 __ip_vs_get_service_entries(struct net *net,
2345                             const struct ip_vs_get_services *get,
2346                             struct ip_vs_get_services __user *uptr)
2347 {
2348         int idx, count=0;
2349         struct ip_vs_service *svc;
2350         struct ip_vs_service_entry entry;
2351         int ret = 0;
2352
2353         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2354                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2355                         /* Only expose IPv4 entries to old interface */
2356                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2357                                 continue;
2358
2359                         if (count >= get->num_services)
2360                                 goto out;
2361                         memset(&entry, 0, sizeof(entry));
2362                         ip_vs_copy_service(&entry, svc);
2363                         if (copy_to_user(&uptr->entrytable[count],
2364                                          &entry, sizeof(entry))) {
2365                                 ret = -EFAULT;
2366                                 goto out;
2367                         }
2368                         count++;
2369                 }
2370         }
2371
2372         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2373                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2374                         /* Only expose IPv4 entries to old interface */
2375                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2376                                 continue;
2377
2378                         if (count >= get->num_services)
2379                                 goto out;
2380                         memset(&entry, 0, sizeof(entry));
2381                         ip_vs_copy_service(&entry, svc);
2382                         if (copy_to_user(&uptr->entrytable[count],
2383                                          &entry, sizeof(entry))) {
2384                                 ret = -EFAULT;
2385                                 goto out;
2386                         }
2387                         count++;
2388                 }
2389         }
2390   out:
2391         return ret;
2392 }
2393
2394 static inline int
2395 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2396                          struct ip_vs_get_dests __user *uptr)
2397 {
2398         struct ip_vs_service *svc;
2399         union nf_inet_addr addr = { .ip = get->addr };
2400         int ret = 0;
2401
2402         if (get->fwmark)
2403                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2404         else
2405                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2406                                            get->port);
2407
2408         if (svc) {
2409                 int count = 0;
2410                 struct ip_vs_dest *dest;
2411                 struct ip_vs_dest_entry entry;
2412
2413                 list_for_each_entry(dest, &svc->destinations, n_list) {
2414                         if (count >= get->num_dests)
2415                                 break;
2416
2417                         entry.addr = dest->addr.ip;
2418                         entry.port = dest->port;
2419                         entry.conn_flags = atomic_read(&dest->conn_flags);
2420                         entry.weight = atomic_read(&dest->weight);
2421                         entry.u_threshold = dest->u_threshold;
2422                         entry.l_threshold = dest->l_threshold;
2423                         entry.activeconns = atomic_read(&dest->activeconns);
2424                         entry.inactconns = atomic_read(&dest->inactconns);
2425                         entry.persistconns = atomic_read(&dest->persistconns);
2426                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2427                         if (copy_to_user(&uptr->entrytable[count],
2428                                          &entry, sizeof(entry))) {
2429                                 ret = -EFAULT;
2430                                 break;
2431                         }
2432                         count++;
2433                 }
2434         } else
2435                 ret = -ESRCH;
2436         return ret;
2437 }
2438
2439 static inline void
2440 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2441 {
2442 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2443         struct ip_vs_proto_data *pd;
2444 #endif
2445
2446 #ifdef CONFIG_IP_VS_PROTO_TCP
2447         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2448         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2449         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2450 #endif
2451 #ifdef CONFIG_IP_VS_PROTO_UDP
2452         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2453         u->udp_timeout =
2454                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2455 #endif
2456 }
2457
2458
/*
 * Argument sizes of the IP_VS_SO_GET_* socket options.
 *
 * GET_CMDID() turns a command number into an index into get_arglen[].
 * Its argument is parenthesized so that an expression argument (for
 * example a conditional) is subtracted as a whole.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2466
/*
 * Minimum argument length of every get command, indexed by GET_CMDID(cmd).
 * do_ip_vs_get_ctl() rejects a shorter buffer and copies exactly this many
 * bytes in from userspace before dispatching the command.
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
};
2476
2477 static int
2478 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2479 {
2480         unsigned char arg[128];
2481         int ret = 0;
2482         unsigned int copylen;
2483         struct net *net = sock_net(sk);
2484         struct netns_ipvs *ipvs = net_ipvs(net);
2485
2486         BUG_ON(!net);
2487         if (!capable(CAP_NET_ADMIN))
2488                 return -EPERM;
2489
2490         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2491                 return -EINVAL;
2492
2493         if (*len < get_arglen[GET_CMDID(cmd)]) {
2494                 pr_err("get_ctl: len %u < %u\n",
2495                        *len, get_arglen[GET_CMDID(cmd)]);
2496                 return -EINVAL;
2497         }
2498
2499         copylen = get_arglen[GET_CMDID(cmd)];
2500         if (copylen > 128)
2501                 return -EINVAL;
2502
2503         if (copy_from_user(arg, user, copylen) != 0)
2504                 return -EFAULT;
2505
2506         if (mutex_lock_interruptible(&__ip_vs_mutex))
2507                 return -ERESTARTSYS;
2508
2509         switch (cmd) {
2510         case IP_VS_SO_GET_VERSION:
2511         {
2512                 char buf[64];
2513
2514                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2515                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2516                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2517                         ret = -EFAULT;
2518                         goto out;
2519                 }
2520                 *len = strlen(buf)+1;
2521         }
2522         break;
2523
2524         case IP_VS_SO_GET_INFO:
2525         {
2526                 struct ip_vs_getinfo info;
2527                 info.version = IP_VS_VERSION_CODE;
2528                 info.size = ip_vs_conn_tab_size;
2529                 info.num_services = ipvs->num_services;
2530                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2531                         ret = -EFAULT;
2532         }
2533         break;
2534
2535         case IP_VS_SO_GET_SERVICES:
2536         {
2537                 struct ip_vs_get_services *get;
2538                 int size;
2539
2540                 get = (struct ip_vs_get_services *)arg;
2541                 size = sizeof(*get) +
2542                         sizeof(struct ip_vs_service_entry) * get->num_services;
2543                 if (*len != size) {
2544                         pr_err("length: %u != %u\n", *len, size);
2545                         ret = -EINVAL;
2546                         goto out;
2547                 }
2548                 ret = __ip_vs_get_service_entries(net, get, user);
2549         }
2550         break;
2551
2552         case IP_VS_SO_GET_SERVICE:
2553         {
2554                 struct ip_vs_service_entry *entry;
2555                 struct ip_vs_service *svc;
2556                 union nf_inet_addr addr;
2557
2558                 entry = (struct ip_vs_service_entry *)arg;
2559                 addr.ip = entry->addr;
2560                 if (entry->fwmark)
2561                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2562                 else
2563                         svc = __ip_vs_service_find(net, AF_INET,
2564                                                    entry->protocol, &addr,
2565                                                    entry->port);
2566                 if (svc) {
2567                         ip_vs_copy_service(entry, svc);
2568                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2569                                 ret = -EFAULT;
2570                 } else
2571                         ret = -ESRCH;
2572         }
2573         break;
2574
2575         case IP_VS_SO_GET_DESTS:
2576         {
2577                 struct ip_vs_get_dests *get;
2578                 int size;
2579
2580                 get = (struct ip_vs_get_dests *)arg;
2581                 size = sizeof(*get) +
2582                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2583                 if (*len != size) {
2584                         pr_err("length: %u != %u\n", *len, size);
2585                         ret = -EINVAL;
2586                         goto out;
2587                 }
2588                 ret = __ip_vs_get_dest_entries(net, get, user);
2589         }
2590         break;
2591
2592         case IP_VS_SO_GET_TIMEOUT:
2593         {
2594                 struct ip_vs_timeout_user t;
2595
2596                 __ip_vs_get_timeouts(net, &t);
2597                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2598                         ret = -EFAULT;
2599         }
2600         break;
2601
2602         case IP_VS_SO_GET_DAEMON:
2603         {
2604                 struct ip_vs_daemon_user d[2];
2605
2606                 memset(&d, 0, sizeof(d));
2607                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2608                         d[0].state = IP_VS_STATE_MASTER;
2609                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2610                                 sizeof(d[0].mcast_ifn));
2611                         d[0].syncid = ipvs->master_syncid;
2612                 }
2613                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2614                         d[1].state = IP_VS_STATE_BACKUP;
2615                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2616                                 sizeof(d[1].mcast_ifn));
2617                         d[1].syncid = ipvs->backup_syncid;
2618                 }
2619                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2620                         ret = -EFAULT;
2621         }
2622         break;
2623
2624         default:
2625                 ret = -EINVAL;
2626         }
2627
2628   out:
2629         mutex_unlock(&__ip_vs_mutex);
2630         return ret;
2631 }
2632
2633
2634 static struct nf_sockopt_ops ip_vs_sockopts = {
2635         .pf             = PF_INET,
2636         .set_optmin     = IP_VS_BASE_CTL,
2637         .set_optmax     = IP_VS_SO_SET_MAX+1,
2638         .set            = do_ip_vs_set_ctl,
2639         .get_optmin     = IP_VS_BASE_CTL,
2640         .get_optmax     = IP_VS_SO_GET_MAX+1,
2641         .get            = do_ip_vs_get_ctl,
2642         .owner          = THIS_MODULE,
2643 };
2644
2645 /*
2646  * Generic Netlink interface
2647  */
2648
2649 /* IPVS genetlink family */
2650 static struct genl_family ip_vs_genl_family = {
2651         .id             = GENL_ID_GENERATE,
2652         .hdrsize        = 0,
2653         .name           = IPVS_GENL_NAME,
2654         .version        = IPVS_GENL_VERSION,
2655         .maxattr        = IPVS_CMD_MAX,
2656         .netnsok        = true,         /* Make ipvsadm to work on netns */
2657 };
2658
2659 /* Policy used for first-level command attributes */
2660 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2661         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2662         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2663         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2664         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2665         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2666         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2667 };
2668
2669 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2670 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2671         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2672         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2673                                             .len = IP_VS_IFNAME_MAXLEN },
2674         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2675 };
2676
2677 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2678 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2679         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2680         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2681         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2682                                             .len = sizeof(union nf_inet_addr) },
2683         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2684         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2685         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2686                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2687         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2688                                             .len = IP_VS_PENAME_MAXLEN },
2689         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2690                                             .len = sizeof(struct ip_vs_flags) },
2691         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2692         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2693         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2694 };
2695
2696 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2697 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2698         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2699                                             .len = sizeof(union nf_inet_addr) },
2700         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2701         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2702         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2703         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2704         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2705         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2706         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2707         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2708         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2709 };
2710
2711 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2712                                  struct ip_vs_stats *stats)
2713 {
2714         struct ip_vs_stats_user ustats;
2715         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2716         if (!nl_stats)
2717                 return -EMSGSIZE;
2718
2719         ip_vs_copy_stats(&ustats, stats);
2720
2721         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2722         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2723         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2724         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2725         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2726         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2727         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2728         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2729         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2730         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2731
2732         nla_nest_end(skb, nl_stats);
2733
2734         return 0;
2735
2736 nla_put_failure:
2737         nla_nest_cancel(skb, nl_stats);
2738         return -EMSGSIZE;
2739 }
2740
2741 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2742                                    struct ip_vs_service *svc)
2743 {
2744         struct nlattr *nl_service;
2745         struct ip_vs_flags flags = { .flags = svc->flags,
2746                                      .mask = ~0 };
2747
2748         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2749         if (!nl_service)
2750                 return -EMSGSIZE;
2751
2752         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2753
2754         if (svc->fwmark) {
2755                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2756         } else {
2757                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2758                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2759                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2760         }
2761
2762         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2763         if (svc->pe)
2764                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2765         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2766         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2767         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2768
2769         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2770                 goto nla_put_failure;
2771
2772         nla_nest_end(skb, nl_service);
2773
2774         return 0;
2775
2776 nla_put_failure:
2777         nla_nest_cancel(skb, nl_service);
2778         return -EMSGSIZE;
2779 }
2780
2781 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2782                                    struct ip_vs_service *svc,
2783                                    struct netlink_callback *cb)
2784 {
2785         void *hdr;
2786
2787         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2788                           &ip_vs_genl_family, NLM_F_MULTI,
2789                           IPVS_CMD_NEW_SERVICE);
2790         if (!hdr)
2791                 return -EMSGSIZE;
2792
2793         if (ip_vs_genl_fill_service(skb, svc) < 0)
2794                 goto nla_put_failure;
2795
2796         return genlmsg_end(skb, hdr);
2797
2798 nla_put_failure:
2799         genlmsg_cancel(skb, hdr);
2800         return -EMSGSIZE;
2801 }
2802
2803 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2804                                     struct netlink_callback *cb)
2805 {
2806         int idx = 0, i;
2807         int start = cb->args[0];
2808         struct ip_vs_service *svc;
2809         struct net *net = skb_sknet(skb);
2810
2811         mutex_lock(&__ip_vs_mutex);
2812         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2813                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2814                         if (++idx <= start || !net_eq(svc->net, net))
2815                                 continue;
2816                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2817                                 idx--;
2818                                 goto nla_put_failure;
2819                         }
2820                 }
2821         }
2822
2823         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2824                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2825                         if (++idx <= start || !net_eq(svc->net, net))
2826                                 continue;
2827                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2828                                 idx--;
2829                                 goto nla_put_failure;
2830                         }
2831                 }
2832         }
2833
2834 nla_put_failure:
2835         mutex_unlock(&__ip_vs_mutex);
2836         cb->args[0] = idx;
2837
2838         return skb->len;
2839 }
2840
2841 static int ip_vs_genl_parse_service(struct net *net,
2842                                     struct ip_vs_service_user_kern *usvc,
2843                                     struct nlattr *nla, int full_entry,
2844                                     struct ip_vs_service **ret_svc)
2845 {
2846         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2847         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2848         struct ip_vs_service *svc;
2849
2850         /* Parse mandatory identifying service fields first */
2851         if (nla == NULL ||
2852             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2853                 return -EINVAL;
2854
2855         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2856         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2857         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2858         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2859         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2860
2861         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2862                 return -EINVAL;
2863
2864         memset(usvc, 0, sizeof(*usvc));
2865
2866         usvc->af = nla_get_u16(nla_af);
2867 #ifdef CONFIG_IP_VS_IPV6
2868         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2869 #else
2870         if (usvc->af != AF_INET)
2871 #endif
2872                 return -EAFNOSUPPORT;
2873
2874         if (nla_fwmark) {
2875                 usvc->protocol = IPPROTO_TCP;
2876                 usvc->fwmark = nla_get_u32(nla_fwmark);
2877         } else {
2878                 usvc->protocol = nla_get_u16(nla_protocol);
2879                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2880                 usvc->port = nla_get_u16(nla_port);
2881                 usvc->fwmark = 0;
2882         }
2883
2884         if (usvc->fwmark)
2885                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2886         else
2887                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2888                                            &usvc->addr, usvc->port);
2889         *ret_svc = svc;
2890
2891         /* If a full entry was requested, check for the additional fields */
2892         if (full_entry) {
2893                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2894                               *nla_netmask;
2895                 struct ip_vs_flags flags;
2896
2897                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2898                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2899                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2900                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2901                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2902
2903                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2904                         return -EINVAL;
2905
2906                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2907
2908                 /* prefill flags from service if it already exists */
2909                 if (svc)
2910                         usvc->flags = svc->flags;
2911
2912                 /* set new flags from userland */
2913                 usvc->flags = (usvc->flags & ~flags.mask) |
2914                               (flags.flags & flags.mask);
2915                 usvc->sched_name = nla_data(nla_sched);
2916                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2917                 usvc->timeout = nla_get_u32(nla_timeout);
2918                 usvc->netmask = nla_get_u32(nla_netmask);
2919         }
2920
2921         return 0;
2922 }
2923
2924 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2925                                                      struct nlattr *nla)
2926 {
2927         struct ip_vs_service_user_kern usvc;
2928         struct ip_vs_service *svc;
2929         int ret;
2930
2931         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2932         return ret ? ERR_PTR(ret) : svc;
2933 }
2934
2935 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2936 {
2937         struct nlattr *nl_dest;
2938
2939         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2940         if (!nl_dest)
2941                 return -EMSGSIZE;
2942
2943         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2944         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2945
2946         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2947                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2948         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2949         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2950         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2951         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2952                     atomic_read(&dest->activeconns));
2953         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2954                     atomic_read(&dest->inactconns));
2955         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2956                     atomic_read(&dest->persistconns));
2957
2958         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2959                 goto nla_put_failure;
2960
2961         nla_nest_end(skb, nl_dest);
2962
2963         return 0;
2964
2965 nla_put_failure:
2966         nla_nest_cancel(skb, nl_dest);
2967         return -EMSGSIZE;
2968 }
2969
2970 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2971                                 struct netlink_callback *cb)
2972 {
2973         void *hdr;
2974
2975         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2976                           &ip_vs_genl_family, NLM_F_MULTI,
2977                           IPVS_CMD_NEW_DEST);
2978         if (!hdr)
2979                 return -EMSGSIZE;
2980
2981         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2982                 goto nla_put_failure;
2983
2984         return genlmsg_end(skb, hdr);
2985
2986 nla_put_failure:
2987         genlmsg_cancel(skb, hdr);
2988         return -EMSGSIZE;
2989 }
2990
2991 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2992                                  struct netlink_callback *cb)
2993 {
2994         int idx = 0;
2995         int start = cb->args[0];
2996         struct ip_vs_service *svc;
2997         struct ip_vs_dest *dest;
2998         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2999         struct net *net = skb_sknet(skb);
3000
3001         mutex_lock(&__ip_vs_mutex);
3002
3003         /* Try to find the service for which to dump destinations */
3004         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3005                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3006                 goto out_err;
3007
3008
3009         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3010         if (IS_ERR(svc) || svc == NULL)
3011                 goto out_err;
3012
3013         /* Dump the destinations */
3014         list_for_each_entry(dest, &svc->destinations, n_list) {
3015                 if (++idx <= start)
3016                         continue;
3017                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3018                         idx--;
3019                         goto nla_put_failure;
3020                 }
3021         }
3022
3023 nla_put_failure:
3024         cb->args[0] = idx;
3025
3026 out_err:
3027         mutex_unlock(&__ip_vs_mutex);
3028
3029         return skb->len;
3030 }
3031
3032 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3033                                  struct nlattr *nla, int full_entry)
3034 {
3035         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3036         struct nlattr *nla_addr, *nla_port;
3037
3038         /* Parse mandatory identifying destination fields first */
3039         if (nla == NULL ||
3040             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3041                 return -EINVAL;
3042
3043         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3044         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3045
3046         if (!(nla_addr && nla_port))
3047                 return -EINVAL;
3048
3049         memset(udest, 0, sizeof(*udest));
3050
3051         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3052         udest->port = nla_get_u16(nla_port);
3053
3054         /* If a full entry was requested, check for the additional fields */
3055         if (full_entry) {
3056                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3057                               *nla_l_thresh;
3058
3059                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3060                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3061                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3062                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3063
3064                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3065                         return -EINVAL;
3066
3067                 udest->conn_flags = nla_get_u32(nla_fwd)
3068                                     & IP_VS_CONN_F_FWD_MASK;
3069                 udest->weight = nla_get_u32(nla_weight);
3070                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3071                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3072         }
3073
3074         return 0;
3075 }
3076
3077 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3078                                   const char *mcast_ifn, __be32 syncid)
3079 {
3080         struct nlattr *nl_daemon;
3081
3082         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3083         if (!nl_daemon)
3084                 return -EMSGSIZE;
3085
3086         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3087         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3088         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3089
3090         nla_nest_end(skb, nl_daemon);
3091
3092         return 0;
3093
3094 nla_put_failure:
3095         nla_nest_cancel(skb, nl_daemon);
3096         return -EMSGSIZE;
3097 }
3098
3099 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3100                                   const char *mcast_ifn, __be32 syncid,
3101                                   struct netlink_callback *cb)
3102 {
3103         void *hdr;
3104         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3105                           &ip_vs_genl_family, NLM_F_MULTI,
3106                           IPVS_CMD_NEW_DAEMON);
3107         if (!hdr)
3108                 return -EMSGSIZE;
3109
3110         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3111                 goto nla_put_failure;
3112
3113         return genlmsg_end(skb, hdr);
3114
3115 nla_put_failure:
3116         genlmsg_cancel(skb, hdr);
3117         return -EMSGSIZE;
3118 }
3119
3120 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3121                                    struct netlink_callback *cb)
3122 {
3123         struct net *net = skb_sknet(skb);
3124         struct netns_ipvs *ipvs = net_ipvs(net);
3125
3126         mutex_lock(&__ip_vs_mutex);
3127         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3128                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3129                                            ipvs->master_mcast_ifn,
3130                                            ipvs->master_syncid, cb) < 0)
3131                         goto nla_put_failure;
3132
3133                 cb->args[0] = 1;
3134         }
3135
3136         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3137                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3138                                            ipvs->backup_mcast_ifn,
3139                                            ipvs->backup_syncid, cb) < 0)
3140                         goto nla_put_failure;
3141
3142                 cb->args[1] = 1;
3143         }
3144
3145 nla_put_failure:
3146         mutex_unlock(&__ip_vs_mutex);
3147
3148         return skb->len;
3149 }
3150
3151 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3152 {
3153         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3154               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3155               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3156                 return -EINVAL;
3157
3158         return start_sync_thread(net,
3159                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3160                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3161                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3162 }
3163
3164 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3165 {
3166         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3167                 return -EINVAL;
3168
3169         return stop_sync_thread(net,
3170                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3171 }
3172
3173 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3174 {
3175         struct ip_vs_timeout_user t;
3176
3177         __ip_vs_get_timeouts(net, &t);
3178
3179         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3180                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3181
3182         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3183                 t.tcp_fin_timeout =
3184                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3185
3186         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3187                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3188
3189         return ip_vs_set_timeout(net, &t);
3190 }
3191
3192 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3193 {
3194         struct ip_vs_service *svc = NULL;
3195         struct ip_vs_service_user_kern usvc;
3196         struct ip_vs_dest_user_kern udest;
3197         int ret = 0, cmd;
3198         int need_full_svc = 0, need_full_dest = 0;
3199         struct net *net;
3200         struct netns_ipvs *ipvs;
3201
3202         net = skb_sknet(skb);
3203         ipvs = net_ipvs(net);
3204         cmd = info->genlhdr->cmd;
3205
3206         mutex_lock(&__ip_vs_mutex);
3207
3208         if (cmd == IPVS_CMD_FLUSH) {
3209                 ret = ip_vs_flush(net);
3210                 goto out;
3211         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3212                 ret = ip_vs_genl_set_config(net, info->attrs);
3213                 goto out;
3214         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3215                    cmd == IPVS_CMD_DEL_DAEMON) {
3216
3217                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3218
3219                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3220                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3221                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3222                                      ip_vs_daemon_policy)) {
3223                         ret = -EINVAL;
3224                         goto out;
3225                 }
3226
3227                 if (cmd == IPVS_CMD_NEW_DAEMON)
3228                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3229                 else
3230                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3231                 goto out;
3232         } else if (cmd == IPVS_CMD_ZERO &&
3233                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3234                 ret = ip_vs_zero_all(net);
3235                 goto out;
3236         }
3237
3238         /* All following commands require a service argument, so check if we
3239          * received a valid one. We need a full service specification when
3240          * adding / editing a service. Only identifying members otherwise. */
3241         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3242                 need_full_svc = 1;
3243
3244         ret = ip_vs_genl_parse_service(net, &usvc,
3245                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3246                                        need_full_svc, &svc);
3247         if (ret)
3248                 goto out;
3249
3250         /* Unless we're adding a new service, the service must already exist */
3251         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3252                 ret = -ESRCH;
3253                 goto out;
3254         }
3255
3256         /* Destination commands require a valid destination argument. For
3257          * adding / editing a destination, we need a full destination
3258          * specification. */
3259         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3260             cmd == IPVS_CMD_DEL_DEST) {
3261                 if (cmd != IPVS_CMD_DEL_DEST)
3262                         need_full_dest = 1;
3263
3264                 ret = ip_vs_genl_parse_dest(&udest,
3265                                             info->attrs[IPVS_CMD_ATTR_DEST],
3266                                             need_full_dest);
3267                 if (ret)
3268                         goto out;
3269         }
3270
3271         switch (cmd) {
3272         case IPVS_CMD_NEW_SERVICE:
3273                 if (svc == NULL)
3274                         ret = ip_vs_add_service(net, &usvc, &svc);
3275                 else
3276                         ret = -EEXIST;
3277                 break;
3278         case IPVS_CMD_SET_SERVICE:
3279                 ret = ip_vs_edit_service(svc, &usvc);
3280                 break;
3281         case IPVS_CMD_DEL_SERVICE:
3282                 ret = ip_vs_del_service(svc);
3283                 /* do not use svc, it can be freed */
3284                 break;
3285         case IPVS_CMD_NEW_DEST:
3286                 ret = ip_vs_add_dest(svc, &udest);
3287                 break;
3288         case IPVS_CMD_SET_DEST:
3289                 ret = ip_vs_edit_dest(svc, &udest);
3290                 break;
3291         case IPVS_CMD_DEL_DEST:
3292                 ret = ip_vs_del_dest(svc, &udest);
3293                 break;
3294         case IPVS_CMD_ZERO:
3295                 ret = ip_vs_zero_service(svc);
3296                 break;
3297         default:
3298                 ret = -EINVAL;
3299         }
3300
3301 out:
3302         mutex_unlock(&__ip_vs_mutex);
3303
3304         return ret;
3305 }
3306
3307 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3308 {
3309         struct sk_buff *msg;
3310         void *reply;
3311         int ret, cmd, reply_cmd;
3312         struct net *net;
3313         struct netns_ipvs *ipvs;
3314
3315         net = skb_sknet(skb);
3316         ipvs = net_ipvs(net);
3317         cmd = info->genlhdr->cmd;
3318
3319         if (cmd == IPVS_CMD_GET_SERVICE)
3320                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3321         else if (cmd == IPVS_CMD_GET_INFO)
3322                 reply_cmd = IPVS_CMD_SET_INFO;
3323         else if (cmd == IPVS_CMD_GET_CONFIG)
3324                 reply_cmd = IPVS_CMD_SET_CONFIG;
3325         else {
3326                 pr_err("unknown Generic Netlink command\n");
3327                 return -EINVAL;
3328         }
3329
3330         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3331         if (!msg)
3332                 return -ENOMEM;
3333
3334         mutex_lock(&__ip_vs_mutex);
3335
3336         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3337         if (reply == NULL)
3338                 goto nla_put_failure;
3339
3340         switch (cmd) {
3341         case IPVS_CMD_GET_SERVICE:
3342         {
3343                 struct ip_vs_service *svc;
3344
3345                 svc = ip_vs_genl_find_service(net,
3346                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3347                 if (IS_ERR(svc)) {
3348                         ret = PTR_ERR(svc);
3349                         goto out_err;
3350                 } else if (svc) {
3351                         ret = ip_vs_genl_fill_service(msg, svc);
3352                         if (ret)
3353                                 goto nla_put_failure;
3354                 } else {
3355                         ret = -ESRCH;
3356                         goto out_err;
3357                 }
3358
3359                 break;
3360         }
3361
3362         case IPVS_CMD_GET_CONFIG:
3363         {
3364                 struct ip_vs_timeout_user t;
3365
3366                 __ip_vs_get_timeouts(net, &t);
3367 #ifdef CONFIG_IP_VS_PROTO_TCP
3368                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3369                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3370                             t.tcp_fin_timeout);
3371 #endif
3372 #ifdef CONFIG_IP_VS_PROTO_UDP
3373                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3374 #endif
3375
3376                 break;
3377         }
3378
3379         case IPVS_CMD_GET_INFO:
3380                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3381                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3382                             ip_vs_conn_tab_size);
3383                 break;
3384         }
3385
3386         genlmsg_end(msg, reply);
3387         ret = genlmsg_reply(msg, info);
3388         goto out;
3389
3390 nla_put_failure:
3391         pr_err("not enough space in Netlink message\n");
3392         ret = -EMSGSIZE;
3393
3394 out_err:
3395         nlmsg_free(msg);
3396 out:
3397         mutex_unlock(&__ip_vs_mutex);
3398
3399         return ret;
3400 }
3401
3402
3403 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3404         {
3405                 .cmd    = IPVS_CMD_NEW_SERVICE,
3406                 .flags  = GENL_ADMIN_PERM,
3407                 .policy = ip_vs_cmd_policy,
3408                 .doit   = ip_vs_genl_set_cmd,
3409         },
3410         {
3411                 .cmd    = IPVS_CMD_SET_SERVICE,
3412                 .flags  = GENL_ADMIN_PERM,
3413                 .policy = ip_vs_cmd_policy,
3414                 .doit   = ip_vs_genl_set_cmd,
3415         },
3416         {
3417                 .cmd    = IPVS_CMD_DEL_SERVICE,
3418                 .flags  = GENL_ADMIN_PERM,
3419                 .policy = ip_vs_cmd_policy,
3420                 .doit   = ip_vs_genl_set_cmd,
3421         },
3422         {
3423                 .cmd    = IPVS_CMD_GET_SERVICE,
3424                 .flags  = GENL_ADMIN_PERM,
3425                 .doit   = ip_vs_genl_get_cmd,
3426                 .dumpit = ip_vs_genl_dump_services,
3427                 .policy = ip_vs_cmd_policy,
3428         },
3429         {
3430                 .cmd    = IPVS_CMD_NEW_DEST,
3431                 .flags  = GENL_ADMIN_PERM,
3432                 .policy = ip_vs_cmd_policy,
3433                 .doit   = ip_vs_genl_set_cmd,
3434         },
3435         {
3436                 .cmd    = IPVS_CMD_SET_DEST,
3437                 .flags  = GENL_ADMIN_PERM,
3438                 .policy = ip_vs_cmd_policy,
3439                 .doit   = ip_vs_genl_set_cmd,
3440         },
3441         {
3442                 .cmd    = IPVS_CMD_DEL_DEST,
3443                 .flags  = GENL_ADMIN_PERM,
3444                 .policy = ip_vs_cmd_policy,
3445                 .doit   = ip_vs_genl_set_cmd,
3446         },
3447         {
3448                 .cmd    = IPVS_CMD_GET_DEST,
3449                 .flags  = GENL_ADMIN_PERM,
3450                 .policy = ip_vs_cmd_policy,
3451                 .dumpit = ip_vs_genl_dump_dests,
3452         },
3453         {
3454                 .cmd    = IPVS_CMD_NEW_DAEMON,
3455                 .flags  = GENL_ADMIN_PERM,
3456                 .policy = ip_vs_cmd_policy,
3457                 .doit   = ip_vs_genl_set_cmd,
3458         },
3459         {
3460                 .cmd    = IPVS_CMD_DEL_DAEMON,
3461                 .flags  = GENL_ADMIN_PERM,
3462                 .policy = ip_vs_cmd_policy,
3463                 .doit   = ip_vs_genl_set_cmd,
3464         },
3465         {
3466                 .cmd    = IPVS_CMD_GET_DAEMON,
3467                 .flags  = GENL_ADMIN_PERM,
3468                 .dumpit = ip_vs_genl_dump_daemons,
3469         },
3470         {
3471                 .cmd    = IPVS_CMD_SET_CONFIG,
3472                 .flags  = GENL_ADMIN_PERM,
3473                 .policy = ip_vs_cmd_policy,
3474                 .doit   = ip_vs_genl_set_cmd,
3475         },
3476         {
3477                 .cmd    = IPVS_CMD_GET_CONFIG,
3478                 .flags  = GENL_ADMIN_PERM,
3479                 .doit   = ip_vs_genl_get_cmd,
3480         },
3481         {
3482                 .cmd    = IPVS_CMD_GET_INFO,
3483                 .flags  = GENL_ADMIN_PERM,
3484                 .doit   = ip_vs_genl_get_cmd,
3485         },
3486         {
3487                 .cmd    = IPVS_CMD_ZERO,
3488                 .flags  = GENL_ADMIN_PERM,
3489                 .policy = ip_vs_cmd_policy,
3490                 .doit   = ip_vs_genl_set_cmd,
3491         },
3492         {
3493                 .cmd    = IPVS_CMD_FLUSH,
3494                 .flags  = GENL_ADMIN_PERM,
3495                 .doit   = ip_vs_genl_set_cmd,
3496         },
3497 };
3498
3499 static int __init ip_vs_genl_register(void)
3500 {
3501         return genl_register_family_with_ops(&ip_vs_genl_family,
3502                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3503 }
3504
3505 static void ip_vs_genl_unregister(void)
3506 {
3507         genl_unregister_family(&ip_vs_genl_family);
3508 }
3509
3510 /* End of Generic Netlink interface definitions */
3511
/*
 * Per-netns init/exit functions.
 */
3515 #ifdef CONFIG_SYSCTL
3516 int __net_init __ip_vs_control_init_sysctl(struct net *net)
3517 {
3518         int idx;
3519         struct netns_ipvs *ipvs = net_ipvs(net);
3520         struct ctl_table *tbl;
3521
3522         atomic_set(&ipvs->dropentry, 0);
3523         spin_lock_init(&ipvs->dropentry_lock);
3524         spin_lock_init(&ipvs->droppacket_lock);
3525         spin_lock_init(&ipvs->securetcp_lock);
3526
3527         if (!net_eq(net, &init_net)) {
3528                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3529                 if (tbl == NULL)
3530                         return -ENOMEM;
3531         } else
3532                 tbl = vs_vars;
3533         /* Initialize sysctl defaults */
3534         idx = 0;
3535         ipvs->sysctl_amemthresh = 1024;
3536         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3537         ipvs->sysctl_am_droprate = 10;
3538         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3539         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3540         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3541 #ifdef CONFIG_IP_VS_NFCT
3542         tbl[idx++].data = &ipvs->sysctl_conntrack;
3543 #endif
3544         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3545         ipvs->sysctl_snat_reroute = 1;
3546         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3547         ipvs->sysctl_sync_ver = 1;
3548         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3549         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3550         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3551         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3552         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3553         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3554         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3555         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3556         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3557
3558
3559         ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3560                                                      tbl);
3561         if (ipvs->sysctl_hdr == NULL) {
3562                 if (!net_eq(net, &init_net))
3563                         kfree(tbl);
3564                 return -ENOMEM;
3565         }
3566         ip_vs_start_estimator(net, &ipvs->tot_stats);
3567         ipvs->sysctl_tbl = tbl;
3568         /* Schedule defense work */
3569         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3570         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3571
3572         return 0;
3573 }
3574
3575 void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
3576 {
3577         struct netns_ipvs *ipvs = net_ipvs(net);
3578
3579         cancel_delayed_work_sync(&ipvs->defense_work);
3580         cancel_work_sync(&ipvs->defense_work.work);
3581         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3582 }
3583
3584 #else
3585
3586 int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3587 void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
3588
3589 #endif
3590
3591 int __net_init __ip_vs_control_init(struct net *net)
3592 {
3593         int idx;
3594         struct netns_ipvs *ipvs = net_ipvs(net);
3595
3596         ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3597
3598         /* Initialize rs_table */
3599         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3600                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3601
3602         INIT_LIST_HEAD(&ipvs->dest_trash);
3603         atomic_set(&ipvs->ftpsvc_counter, 0);
3604         atomic_set(&ipvs->nullsvc_counter, 0);
3605
3606         /* procfs stats */
3607         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3608         if (!ipvs->tot_stats.cpustats) {
3609                 pr_err("%s(): alloc_percpu.\n", __func__);
3610                 return -ENOMEM;
3611         }
3612         spin_lock_init(&ipvs->tot_stats.lock);
3613
3614         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3615         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3616         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3617                              &ip_vs_stats_percpu_fops);
3618
3619         if (__ip_vs_control_init_sysctl(net))
3620                 goto err;
3621
3622         return 0;
3623
3624 err:
3625         free_percpu(ipvs->tot_stats.cpustats);
3626         return -ENOMEM;
3627 }
3628
3629 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3630 {
3631         struct netns_ipvs *ipvs = net_ipvs(net);
3632
3633         ip_vs_trash_cleanup(net);
3634         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3635         __ip_vs_control_cleanup_sysctl(net);
3636         proc_net_remove(net, "ip_vs_stats_percpu");
3637         proc_net_remove(net, "ip_vs_stats");
3638         proc_net_remove(net, "ip_vs");
3639         free_percpu(ipvs->tot_stats.cpustats);
3640 }
3641
3642 static struct pernet_operations ipvs_control_ops = {
3643         .init = __ip_vs_control_init,
3644         .exit = __ip_vs_control_cleanup,
3645 };
3646
3647 int __init ip_vs_control_init(void)
3648 {
3649         int idx;
3650         int ret;
3651
3652         EnterFunction(2);
3653
3654         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3655         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3656                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3657                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3658         }
3659
3660         ret = register_pernet_subsys(&ipvs_control_ops);
3661         if (ret) {
3662                 pr_err("cannot register namespace.\n");
3663                 goto err;
3664         }
3665
3666         smp_wmb();      /* Do we really need it now ? */
3667
3668         ret = nf_register_sockopt(&ip_vs_sockopts);
3669         if (ret) {
3670                 pr_err("cannot register sockopt.\n");
3671                 goto err_net;
3672         }
3673
3674         ret = ip_vs_genl_register();
3675         if (ret) {
3676                 pr_err("cannot register Generic Netlink interface.\n");
3677                 nf_unregister_sockopt(&ip_vs_sockopts);
3678                 goto err_net;
3679         }
3680
3681         LeaveFunction(2);
3682         return 0;
3683
3684 err_net:
3685         unregister_pernet_subsys(&ipvs_control_ops);
3686 err:
3687         return ret;
3688 }
3689
3690
3691 void ip_vs_control_cleanup(void)
3692 {
3693         EnterFunction(2);
3694         unregister_pernet_subsys(&ipvs_control_ops);
3695         ip_vs_genl_unregister();
3696         nf_unregister_sockopt(&ip_vs_sockopts);
3697         LeaveFunction(2);
3698 }