Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
[pandora-kernel.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
55 /* mutex for IPVS sockopts; a sleeping lock is used because [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* reader/writer lock for the service tables (ip_vs_svc_table, ip_vs_svc_fwm_table) */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; a file-scope static, so it starts out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
71
72
73 /*  Forward declarations */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
79 static int __ip_vs_addr_is_local_v6(struct net *net,
80                                     const struct in6_addr *addr)
81 {
82         struct rt6_info *rt;
83         struct flowi6 fl6 = {
84                 .daddr = *addr,
85         };
86
87         rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
88         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
89                 return 1;
90
91         return 0;
92 }
93 #endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97  *      update_defense_level is called from keventd and from sysctl,
98  *      so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102         struct sysinfo i;
103         static int old_secure_tcp = 0;
104         int availmem;
105         int nomem;
106         int to_change = -1;
107
108         /* we only count free and buffered memory (in pages) */
109         si_meminfo(&i);
110         availmem = i.freeram + i.bufferram;
111         /* however in linux 2.5 the i.bufferram is total page cache size,
112            we need adjust it */
113         /* si_swapinfo(&i); */
114         /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116         nomem = (availmem < ipvs->sysctl_amemthresh);
117
118         local_bh_disable();
119
120         /* drop_entry */
121         spin_lock(&ipvs->dropentry_lock);
122         switch (ipvs->sysctl_drop_entry) {
123         case 0:
124                 atomic_set(&ipvs->dropentry, 0);
125                 break;
126         case 1:
127                 if (nomem) {
128                         atomic_set(&ipvs->dropentry, 1);
129                         ipvs->sysctl_drop_entry = 2;
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                 }
133                 break;
134         case 2:
135                 if (nomem) {
136                         atomic_set(&ipvs->dropentry, 1);
137                 } else {
138                         atomic_set(&ipvs->dropentry, 0);
139                         ipvs->sysctl_drop_entry = 1;
140                 };
141                 break;
142         case 3:
143                 atomic_set(&ipvs->dropentry, 1);
144                 break;
145         }
146         spin_unlock(&ipvs->dropentry_lock);
147
148         /* drop_packet */
149         spin_lock(&ipvs->droppacket_lock);
150         switch (ipvs->sysctl_drop_packet) {
151         case 0:
152                 ipvs->drop_rate = 0;
153                 break;
154         case 1:
155                 if (nomem) {
156                         ipvs->drop_rate = ipvs->drop_counter
157                                 = ipvs->sysctl_amemthresh /
158                                 (ipvs->sysctl_amemthresh-availmem);
159                         ipvs->sysctl_drop_packet = 2;
160                 } else {
161                         ipvs->drop_rate = 0;
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         ipvs->drop_rate = ipvs->drop_counter
167                                 = ipvs->sysctl_amemthresh /
168                                 (ipvs->sysctl_amemthresh-availmem);
169                 } else {
170                         ipvs->drop_rate = 0;
171                         ipvs->sysctl_drop_packet = 1;
172                 }
173                 break;
174         case 3:
175                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176                 break;
177         }
178         spin_unlock(&ipvs->droppacket_lock);
179
180         /* secure_tcp */
181         spin_lock(&ipvs->securetcp_lock);
182         switch (ipvs->sysctl_secure_tcp) {
183         case 0:
184                 if (old_secure_tcp >= 2)
185                         to_change = 0;
186                 break;
187         case 1:
188                 if (nomem) {
189                         if (old_secure_tcp < 2)
190                                 to_change = 1;
191                         ipvs->sysctl_secure_tcp = 2;
192                 } else {
193                         if (old_secure_tcp >= 2)
194                                 to_change = 0;
195                 }
196                 break;
197         case 2:
198                 if (nomem) {
199                         if (old_secure_tcp < 2)
200                                 to_change = 1;
201                 } else {
202                         if (old_secure_tcp >= 2)
203                                 to_change = 0;
204                         ipvs->sysctl_secure_tcp = 1;
205                 }
206                 break;
207         case 3:
208                 if (old_secure_tcp < 2)
209                         to_change = 1;
210                 break;
211         }
212         old_secure_tcp = ipvs->sysctl_secure_tcp;
213         if (to_change >= 0)
214                 ip_vs_protocol_timeout_change(ipvs,
215                                               ipvs->sysctl_secure_tcp > 1);
216         spin_unlock(&ipvs->securetcp_lock);
217
218         local_bh_enable();
219 }
220
221
222 /*
223  *      Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD    1*HZ
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229         struct netns_ipvs *ipvs =
230                 container_of(work, struct netns_ipvs, defense_work.work);
231
232         update_defense_level(ipvs);
233         if (atomic_read(&ipvs->dropentry))
234                 ip_vs_random_dropentry(ipvs->net);
235         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
239 int
240 ip_vs_use_count_inc(void)
241 {
242         return try_module_get(THIS_MODULE);
243 }
244
245 void
246 ip_vs_use_count_dec(void)
247 {
248         module_put(THIS_MODULE);
249 }
250
251
252 /*
253  *      Hash tables for virtual service lookups (2^IP_VS_SVC_TAB_BITS buckets each)
254  */
255 #define IP_VS_SVC_TAB_BITS 8
256 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259 /* the service table hashed by <netns, protocol, addr, port>, see ip_vs_svc_hashkey() */
260 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261 /* the service table hashed by <netns, fwmark>, see ip_vs_svc_fwm_hashkey() */
262 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266  *      Returns hash value for virtual service
267  */
268 static inline unsigned
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
270                   const union nf_inet_addr *addr, __be16 port)
271 {
272         register unsigned porth = ntohs(port);
273         __be32 addr_fold = addr->ip;
274
275 #ifdef CONFIG_IP_VS_IPV6
276         if (af == AF_INET6)
277                 addr_fold = addr->ip6[0]^addr->ip6[1]^
278                             addr->ip6[2]^addr->ip6[3];
279 #endif
280         addr_fold ^= ((size_t)net>>8);
281
282         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283                 & IP_VS_SVC_TAB_MASK;
284 }
285
286 /*
287  *      Returns hash value of fwmark for virtual service lookup
288  */
289 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293
294 /*
295  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  *      or in the ip_vs_svc_fwm_table by fwmark.
297  *      Should be called with locked tables.
298  */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301         unsigned hash;
302
303         if (svc->flags & IP_VS_SVC_F_HASHED) {
304                 pr_err("%s(): request for already hashed, called from %pF\n",
305                        __func__, __builtin_return_address(0));
306                 return 0;
307         }
308
309         if (svc->fwmark == 0) {
310                 /*
311                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312                  */
313                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314                                          &svc->addr, svc->port);
315                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316         } else {
317                 /*
318                  *  Hash it by fwmark in svc_fwm_table
319                  */
320                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322         }
323
324         svc->flags |= IP_VS_SVC_F_HASHED;
325         /* increase its refcnt because it is referenced by the svc table */
326         atomic_inc(&svc->refcnt);
327         return 1;
328 }
329
330
331 /*
332  *      Unhashes a service from svc_table / svc_fwm_table.
333  *      Should be called with locked tables.
334  */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338                 pr_err("%s(): request for unhash flagged, called from %pF\n",
339                        __func__, __builtin_return_address(0));
340                 return 0;
341         }
342
343         if (svc->fwmark == 0) {
344                 /* Remove it from the svc_table table */
345                 list_del(&svc->s_list);
346         } else {
347                 /* Remove it from the svc_fwm_table table */
348                 list_del(&svc->f_list);
349         }
350
351         svc->flags &= ~IP_VS_SVC_F_HASHED;
352         atomic_dec(&svc->refcnt);
353         return 1;
354 }
355
356
357 /*
358  *      Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362                      const union nf_inet_addr *vaddr, __be16 vport)
363 {
364         unsigned hash;
365         struct ip_vs_service *svc;
366
367         /* Check for "full" addressed entries */
368         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369
370         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371                 if ((svc->af == af)
372                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
373                     && (svc->port == vport)
374                     && (svc->protocol == protocol)
375                     && net_eq(svc->net, net)) {
376                         /* HIT */
377                         return svc;
378                 }
379         }
380
381         return NULL;
382 }
383
384
385 /*
386  *      Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391         unsigned hash;
392         struct ip_vs_service *svc;
393
394         /* Check for fwmark addressed entries */
395         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396
397         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398                 if (svc->fwmark == fwmark && svc->af == af
399                     && net_eq(svc->net, net)) {
400                         /* HIT */
401                         return svc;
402                 }
403         }
404
405         return NULL;
406 }
407
408 struct ip_vs_service *
409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410                   const union nf_inet_addr *vaddr, __be16 vport)
411 {
412         struct ip_vs_service *svc;
413         struct netns_ipvs *ipvs = net_ipvs(net);
414
415         read_lock(&__ip_vs_svc_lock);
416
417         /*
418          *      Check the table hashed by fwmark first
419          */
420         if (fwmark) {
421                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422                 if (svc)
423                         goto out;
424         }
425
426         /*
427          *      Check the table hashed by <protocol,addr,port>
428          *      for "full" addressed entries
429          */
430         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431
432         if (svc == NULL
433             && protocol == IPPROTO_TCP
434             && atomic_read(&ipvs->ftpsvc_counter)
435             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436                 /*
437                  * Check if ftp service entry exists, the packet
438                  * might belong to FTP data connections.
439                  */
440                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441         }
442
443         if (svc == NULL
444             && atomic_read(&ipvs->nullsvc_counter)) {
445                 /*
446                  * Check if the catch-all port (port zero) exists
447                  */
448                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449         }
450
451   out:
452         if (svc)
453                 atomic_inc(&svc->usecnt);
454         read_unlock(&__ip_vs_svc_lock);
455
456         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457                       fwmark, ip_vs_proto_name(protocol),
458                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459                       svc ? "hit" : "not hit");
460
461         return svc;
462 }
463
464
465 static inline void
466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468         atomic_inc(&svc->refcnt);
469         dest->svc = svc;
470 }
471
472 static void
473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475         struct ip_vs_service *svc = dest->svc;
476
477         dest->svc = NULL;
478         if (atomic_dec_and_test(&svc->refcnt)) {
479                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480                               svc->fwmark,
481                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
482                               ntohs(svc->port), atomic_read(&svc->usecnt));
483                 free_percpu(svc->stats.cpustats);
484                 kfree(svc);
485         }
486 }
487
488
489 /*
490  *      Returns hash value for real service
491  */
492 static inline unsigned ip_vs_rs_hashkey(int af,
493                                             const union nf_inet_addr *addr,
494                                             __be16 port)
495 {
496         register unsigned porth = ntohs(port);
497         __be32 addr_fold = addr->ip;
498
499 #ifdef CONFIG_IP_VS_IPV6
500         if (af == AF_INET6)
501                 addr_fold = addr->ip6[0]^addr->ip6[1]^
502                             addr->ip6[2]^addr->ip6[3];
503 #endif
504
505         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506                 & IP_VS_RTAB_MASK;
507 }
508
509 /*
510  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  *      should be called with locked tables.
512  */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515         unsigned hash;
516
517         if (!list_empty(&dest->d_list)) {
518                 return 0;
519         }
520
521         /*
522          *      Hash by proto,addr,port,
523          *      which are the parameters of the real service.
524          */
525         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
527         list_add(&dest->d_list, &ipvs->rs_table[hash]);
528
529         return 1;
530 }
531
532 /*
533  *      UNhashes ip_vs_dest from rs_table.
534  *      should be called with locked tables.
535  */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538         /*
539          * Remove it from the rs_table table.
540          */
541         if (!list_empty(&dest->d_list)) {
542                 list_del(&dest->d_list);
543                 INIT_LIST_HEAD(&dest->d_list);
544         }
545
546         return 1;
547 }
548
549 /*
550  *      Lookup real service by <proto,addr,port> in the real service table.
551  */
552 struct ip_vs_dest *
553 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
554                           const union nf_inet_addr *daddr,
555                           __be16 dport)
556 {
557         struct netns_ipvs *ipvs = net_ipvs(net);
558         unsigned hash;
559         struct ip_vs_dest *dest;
560
561         /*
562          *      Check for "full" addressed entries
563          *      Return the first found entry
564          */
565         hash = ip_vs_rs_hashkey(af, daddr, dport);
566
567         read_lock(&ipvs->rs_lock);
568         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
569                 if ((dest->af == af)
570                     && ip_vs_addr_equal(af, &dest->addr, daddr)
571                     && (dest->port == dport)
572                     && ((dest->protocol == protocol) ||
573                         dest->vfwmark)) {
574                         /* HIT */
575                         read_unlock(&ipvs->rs_lock);
576                         return dest;
577                 }
578         }
579         read_unlock(&ipvs->rs_lock);
580
581         return NULL;
582 }
583
584 /*
585  *      Lookup destination by {addr,port} in the given service
586  */
587 static struct ip_vs_dest *
588 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
589                   __be16 dport)
590 {
591         struct ip_vs_dest *dest;
592
593         /*
594          * Find the destination for the given service
595          */
596         list_for_each_entry(dest, &svc->destinations, n_list) {
597                 if ((dest->af == svc->af)
598                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
599                     && (dest->port == dport)) {
600                         /* HIT */
601                         return dest;
602                 }
603         }
604
605         return NULL;
606 }
607
608 /*
609  * Find destination by {daddr,dport,vaddr,protocol}
610  * Cretaed to be used in ip_vs_process_message() in
611  * the backup synchronization daemon. It finds the
612  * destination to be bound to the received connection
613  * on the backup.
614  *
615  * ip_vs_lookup_real_service() looked promissing, but
616  * seems not working as expected.
617  */
618 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
619                                    const union nf_inet_addr *daddr,
620                                    __be16 dport,
621                                    const union nf_inet_addr *vaddr,
622                                    __be16 vport, __u16 protocol, __u32 fwmark)
623 {
624         struct ip_vs_dest *dest;
625         struct ip_vs_service *svc;
626
627         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
628         if (!svc)
629                 return NULL;
630         dest = ip_vs_lookup_dest(svc, daddr, dport);
631         if (dest)
632                 atomic_inc(&dest->refcnt);
633         ip_vs_service_put(svc);
634         return dest;
635 }
636
637 /*
638  *  Lookup dest by {svc,addr,port} in the destination trash.
639  *  The destination trash is used to hold the destinations that are removed
640  *  from the service table but are still referenced by some conn entries.
641  *  The reason to add the destination trash is when the dest is temporary
642  *  down (either by administrator or by monitor program), the dest can be
643  *  picked back from the trash, the remaining connections to the dest can
644  *  continue, and the counting information of the dest is also useful for
645  *  scheduling.
646  */
647 static struct ip_vs_dest *
648 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
649                      __be16 dport)
650 {
651         struct ip_vs_dest *dest, *nxt;
652         struct netns_ipvs *ipvs = net_ipvs(svc->net);
653
654         /*
655          * Find the destination in trash
656          */
657         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
658                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
659                               "dest->refcnt=%d\n",
660                               dest->vfwmark,
661                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
662                               ntohs(dest->port),
663                               atomic_read(&dest->refcnt));
664                 if (dest->af == svc->af &&
665                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
666                     dest->port == dport &&
667                     dest->vfwmark == svc->fwmark &&
668                     dest->protocol == svc->protocol &&
669                     (svc->fwmark ||
670                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
671                       dest->vport == svc->port))) {
672                         /* HIT */
673                         return dest;
674                 }
675
676                 /*
677                  * Try to purge the destination from trash if not referenced
678                  */
679                 if (atomic_read(&dest->refcnt) == 1) {
680                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
681                                       "from trash\n",
682                                       dest->vfwmark,
683                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
684                                       ntohs(dest->port));
685                         list_del(&dest->n_list);
686                         ip_vs_dst_reset(dest);
687                         __ip_vs_unbind_svc(dest);
688                         free_percpu(dest->stats.cpustats);
689                         kfree(dest);
690                 }
691         }
692
693         return NULL;
694 }
695
696
697 /*
698  *  Clean up all the destinations in the trash
699  *  Called by the ip_vs_control_cleanup()
700  *
701  *  When the ip_vs_control_clearup is activated by ipvs module exit,
702  *  the service tables must have been flushed and all the connections
703  *  are expired, and the refcnt of each destination in the trash must
704  *  be 1, so we simply release them here.
705  */
706 static void ip_vs_trash_cleanup(struct net *net)
707 {
708         struct ip_vs_dest *dest, *nxt;
709         struct netns_ipvs *ipvs = net_ipvs(net);
710
711         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
712                 list_del(&dest->n_list);
713                 ip_vs_dst_reset(dest);
714                 __ip_vs_unbind_svc(dest);
715                 free_percpu(dest->stats.cpustats);
716                 kfree(dest);
717         }
718 }
719
720 static void
721 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
722 {
723 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
724
725         spin_lock_bh(&src->lock);
726
727         IP_VS_SHOW_STATS_COUNTER(conns);
728         IP_VS_SHOW_STATS_COUNTER(inpkts);
729         IP_VS_SHOW_STATS_COUNTER(outpkts);
730         IP_VS_SHOW_STATS_COUNTER(inbytes);
731         IP_VS_SHOW_STATS_COUNTER(outbytes);
732
733         ip_vs_read_estimator(dst, src);
734
735         spin_unlock_bh(&src->lock);
736 }
737
738 static void
739 ip_vs_zero_stats(struct ip_vs_stats *stats)
740 {
741         spin_lock_bh(&stats->lock);
742
743         /* get current counters as zero point, rates are zeroed */
744
745 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
746
747         IP_VS_ZERO_STATS_COUNTER(conns);
748         IP_VS_ZERO_STATS_COUNTER(inpkts);
749         IP_VS_ZERO_STATS_COUNTER(outpkts);
750         IP_VS_ZERO_STATS_COUNTER(inbytes);
751         IP_VS_ZERO_STATS_COUNTER(outbytes);
752
753         ip_vs_zero_estimator(stats);
754
755         spin_unlock_bh(&stats->lock);
756 }
757
758 /*
759  *      Update a destination in the given service
760  */
761 static void
762 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
763                     struct ip_vs_dest_user_kern *udest, int add)
764 {
765         struct netns_ipvs *ipvs = net_ipvs(svc->net);
766         int conn_flags;
767
768         /* set the weight and the flags */
769         atomic_set(&dest->weight, udest->weight);
770         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
771         conn_flags |= IP_VS_CONN_F_INACTIVE;
772
773         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
774         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
775                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
776         } else {
777                 /*
778                  *    Put the real service in rs_table if not present.
779                  *    For now only for NAT!
780                  */
781                 write_lock_bh(&ipvs->rs_lock);
782                 ip_vs_rs_hash(ipvs, dest);
783                 write_unlock_bh(&ipvs->rs_lock);
784         }
785         atomic_set(&dest->conn_flags, conn_flags);
786
787         /* bind the service */
788         if (!dest->svc) {
789                 __ip_vs_bind_svc(dest, svc);
790         } else {
791                 if (dest->svc != svc) {
792                         __ip_vs_unbind_svc(dest);
793                         ip_vs_zero_stats(&dest->stats);
794                         __ip_vs_bind_svc(dest, svc);
795                 }
796         }
797
798         /* set the dest status flags */
799         dest->flags |= IP_VS_DEST_F_AVAILABLE;
800
801         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
802                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
803         dest->u_threshold = udest->u_threshold;
804         dest->l_threshold = udest->l_threshold;
805
806         spin_lock_bh(&dest->dst_lock);
807         ip_vs_dst_reset(dest);
808         spin_unlock_bh(&dest->dst_lock);
809
810         if (add)
811                 ip_vs_start_estimator(svc->net, &dest->stats);
812
813         write_lock_bh(&__ip_vs_svc_lock);
814
815         /* Wait until all other svc users go away */
816         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
817
818         if (add) {
819                 list_add(&dest->n_list, &svc->destinations);
820                 svc->num_dests++;
821         }
822
823         /* call the update_service, because server weight may be changed */
824         if (svc->scheduler->update_service)
825                 svc->scheduler->update_service(svc);
826
827         write_unlock_bh(&__ip_vs_svc_lock);
828 }
829
830
831 /*
832  *      Create a destination for the given service
833  */
834 static int
835 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
836                struct ip_vs_dest **dest_p)
837 {
838         struct ip_vs_dest *dest;
839         unsigned atype;
840
841         EnterFunction(2);
842
843 #ifdef CONFIG_IP_VS_IPV6
844         if (svc->af == AF_INET6) {
845                 atype = ipv6_addr_type(&udest->addr.in6);
846                 if ((!(atype & IPV6_ADDR_UNICAST) ||
847                         atype & IPV6_ADDR_LINKLOCAL) &&
848                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
849                         return -EINVAL;
850         } else
851 #endif
852         {
853                 atype = inet_addr_type(svc->net, udest->addr.ip);
854                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
855                         return -EINVAL;
856         }
857
858         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
859         if (dest == NULL)
860                 return -ENOMEM;
861
862         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
863         if (!dest->stats.cpustats)
864                 goto err_alloc;
865
866         dest->af = svc->af;
867         dest->protocol = svc->protocol;
868         dest->vaddr = svc->addr;
869         dest->vport = svc->port;
870         dest->vfwmark = svc->fwmark;
871         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
872         dest->port = udest->port;
873
874         atomic_set(&dest->activeconns, 0);
875         atomic_set(&dest->inactconns, 0);
876         atomic_set(&dest->persistconns, 0);
877         atomic_set(&dest->refcnt, 1);
878
879         INIT_LIST_HEAD(&dest->d_list);
880         spin_lock_init(&dest->dst_lock);
881         spin_lock_init(&dest->stats.lock);
882         __ip_vs_update_dest(svc, dest, udest, 1);
883
884         *dest_p = dest;
885
886         LeaveFunction(2);
887         return 0;
888
889 err_alloc:
890         kfree(dest);
891         return -ENOMEM;
892 }
893
894
895 /*
896  *      Add a destination into an existing service
897  */
898 static int
899 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
900 {
901         struct ip_vs_dest *dest;
902         union nf_inet_addr daddr;
903         __be16 dport = udest->port;
904         int ret;
905
906         EnterFunction(2);
907
908         if (udest->weight < 0) {
909                 pr_err("%s(): server weight less than zero\n", __func__);
910                 return -ERANGE;
911         }
912
913         if (udest->l_threshold > udest->u_threshold) {
914                 pr_err("%s(): lower threshold is higher than upper threshold\n",
915                         __func__);
916                 return -ERANGE;
917         }
918
919         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
920
921         /*
922          * Check if the dest already exists in the list
923          */
924         dest = ip_vs_lookup_dest(svc, &daddr, dport);
925
926         if (dest != NULL) {
927                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
928                 return -EEXIST;
929         }
930
931         /*
932          * Check if the dest already exists in the trash and
933          * is from the same service
934          */
935         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
936
937         if (dest != NULL) {
938                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
939                               "dest->refcnt=%d, service %u/%s:%u\n",
940                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
941                               atomic_read(&dest->refcnt),
942                               dest->vfwmark,
943                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
944                               ntohs(dest->vport));
945
946                 /*
947                  * Get the destination from the trash
948                  */
949                 list_del(&dest->n_list);
950
951                 __ip_vs_update_dest(svc, dest, udest, 1);
952                 ret = 0;
953         } else {
954                 /*
955                  * Allocate and initialize the dest structure
956                  */
957                 ret = ip_vs_new_dest(svc, udest, &dest);
958         }
959         LeaveFunction(2);
960
961         return ret;
962 }
963
964
965 /*
966  *      Edit a destination in the given service
967  */
968 static int
969 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
970 {
971         struct ip_vs_dest *dest;
972         union nf_inet_addr daddr;
973         __be16 dport = udest->port;
974
975         EnterFunction(2);
976
977         if (udest->weight < 0) {
978                 pr_err("%s(): server weight less than zero\n", __func__);
979                 return -ERANGE;
980         }
981
982         if (udest->l_threshold > udest->u_threshold) {
983                 pr_err("%s(): lower threshold is higher than upper threshold\n",
984                         __func__);
985                 return -ERANGE;
986         }
987
988         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
989
990         /*
991          *  Lookup the destination list
992          */
993         dest = ip_vs_lookup_dest(svc, &daddr, dport);
994
995         if (dest == NULL) {
996                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
997                 return -ENOENT;
998         }
999
1000         __ip_vs_update_dest(svc, dest, udest, 0);
1001         LeaveFunction(2);
1002
1003         return 0;
1004 }
1005
1006
1007 /*
1008  *      Delete a destination (must be already unlinked from the service)
1009  */
1010 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1011 {
1012         struct netns_ipvs *ipvs = net_ipvs(net);
1013
1014         ip_vs_stop_estimator(net, &dest->stats);
1015
1016         /*
1017          *  Remove it from the d-linked list with the real services.
1018          */
1019         write_lock_bh(&ipvs->rs_lock);
1020         ip_vs_rs_unhash(dest);
1021         write_unlock_bh(&ipvs->rs_lock);
1022
1023         /*
1024          *  Decrease the refcnt of the dest, and free the dest
1025          *  if nobody refers to it (refcnt=0). Otherwise, throw
1026          *  the destination into the trash.
1027          */
1028         if (atomic_dec_and_test(&dest->refcnt)) {
1029                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1030                               dest->vfwmark,
1031                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1032                               ntohs(dest->port));
1033                 ip_vs_dst_reset(dest);
1034                 /* simply decrease svc->refcnt here, let the caller check
1035                    and release the service if nobody refers to it.
1036                    Only user context can release destination and service,
1037                    and only one user context can update virtual service at a
1038                    time, so the operation here is OK */
1039                 atomic_dec(&dest->svc->refcnt);
1040                 free_percpu(dest->stats.cpustats);
1041                 kfree(dest);
1042         } else {
1043                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1044                               "dest->refcnt=%d\n",
1045                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1046                               ntohs(dest->port),
1047                               atomic_read(&dest->refcnt));
1048                 list_add(&dest->n_list, &ipvs->dest_trash);
1049                 atomic_inc(&dest->refcnt);
1050         }
1051 }
1052
1053
1054 /*
1055  *      Unlink a destination from the given service
1056  */
1057 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1058                                 struct ip_vs_dest *dest,
1059                                 int svcupd)
1060 {
1061         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1062
1063         /*
1064          *  Remove it from the d-linked destination list.
1065          */
1066         list_del(&dest->n_list);
1067         svc->num_dests--;
1068
1069         /*
1070          *  Call the update_service function of its scheduler
1071          */
1072         if (svcupd && svc->scheduler->update_service)
1073                         svc->scheduler->update_service(svc);
1074 }
1075
1076
1077 /*
1078  *      Delete a destination server in the given service
1079  */
1080 static int
1081 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1082 {
1083         struct ip_vs_dest *dest;
1084         __be16 dport = udest->port;
1085
1086         EnterFunction(2);
1087
1088         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1089
1090         if (dest == NULL) {
1091                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1092                 return -ENOENT;
1093         }
1094
1095         write_lock_bh(&__ip_vs_svc_lock);
1096
1097         /*
1098          *      Wait until all other svc users go away.
1099          */
1100         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1101
1102         /*
1103          *      Unlink dest from the service
1104          */
1105         __ip_vs_unlink_dest(svc, dest, 1);
1106
1107         write_unlock_bh(&__ip_vs_svc_lock);
1108
1109         /*
1110          *      Delete the destination
1111          */
1112         __ip_vs_del_dest(svc->net, dest);
1113
1114         LeaveFunction(2);
1115
1116         return 0;
1117 }
1118
1119
1120 /*
1121  *      Add a service into the service hash table
1122  */
1123 static int
1124 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1125                   struct ip_vs_service **svc_p)
1126 {
1127         int ret = 0;
1128         struct ip_vs_scheduler *sched = NULL;
1129         struct ip_vs_pe *pe = NULL;
1130         struct ip_vs_service *svc = NULL;
1131         struct netns_ipvs *ipvs = net_ipvs(net);
1132
1133         /* increase the module use count */
1134         ip_vs_use_count_inc();
1135
1136         /* Lookup the scheduler by 'u->sched_name' */
1137         sched = ip_vs_scheduler_get(u->sched_name);
1138         if (sched == NULL) {
1139                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1140                 ret = -ENOENT;
1141                 goto out_err;
1142         }
1143
1144         if (u->pe_name && *u->pe_name) {
1145                 pe = ip_vs_pe_getbyname(u->pe_name);
1146                 if (pe == NULL) {
1147                         pr_info("persistence engine module ip_vs_pe_%s "
1148                                 "not found\n", u->pe_name);
1149                         ret = -ENOENT;
1150                         goto out_err;
1151                 }
1152         }
1153
1154 #ifdef CONFIG_IP_VS_IPV6
1155         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1156                 ret = -EINVAL;
1157                 goto out_err;
1158         }
1159 #endif
1160
1161         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1162         if (svc == NULL) {
1163                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1164                 ret = -ENOMEM;
1165                 goto out_err;
1166         }
1167         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1168         if (!svc->stats.cpustats)
1169                 goto out_err;
1170
1171         /* I'm the first user of the service */
1172         atomic_set(&svc->usecnt, 0);
1173         atomic_set(&svc->refcnt, 0);
1174
1175         svc->af = u->af;
1176         svc->protocol = u->protocol;
1177         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1178         svc->port = u->port;
1179         svc->fwmark = u->fwmark;
1180         svc->flags = u->flags;
1181         svc->timeout = u->timeout * HZ;
1182         svc->netmask = u->netmask;
1183         svc->net = net;
1184
1185         INIT_LIST_HEAD(&svc->destinations);
1186         rwlock_init(&svc->sched_lock);
1187         spin_lock_init(&svc->stats.lock);
1188
1189         /* Bind the scheduler */
1190         ret = ip_vs_bind_scheduler(svc, sched);
1191         if (ret)
1192                 goto out_err;
1193         sched = NULL;
1194
1195         /* Bind the ct retriever */
1196         ip_vs_bind_pe(svc, pe);
1197         pe = NULL;
1198
1199         /* Update the virtual service counters */
1200         if (svc->port == FTPPORT)
1201                 atomic_inc(&ipvs->ftpsvc_counter);
1202         else if (svc->port == 0)
1203                 atomic_inc(&ipvs->nullsvc_counter);
1204
1205         ip_vs_start_estimator(net, &svc->stats);
1206
1207         /* Count only IPv4 services for old get/setsockopt interface */
1208         if (svc->af == AF_INET)
1209                 ipvs->num_services++;
1210
1211         /* Hash the service into the service table */
1212         write_lock_bh(&__ip_vs_svc_lock);
1213         ip_vs_svc_hash(svc);
1214         write_unlock_bh(&__ip_vs_svc_lock);
1215
1216         *svc_p = svc;
1217         /* Now there is a service - full throttle */
1218         ipvs->enable = 1;
1219         return 0;
1220
1221
1222  out_err:
1223         if (svc != NULL) {
1224                 ip_vs_unbind_scheduler(svc);
1225                 if (svc->inc) {
1226                         local_bh_disable();
1227                         ip_vs_app_inc_put(svc->inc);
1228                         local_bh_enable();
1229                 }
1230                 if (svc->stats.cpustats)
1231                         free_percpu(svc->stats.cpustats);
1232                 kfree(svc);
1233         }
1234         ip_vs_scheduler_put(sched);
1235         ip_vs_pe_put(pe);
1236
1237         /* decrease the module use count */
1238         ip_vs_use_count_dec();
1239
1240         return ret;
1241 }
1242
1243
1244 /*
1245  *      Edit a service and bind it with a new scheduler
1246  */
1247 static int
1248 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1249 {
1250         struct ip_vs_scheduler *sched, *old_sched;
1251         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1252         int ret = 0;
1253
1254         /*
1255          * Lookup the scheduler, by 'u->sched_name'
1256          */
1257         sched = ip_vs_scheduler_get(u->sched_name);
1258         if (sched == NULL) {
1259                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1260                 return -ENOENT;
1261         }
1262         old_sched = sched;
1263
1264         if (u->pe_name && *u->pe_name) {
1265                 pe = ip_vs_pe_getbyname(u->pe_name);
1266                 if (pe == NULL) {
1267                         pr_info("persistence engine module ip_vs_pe_%s "
1268                                 "not found\n", u->pe_name);
1269                         ret = -ENOENT;
1270                         goto out;
1271                 }
1272                 old_pe = pe;
1273         }
1274
1275 #ifdef CONFIG_IP_VS_IPV6
1276         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1277                 ret = -EINVAL;
1278                 goto out;
1279         }
1280 #endif
1281
1282         write_lock_bh(&__ip_vs_svc_lock);
1283
1284         /*
1285          * Wait until all other svc users go away.
1286          */
1287         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1288
1289         /*
1290          * Set the flags and timeout value
1291          */
1292         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1293         svc->timeout = u->timeout * HZ;
1294         svc->netmask = u->netmask;
1295
1296         old_sched = svc->scheduler;
1297         if (sched != old_sched) {
1298                 /*
1299                  * Unbind the old scheduler
1300                  */
1301                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1302                         old_sched = sched;
1303                         goto out_unlock;
1304                 }
1305
1306                 /*
1307                  * Bind the new scheduler
1308                  */
1309                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1310                         /*
1311                          * If ip_vs_bind_scheduler fails, restore the old
1312                          * scheduler.
1313                          * The main reason of failure is out of memory.
1314                          *
1315                          * The question is if the old scheduler can be
1316                          * restored all the time. TODO: if it cannot be
1317                          * restored some time, we must delete the service,
1318                          * otherwise the system may crash.
1319                          */
1320                         ip_vs_bind_scheduler(svc, old_sched);
1321                         old_sched = sched;
1322                         goto out_unlock;
1323                 }
1324         }
1325
1326         old_pe = svc->pe;
1327         if (pe != old_pe) {
1328                 ip_vs_unbind_pe(svc);
1329                 ip_vs_bind_pe(svc, pe);
1330         }
1331
1332 out_unlock:
1333         write_unlock_bh(&__ip_vs_svc_lock);
1334 out:
1335         ip_vs_scheduler_put(old_sched);
1336         ip_vs_pe_put(old_pe);
1337         return ret;
1338 }
1339
1340
1341 /*
1342  *      Delete a service from the service list
1343  *      - The service must be unlinked, unlocked and not referenced!
1344  *      - We are called under _bh lock
1345  */
1346 static void __ip_vs_del_service(struct ip_vs_service *svc)
1347 {
1348         struct ip_vs_dest *dest, *nxt;
1349         struct ip_vs_scheduler *old_sched;
1350         struct ip_vs_pe *old_pe;
1351         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1352
1353         pr_info("%s: enter\n", __func__);
1354
1355         /* Count only IPv4 services for old get/setsockopt interface */
1356         if (svc->af == AF_INET)
1357                 ipvs->num_services--;
1358
1359         ip_vs_stop_estimator(svc->net, &svc->stats);
1360
1361         /* Unbind scheduler */
1362         old_sched = svc->scheduler;
1363         ip_vs_unbind_scheduler(svc);
1364         ip_vs_scheduler_put(old_sched);
1365
1366         /* Unbind persistence engine */
1367         old_pe = svc->pe;
1368         ip_vs_unbind_pe(svc);
1369         ip_vs_pe_put(old_pe);
1370
1371         /* Unbind app inc */
1372         if (svc->inc) {
1373                 ip_vs_app_inc_put(svc->inc);
1374                 svc->inc = NULL;
1375         }
1376
1377         /*
1378          *    Unlink the whole destination list
1379          */
1380         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1381                 __ip_vs_unlink_dest(svc, dest, 0);
1382                 __ip_vs_del_dest(svc->net, dest);
1383         }
1384
1385         /*
1386          *    Update the virtual service counters
1387          */
1388         if (svc->port == FTPPORT)
1389                 atomic_dec(&ipvs->ftpsvc_counter);
1390         else if (svc->port == 0)
1391                 atomic_dec(&ipvs->nullsvc_counter);
1392
1393         /*
1394          *    Free the service if nobody refers to it
1395          */
1396         if (atomic_read(&svc->refcnt) == 0) {
1397                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1398                               svc->fwmark,
1399                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1400                               ntohs(svc->port), atomic_read(&svc->usecnt));
1401                 free_percpu(svc->stats.cpustats);
1402                 kfree(svc);
1403         }
1404
1405         /* decrease the module use count */
1406         ip_vs_use_count_dec();
1407 }
1408
1409 /*
1410  * Unlink a service from list and try to delete it if its refcnt reached 0
1411  */
1412 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1413 {
1414         /*
1415          * Unhash it from the service table
1416          */
1417         write_lock_bh(&__ip_vs_svc_lock);
1418
1419         ip_vs_svc_unhash(svc);
1420
1421         /*
1422          * Wait until all the svc users go away.
1423          */
1424         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1425
1426         __ip_vs_del_service(svc);
1427
1428         write_unlock_bh(&__ip_vs_svc_lock);
1429 }
1430
1431 /*
1432  *      Delete a service from the service list
1433  */
1434 static int ip_vs_del_service(struct ip_vs_service *svc)
1435 {
1436         if (svc == NULL)
1437                 return -EEXIST;
1438         ip_vs_unlink_service(svc);
1439
1440         return 0;
1441 }
1442
1443
1444 /*
1445  *      Flush all the virtual services
1446  */
1447 static int ip_vs_flush(struct net *net)
1448 {
1449         int idx;
1450         struct ip_vs_service *svc, *nxt;
1451
1452         /*
1453          * Flush the service table hashed by <netns,protocol,addr,port>
1454          */
1455         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1456                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1457                                          s_list) {
1458                         if (net_eq(svc->net, net))
1459                                 ip_vs_unlink_service(svc);
1460                 }
1461         }
1462
1463         /*
1464          * Flush the service table hashed by fwmark
1465          */
1466         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1467                 list_for_each_entry_safe(svc, nxt,
1468                                          &ip_vs_svc_fwm_table[idx], f_list) {
1469                         if (net_eq(svc->net, net))
1470                                 ip_vs_unlink_service(svc);
1471                 }
1472         }
1473
1474         return 0;
1475 }
1476
1477 /*
1478  *      Delete service by {netns} in the service table.
1479  *      Called by __ip_vs_cleanup()
1480  */
1481 void ip_vs_service_net_cleanup(struct net *net)
1482 {
1483         EnterFunction(2);
1484         /* Check for "full" addressed entries */
1485         mutex_lock(&__ip_vs_mutex);
1486         ip_vs_flush(net);
1487         mutex_unlock(&__ip_vs_mutex);
1488         LeaveFunction(2);
1489 }
1490 /*
1491  * Release dst hold by dst_cache
1492  */
1493 static inline void
1494 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1495 {
1496         spin_lock_bh(&dest->dst_lock);
1497         if (dest->dst_cache && dest->dst_cache->dev == dev) {
1498                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1499                               dev->name,
1500                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1501                               ntohs(dest->port),
1502                               atomic_read(&dest->refcnt));
1503                 ip_vs_dst_reset(dest);
1504         }
1505         spin_unlock_bh(&dest->dst_lock);
1506
1507 }
1508 /*
1509  * Netdev event receiver
1510  * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1511  * a device that is "unregister" it must be released.
1512  */
1513 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1514                             void *ptr)
1515 {
1516         struct net_device *dev = ptr;
1517         struct net *net = dev_net(dev);
1518         struct ip_vs_service *svc;
1519         struct ip_vs_dest *dest;
1520         unsigned int idx;
1521
1522         if (event != NETDEV_UNREGISTER)
1523                 return NOTIFY_DONE;
1524         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1525         EnterFunction(2);
1526         mutex_lock(&__ip_vs_mutex);
1527         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1528                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1529                         if (net_eq(svc->net, net)) {
1530                                 list_for_each_entry(dest, &svc->destinations,
1531                                                     n_list) {
1532                                         __ip_vs_dev_reset(dest, dev);
1533                                 }
1534                         }
1535                 }
1536
1537                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1538                         if (net_eq(svc->net, net)) {
1539                                 list_for_each_entry(dest, &svc->destinations,
1540                                                     n_list) {
1541                                         __ip_vs_dev_reset(dest, dev);
1542                                 }
1543                         }
1544
1545                 }
1546         }
1547
1548         list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
1549                 __ip_vs_dev_reset(dest, dev);
1550         }
1551         mutex_unlock(&__ip_vs_mutex);
1552         LeaveFunction(2);
1553         return NOTIFY_DONE;
1554 }
1555
1556 /*
1557  *      Zero counters in a service or all services
1558  */
1559 static int ip_vs_zero_service(struct ip_vs_service *svc)
1560 {
1561         struct ip_vs_dest *dest;
1562
1563         write_lock_bh(&__ip_vs_svc_lock);
1564         list_for_each_entry(dest, &svc->destinations, n_list) {
1565                 ip_vs_zero_stats(&dest->stats);
1566         }
1567         ip_vs_zero_stats(&svc->stats);
1568         write_unlock_bh(&__ip_vs_svc_lock);
1569         return 0;
1570 }
1571
1572 static int ip_vs_zero_all(struct net *net)
1573 {
1574         int idx;
1575         struct ip_vs_service *svc;
1576
1577         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1578                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1579                         if (net_eq(svc->net, net))
1580                                 ip_vs_zero_service(svc);
1581                 }
1582         }
1583
1584         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1585                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1586                         if (net_eq(svc->net, net))
1587                                 ip_vs_zero_service(svc);
1588                 }
1589         }
1590
1591         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1592         return 0;
1593 }
1594
1595 #ifdef CONFIG_SYSCTL
1596 static int
1597 proc_do_defense_mode(ctl_table *table, int write,
1598                      void __user *buffer, size_t *lenp, loff_t *ppos)
1599 {
1600         struct net *net = current->nsproxy->net_ns;
1601         int *valp = table->data;
1602         int val = *valp;
1603         int rc;
1604
1605         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1606         if (write && (*valp != val)) {
1607                 if ((*valp < 0) || (*valp > 3)) {
1608                         /* Restore the correct value */
1609                         *valp = val;
1610                 } else {
1611                         update_defense_level(net_ipvs(net));
1612                 }
1613         }
1614         return rc;
1615 }
1616
1617 static int
1618 proc_do_sync_threshold(ctl_table *table, int write,
1619                        void __user *buffer, size_t *lenp, loff_t *ppos)
1620 {
1621         int *valp = table->data;
1622         int val[2];
1623         int rc;
1624
1625         /* backup the value first */
1626         memcpy(val, valp, sizeof(val));
1627
1628         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1629         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1630                 /* Restore the correct value */
1631                 memcpy(valp, val, sizeof(val));
1632         }
1633         return rc;
1634 }
1635
1636 static int
1637 proc_do_sync_mode(ctl_table *table, int write,
1638                      void __user *buffer, size_t *lenp, loff_t *ppos)
1639 {
1640         int *valp = table->data;
1641         int val = *valp;
1642         int rc;
1643
1644         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1645         if (write && (*valp != val)) {
1646                 if ((*valp < 0) || (*valp > 1)) {
1647                         /* Restore the correct value */
1648                         *valp = val;
1649                 } else {
1650                         struct net *net = current->nsproxy->net_ns;
1651                         ip_vs_sync_switch_mode(net, val);
1652                 }
1653         }
1654         return rc;
1655 }
1656
1657 /*
1658  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1659  *      Do not change order or insert new entries without
1660  *      align with netns init in ip_vs_control_net_init()
1661  */
1662
1663 static struct ctl_table vs_vars[] = {
1664         {
1665                 .procname       = "amemthresh",
1666                 .maxlen         = sizeof(int),
1667                 .mode           = 0644,
1668                 .proc_handler   = proc_dointvec,
1669         },
1670         {
1671                 .procname       = "am_droprate",
1672                 .maxlen         = sizeof(int),
1673                 .mode           = 0644,
1674                 .proc_handler   = proc_dointvec,
1675         },
1676         {
1677                 .procname       = "drop_entry",
1678                 .maxlen         = sizeof(int),
1679                 .mode           = 0644,
1680                 .proc_handler   = proc_do_defense_mode,
1681         },
1682         {
1683                 .procname       = "drop_packet",
1684                 .maxlen         = sizeof(int),
1685                 .mode           = 0644,
1686                 .proc_handler   = proc_do_defense_mode,
1687         },
1688 #ifdef CONFIG_IP_VS_NFCT
1689         {
1690                 .procname       = "conntrack",
1691                 .maxlen         = sizeof(int),
1692                 .mode           = 0644,
1693                 .proc_handler   = &proc_dointvec,
1694         },
1695 #endif
1696         {
1697                 .procname       = "secure_tcp",
1698                 .maxlen         = sizeof(int),
1699                 .mode           = 0644,
1700                 .proc_handler   = proc_do_defense_mode,
1701         },
1702         {
1703                 .procname       = "snat_reroute",
1704                 .maxlen         = sizeof(int),
1705                 .mode           = 0644,
1706                 .proc_handler   = &proc_dointvec,
1707         },
1708         {
1709                 .procname       = "sync_version",
1710                 .maxlen         = sizeof(int),
1711                 .mode           = 0644,
1712                 .proc_handler   = &proc_do_sync_mode,
1713         },
1714         {
1715                 .procname       = "cache_bypass",
1716                 .maxlen         = sizeof(int),
1717                 .mode           = 0644,
1718                 .proc_handler   = proc_dointvec,
1719         },
1720         {
1721                 .procname       = "expire_nodest_conn",
1722                 .maxlen         = sizeof(int),
1723                 .mode           = 0644,
1724                 .proc_handler   = proc_dointvec,
1725         },
1726         {
1727                 .procname       = "expire_quiescent_template",
1728                 .maxlen         = sizeof(int),
1729                 .mode           = 0644,
1730                 .proc_handler   = proc_dointvec,
1731         },
1732         {
1733                 .procname       = "sync_threshold",
1734                 .maxlen         =
1735                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1736                 .mode           = 0644,
1737                 .proc_handler   = proc_do_sync_threshold,
1738         },
1739         {
1740                 .procname       = "nat_icmp_send",
1741                 .maxlen         = sizeof(int),
1742                 .mode           = 0644,
1743                 .proc_handler   = proc_dointvec,
1744         },
1745 #ifdef CONFIG_IP_VS_DEBUG
1746         {
1747                 .procname       = "debug_level",
1748                 .data           = &sysctl_ip_vs_debug_level,
1749                 .maxlen         = sizeof(int),
1750                 .mode           = 0644,
1751                 .proc_handler   = proc_dointvec,
1752         },
1753 #endif
1754 #if 0
1755         {
1756                 .procname       = "timeout_established",
1757                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1758                 .maxlen         = sizeof(int),
1759                 .mode           = 0644,
1760                 .proc_handler   = proc_dointvec_jiffies,
1761         },
1762         {
1763                 .procname       = "timeout_synsent",
1764                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1765                 .maxlen         = sizeof(int),
1766                 .mode           = 0644,
1767                 .proc_handler   = proc_dointvec_jiffies,
1768         },
1769         {
1770                 .procname       = "timeout_synrecv",
1771                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1772                 .maxlen         = sizeof(int),
1773                 .mode           = 0644,
1774                 .proc_handler   = proc_dointvec_jiffies,
1775         },
1776         {
1777                 .procname       = "timeout_finwait",
1778                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1779                 .maxlen         = sizeof(int),
1780                 .mode           = 0644,
1781                 .proc_handler   = proc_dointvec_jiffies,
1782         },
1783         {
1784                 .procname       = "timeout_timewait",
1785                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1786                 .maxlen         = sizeof(int),
1787                 .mode           = 0644,
1788                 .proc_handler   = proc_dointvec_jiffies,
1789         },
1790         {
1791                 .procname       = "timeout_close",
1792                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1793                 .maxlen         = sizeof(int),
1794                 .mode           = 0644,
1795                 .proc_handler   = proc_dointvec_jiffies,
1796         },
1797         {
1798                 .procname       = "timeout_closewait",
1799                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1800                 .maxlen         = sizeof(int),
1801                 .mode           = 0644,
1802                 .proc_handler   = proc_dointvec_jiffies,
1803         },
1804         {
1805                 .procname       = "timeout_lastack",
1806                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1807                 .maxlen         = sizeof(int),
1808                 .mode           = 0644,
1809                 .proc_handler   = proc_dointvec_jiffies,
1810         },
1811         {
1812                 .procname       = "timeout_listen",
1813                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1814                 .maxlen         = sizeof(int),
1815                 .mode           = 0644,
1816                 .proc_handler   = proc_dointvec_jiffies,
1817         },
1818         {
1819                 .procname       = "timeout_synack",
1820                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1821                 .maxlen         = sizeof(int),
1822                 .mode           = 0644,
1823                 .proc_handler   = proc_dointvec_jiffies,
1824         },
1825         {
1826                 .procname       = "timeout_udp",
1827                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1828                 .maxlen         = sizeof(int),
1829                 .mode           = 0644,
1830                 .proc_handler   = proc_dointvec_jiffies,
1831         },
1832         {
1833                 .procname       = "timeout_icmp",
1834                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1835                 .maxlen         = sizeof(int),
1836                 .mode           = 0644,
1837                 .proc_handler   = proc_dointvec_jiffies,
1838         },
1839 #endif
1840         { }
1841 };
1842
1843 const struct ctl_path net_vs_ctl_path[] = {
1844         { .procname = "net", },
1845         { .procname = "ipv4", },
1846         { .procname = "vs", },
1847         { }
1848 };
1849 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1850 #endif
1851
1852 #ifdef CONFIG_PROC_FS
1853
1854 struct ip_vs_iter {
1855         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1856         struct list_head *table;
1857         int bucket;
1858 };
1859
1860 /*
1861  *      Write the contents of the VS rule table to a PROCfs file.
1862  *      (It is kept just for backward compatibility)
1863  */
1864 static inline const char *ip_vs_fwd_name(unsigned flags)
1865 {
1866         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1867         case IP_VS_CONN_F_LOCALNODE:
1868                 return "Local";
1869         case IP_VS_CONN_F_TUNNEL:
1870                 return "Tunnel";
1871         case IP_VS_CONN_F_DROUTE:
1872                 return "Route";
1873         default:
1874                 return "Masq";
1875         }
1876 }
1877
1878
1879 /* Get the Nth entry in the two lists */
1880 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1881 {
1882         struct net *net = seq_file_net(seq);
1883         struct ip_vs_iter *iter = seq->private;
1884         int idx;
1885         struct ip_vs_service *svc;
1886
1887         /* look in hash by protocol */
1888         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1889                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1890                         if (net_eq(svc->net, net) && pos-- == 0) {
1891                                 iter->table = ip_vs_svc_table;
1892                                 iter->bucket = idx;
1893                                 return svc;
1894                         }
1895                 }
1896         }
1897
1898         /* keep looking in fwmark */
1899         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1900                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1901                         if (net_eq(svc->net, net) && pos-- == 0) {
1902                                 iter->table = ip_vs_svc_fwm_table;
1903                                 iter->bucket = idx;
1904                                 return svc;
1905                         }
1906                 }
1907         }
1908
1909         return NULL;
1910 }
1911
1912 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1913 __acquires(__ip_vs_svc_lock)
1914 {
1915
1916         read_lock_bh(&__ip_vs_svc_lock);
1917         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1918 }
1919
1920
1921 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1922 {
1923         struct list_head *e;
1924         struct ip_vs_iter *iter;
1925         struct ip_vs_service *svc;
1926
1927         ++*pos;
1928         if (v == SEQ_START_TOKEN)
1929                 return ip_vs_info_array(seq,0);
1930
1931         svc = v;
1932         iter = seq->private;
1933
1934         if (iter->table == ip_vs_svc_table) {
1935                 /* next service in table hashed by protocol */
1936                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1937                         return list_entry(e, struct ip_vs_service, s_list);
1938
1939
1940                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1941                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1942                                             s_list) {
1943                                 return svc;
1944                         }
1945                 }
1946
1947                 iter->table = ip_vs_svc_fwm_table;
1948                 iter->bucket = -1;
1949                 goto scan_fwmark;
1950         }
1951
1952         /* next service in hashed by fwmark */
1953         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1954                 return list_entry(e, struct ip_vs_service, f_list);
1955
1956  scan_fwmark:
1957         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1958                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1959                                     f_list)
1960                         return svc;
1961         }
1962
1963         return NULL;
1964 }
1965
1966 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1967 __releases(__ip_vs_svc_lock)
1968 {
1969         read_unlock_bh(&__ip_vs_svc_lock);
1970 }
1971
1972
1973 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1974 {
1975         if (v == SEQ_START_TOKEN) {
1976                 seq_printf(seq,
1977                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1978                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1979                 seq_puts(seq,
1980                          "Prot LocalAddress:Port Scheduler Flags\n");
1981                 seq_puts(seq,
1982                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1983         } else {
1984                 const struct ip_vs_service *svc = v;
1985                 const struct ip_vs_iter *iter = seq->private;
1986                 const struct ip_vs_dest *dest;
1987
1988                 if (iter->table == ip_vs_svc_table) {
1989 #ifdef CONFIG_IP_VS_IPV6
1990                         if (svc->af == AF_INET6)
1991                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1992                                            ip_vs_proto_name(svc->protocol),
1993                                            &svc->addr.in6,
1994                                            ntohs(svc->port),
1995                                            svc->scheduler->name);
1996                         else
1997 #endif
1998                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1999                                            ip_vs_proto_name(svc->protocol),
2000                                            ntohl(svc->addr.ip),
2001                                            ntohs(svc->port),
2002                                            svc->scheduler->name,
2003                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2004                 } else {
2005                         seq_printf(seq, "FWM  %08X %s %s",
2006                                    svc->fwmark, svc->scheduler->name,
2007                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2008                 }
2009
2010                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2011                         seq_printf(seq, "persistent %d %08X\n",
2012                                 svc->timeout,
2013                                 ntohl(svc->netmask));
2014                 else
2015                         seq_putc(seq, '\n');
2016
2017                 list_for_each_entry(dest, &svc->destinations, n_list) {
2018 #ifdef CONFIG_IP_VS_IPV6
2019                         if (dest->af == AF_INET6)
2020                                 seq_printf(seq,
2021                                            "  -> [%pI6]:%04X"
2022                                            "      %-7s %-6d %-10d %-10d\n",
2023                                            &dest->addr.in6,
2024                                            ntohs(dest->port),
2025                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2026                                            atomic_read(&dest->weight),
2027                                            atomic_read(&dest->activeconns),
2028                                            atomic_read(&dest->inactconns));
2029                         else
2030 #endif
2031                                 seq_printf(seq,
2032                                            "  -> %08X:%04X      "
2033                                            "%-7s %-6d %-10d %-10d\n",
2034                                            ntohl(dest->addr.ip),
2035                                            ntohs(dest->port),
2036                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2037                                            atomic_read(&dest->weight),
2038                                            atomic_read(&dest->activeconns),
2039                                            atomic_read(&dest->inactconns));
2040
2041                 }
2042         }
2043         return 0;
2044 }
2045
2046 static const struct seq_operations ip_vs_info_seq_ops = {
2047         .start = ip_vs_info_seq_start,
2048         .next  = ip_vs_info_seq_next,
2049         .stop  = ip_vs_info_seq_stop,
2050         .show  = ip_vs_info_seq_show,
2051 };
2052
2053 static int ip_vs_info_open(struct inode *inode, struct file *file)
2054 {
2055         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2056                         sizeof(struct ip_vs_iter));
2057 }
2058
2059 static const struct file_operations ip_vs_info_fops = {
2060         .owner   = THIS_MODULE,
2061         .open    = ip_vs_info_open,
2062         .read    = seq_read,
2063         .llseek  = seq_lseek,
2064         .release = seq_release_net,
2065 };
2066
2067 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2068 {
2069         struct net *net = seq_file_single_net(seq);
2070         struct ip_vs_stats_user show;
2071
2072 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2073         seq_puts(seq,
2074                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2075         seq_printf(seq,
2076                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2077
2078         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2079         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2080                    show.inpkts, show.outpkts,
2081                    (unsigned long long) show.inbytes,
2082                    (unsigned long long) show.outbytes);
2083
2084 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2085         seq_puts(seq,
2086                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2087         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2088                         show.cps, show.inpps, show.outpps,
2089                         show.inbps, show.outbps);
2090
2091         return 0;
2092 }
2093
2094 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2095 {
2096         return single_open_net(inode, file, ip_vs_stats_show);
2097 }
2098
2099 static const struct file_operations ip_vs_stats_fops = {
2100         .owner = THIS_MODULE,
2101         .open = ip_vs_stats_seq_open,
2102         .read = seq_read,
2103         .llseek = seq_lseek,
2104         .release = single_release_net,
2105 };
2106
2107 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2108 {
2109         struct net *net = seq_file_single_net(seq);
2110         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2111         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2112         struct ip_vs_stats_user rates;
2113         int i;
2114
2115 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2116         seq_puts(seq,
2117                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2118         seq_printf(seq,
2119                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2120
2121         for_each_possible_cpu(i) {
2122                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2123                 unsigned int start;
2124                 __u64 inbytes, outbytes;
2125
2126                 do {
2127                         start = u64_stats_fetch_begin_bh(&u->syncp);
2128                         inbytes = u->ustats.inbytes;
2129                         outbytes = u->ustats.outbytes;
2130                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2131
2132                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2133                            i, u->ustats.conns, u->ustats.inpkts,
2134                            u->ustats.outpkts, (__u64)inbytes,
2135                            (__u64)outbytes);
2136         }
2137
2138         spin_lock_bh(&tot_stats->lock);
2139
2140         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2141                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2142                    tot_stats->ustats.outpkts,
2143                    (unsigned long long) tot_stats->ustats.inbytes,
2144                    (unsigned long long) tot_stats->ustats.outbytes);
2145
2146         ip_vs_read_estimator(&rates, tot_stats);
2147
2148         spin_unlock_bh(&tot_stats->lock);
2149
2150 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2151         seq_puts(seq,
2152                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2153         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2154                         rates.cps,
2155                         rates.inpps,
2156                         rates.outpps,
2157                         rates.inbps,
2158                         rates.outbps);
2159
2160         return 0;
2161 }
2162
2163 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2164 {
2165         return single_open_net(inode, file, ip_vs_stats_percpu_show);
2166 }
2167
2168 static const struct file_operations ip_vs_stats_percpu_fops = {
2169         .owner = THIS_MODULE,
2170         .open = ip_vs_stats_percpu_seq_open,
2171         .read = seq_read,
2172         .llseek = seq_lseek,
2173         .release = single_release_net,
2174 };
2175 #endif
2176
2177 /*
2178  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2179  */
2180 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2181 {
2182 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2183         struct ip_vs_proto_data *pd;
2184 #endif
2185
2186         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2187                   u->tcp_timeout,
2188                   u->tcp_fin_timeout,
2189                   u->udp_timeout);
2190
2191 #ifdef CONFIG_IP_VS_PROTO_TCP
2192         if (u->tcp_timeout) {
2193                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2194                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2195                         = u->tcp_timeout * HZ;
2196         }
2197
2198         if (u->tcp_fin_timeout) {
2199                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2200                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2201                         = u->tcp_fin_timeout * HZ;
2202         }
2203 #endif
2204
2205 #ifdef CONFIG_IP_VS_PROTO_UDP
2206         if (u->udp_timeout) {
2207                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2208                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2209                         = u->udp_timeout * HZ;
2210         }
2211 #endif
2212         return 0;
2213 }
2214
2215
/*
 * SET_CMDID() turns a set-sockopt command into an index of set_arglen[].
 * The argument is parenthesized so that an expression passed as @cmd
 * (e.g. a conditional) is evaluated before the subtraction.
 * The *_ARG_LEN macros are the argument sizes expected from userspace;
 * MAX_ARG_LEN is the largest of them and sizes the on-stack buffer.
 */
#define SET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
                                 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN             SVCDEST_ARG_LEN
2223
2224 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2225         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2226         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2227         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2228         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2229         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2230         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2231         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2232         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2233         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2234         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2235         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2236 };
2237
2238 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2239                                   struct ip_vs_service_user *usvc_compat)
2240 {
2241         memset(usvc, 0, sizeof(*usvc));
2242
2243         usvc->af                = AF_INET;
2244         usvc->protocol          = usvc_compat->protocol;
2245         usvc->addr.ip           = usvc_compat->addr;
2246         usvc->port              = usvc_compat->port;
2247         usvc->fwmark            = usvc_compat->fwmark;
2248
2249         /* Deep copy of sched_name is not needed here */
2250         usvc->sched_name        = usvc_compat->sched_name;
2251
2252         usvc->flags             = usvc_compat->flags;
2253         usvc->timeout           = usvc_compat->timeout;
2254         usvc->netmask           = usvc_compat->netmask;
2255 }
2256
2257 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2258                                    struct ip_vs_dest_user *udest_compat)
2259 {
2260         memset(udest, 0, sizeof(*udest));
2261
2262         udest->addr.ip          = udest_compat->addr;
2263         udest->port             = udest_compat->port;
2264         udest->conn_flags       = udest_compat->conn_flags;
2265         udest->weight           = udest_compat->weight;
2266         udest->u_threshold      = udest_compat->u_threshold;
2267         udest->l_threshold      = udest_compat->l_threshold;
2268 }
2269
2270 static int
2271 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2272 {
2273         struct net *net = sock_net(sk);
2274         int ret;
2275         unsigned char arg[MAX_ARG_LEN];
2276         struct ip_vs_service_user *usvc_compat;
2277         struct ip_vs_service_user_kern usvc;
2278         struct ip_vs_service *svc;
2279         struct ip_vs_dest_user *udest_compat;
2280         struct ip_vs_dest_user_kern udest;
2281         struct netns_ipvs *ipvs = net_ipvs(net);
2282
2283         if (!capable(CAP_NET_ADMIN))
2284                 return -EPERM;
2285
2286         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2287                 return -EINVAL;
2288         if (len < 0 || len >  MAX_ARG_LEN)
2289                 return -EINVAL;
2290         if (len != set_arglen[SET_CMDID(cmd)]) {
2291                 pr_err("set_ctl: len %u != %u\n",
2292                        len, set_arglen[SET_CMDID(cmd)]);
2293                 return -EINVAL;
2294         }
2295
2296         if (copy_from_user(arg, user, len) != 0)
2297                 return -EFAULT;
2298
2299         /* increase the module use count */
2300         ip_vs_use_count_inc();
2301
2302         /* Handle daemons since they have another lock */
2303         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2304             cmd == IP_VS_SO_SET_STOPDAEMON) {
2305                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2306
2307                 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2308                         ret = -ERESTARTSYS;
2309                         goto out_dec;
2310                 }
2311                 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2312                         ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2313                                                 dm->syncid);
2314                 else
2315                         ret = stop_sync_thread(net, dm->state);
2316                 mutex_unlock(&ipvs->sync_mutex);
2317                 goto out_dec;
2318         }
2319
2320         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2321                 ret = -ERESTARTSYS;
2322                 goto out_dec;
2323         }
2324
2325         if (cmd == IP_VS_SO_SET_FLUSH) {
2326                 /* Flush the virtual service */
2327                 ret = ip_vs_flush(net);
2328                 goto out_unlock;
2329         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2330                 /* Set timeout values for (tcp tcpfin udp) */
2331                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2332                 goto out_unlock;
2333         }
2334
2335         usvc_compat = (struct ip_vs_service_user *)arg;
2336         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2337
2338         /* We only use the new structs internally, so copy userspace compat
2339          * structs to extended internal versions */
2340         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2341         ip_vs_copy_udest_compat(&udest, udest_compat);
2342
2343         if (cmd == IP_VS_SO_SET_ZERO) {
2344                 /* if no service address is set, zero counters in all */
2345                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2346                         ret = ip_vs_zero_all(net);
2347                         goto out_unlock;
2348                 }
2349         }
2350
2351         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2352         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2353             usvc.protocol != IPPROTO_SCTP) {
2354                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2355                        usvc.protocol, &usvc.addr.ip,
2356                        ntohs(usvc.port), usvc.sched_name);
2357                 ret = -EFAULT;
2358                 goto out_unlock;
2359         }
2360
2361         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2362         if (usvc.fwmark == 0)
2363                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2364                                            &usvc.addr, usvc.port);
2365         else
2366                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2367
2368         if (cmd != IP_VS_SO_SET_ADD
2369             && (svc == NULL || svc->protocol != usvc.protocol)) {
2370                 ret = -ESRCH;
2371                 goto out_unlock;
2372         }
2373
2374         switch (cmd) {
2375         case IP_VS_SO_SET_ADD:
2376                 if (svc != NULL)
2377                         ret = -EEXIST;
2378                 else
2379                         ret = ip_vs_add_service(net, &usvc, &svc);
2380                 break;
2381         case IP_VS_SO_SET_EDIT:
2382                 ret = ip_vs_edit_service(svc, &usvc);
2383                 break;
2384         case IP_VS_SO_SET_DEL:
2385                 ret = ip_vs_del_service(svc);
2386                 if (!ret)
2387                         goto out_unlock;
2388                 break;
2389         case IP_VS_SO_SET_ZERO:
2390                 ret = ip_vs_zero_service(svc);
2391                 break;
2392         case IP_VS_SO_SET_ADDDEST:
2393                 ret = ip_vs_add_dest(svc, &udest);
2394                 break;
2395         case IP_VS_SO_SET_EDITDEST:
2396                 ret = ip_vs_edit_dest(svc, &udest);
2397                 break;
2398         case IP_VS_SO_SET_DELDEST:
2399                 ret = ip_vs_del_dest(svc, &udest);
2400                 break;
2401         default:
2402                 ret = -EINVAL;
2403         }
2404
2405   out_unlock:
2406         mutex_unlock(&__ip_vs_mutex);
2407   out_dec:
2408         /* decrease the module use count */
2409         ip_vs_use_count_dec();
2410
2411         return ret;
2412 }
2413
2414
2415 static void
2416 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2417 {
2418         dst->protocol = src->protocol;
2419         dst->addr = src->addr.ip;
2420         dst->port = src->port;
2421         dst->fwmark = src->fwmark;
2422         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2423         dst->flags = src->flags;
2424         dst->timeout = src->timeout / HZ;
2425         dst->netmask = src->netmask;
2426         dst->num_dests = src->num_dests;
2427         ip_vs_copy_stats(&dst->stats, &src->stats);
2428 }
2429
2430 static inline int
2431 __ip_vs_get_service_entries(struct net *net,
2432                             const struct ip_vs_get_services *get,
2433                             struct ip_vs_get_services __user *uptr)
2434 {
2435         int idx, count=0;
2436         struct ip_vs_service *svc;
2437         struct ip_vs_service_entry entry;
2438         int ret = 0;
2439
2440         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2441                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2442                         /* Only expose IPv4 entries to old interface */
2443                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2444                                 continue;
2445
2446                         if (count >= get->num_services)
2447                                 goto out;
2448                         memset(&entry, 0, sizeof(entry));
2449                         ip_vs_copy_service(&entry, svc);
2450                         if (copy_to_user(&uptr->entrytable[count],
2451                                          &entry, sizeof(entry))) {
2452                                 ret = -EFAULT;
2453                                 goto out;
2454                         }
2455                         count++;
2456                 }
2457         }
2458
2459         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2460                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2461                         /* Only expose IPv4 entries to old interface */
2462                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2463                                 continue;
2464
2465                         if (count >= get->num_services)
2466                                 goto out;
2467                         memset(&entry, 0, sizeof(entry));
2468                         ip_vs_copy_service(&entry, svc);
2469                         if (copy_to_user(&uptr->entrytable[count],
2470                                          &entry, sizeof(entry))) {
2471                                 ret = -EFAULT;
2472                                 goto out;
2473                         }
2474                         count++;
2475                 }
2476         }
2477 out:
2478         return ret;
2479 }
2480
2481 static inline int
2482 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2483                          struct ip_vs_get_dests __user *uptr)
2484 {
2485         struct ip_vs_service *svc;
2486         union nf_inet_addr addr = { .ip = get->addr };
2487         int ret = 0;
2488
2489         if (get->fwmark)
2490                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2491         else
2492                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2493                                            get->port);
2494
2495         if (svc) {
2496                 int count = 0;
2497                 struct ip_vs_dest *dest;
2498                 struct ip_vs_dest_entry entry;
2499
2500                 list_for_each_entry(dest, &svc->destinations, n_list) {
2501                         if (count >= get->num_dests)
2502                                 break;
2503
2504                         entry.addr = dest->addr.ip;
2505                         entry.port = dest->port;
2506                         entry.conn_flags = atomic_read(&dest->conn_flags);
2507                         entry.weight = atomic_read(&dest->weight);
2508                         entry.u_threshold = dest->u_threshold;
2509                         entry.l_threshold = dest->l_threshold;
2510                         entry.activeconns = atomic_read(&dest->activeconns);
2511                         entry.inactconns = atomic_read(&dest->inactconns);
2512                         entry.persistconns = atomic_read(&dest->persistconns);
2513                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2514                         if (copy_to_user(&uptr->entrytable[count],
2515                                          &entry, sizeof(entry))) {
2516                                 ret = -EFAULT;
2517                                 break;
2518                         }
2519                         count++;
2520                 }
2521         } else
2522                 ret = -ESRCH;
2523         return ret;
2524 }
2525
2526 static inline void
2527 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2528 {
2529 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2530         struct ip_vs_proto_data *pd;
2531 #endif
2532
2533 #ifdef CONFIG_IP_VS_PROTO_TCP
2534         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2535         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2536         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2537 #endif
2538 #ifdef CONFIG_IP_VS_PROTO_UDP
2539         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2540         u->udp_timeout =
2541                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2542 #endif
2543 }
2544
2545
/*
 * GET_CMDID() turns a get-sockopt command into an index of get_arglen[].
 * The argument is parenthesized so that an expression passed as @cmd is
 * evaluated before the subtraction.
 * The GET_*_ARG_LEN macros are the minimum argument sizes per command.
 */
#define GET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2553
2554 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2555         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2556         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2557         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2558         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2559         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2560         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2561         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2562 };
2563
2564 static int
2565 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2566 {
2567         unsigned char arg[128];
2568         int ret = 0;
2569         unsigned int copylen;
2570         struct net *net = sock_net(sk);
2571         struct netns_ipvs *ipvs = net_ipvs(net);
2572
2573         BUG_ON(!net);
2574         if (!capable(CAP_NET_ADMIN))
2575                 return -EPERM;
2576
2577         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2578                 return -EINVAL;
2579
2580         if (*len < get_arglen[GET_CMDID(cmd)]) {
2581                 pr_err("get_ctl: len %u < %u\n",
2582                        *len, get_arglen[GET_CMDID(cmd)]);
2583                 return -EINVAL;
2584         }
2585
2586         copylen = get_arglen[GET_CMDID(cmd)];
2587         if (copylen > 128)
2588                 return -EINVAL;
2589
2590         if (copy_from_user(arg, user, copylen) != 0)
2591                 return -EFAULT;
2592         /*
2593          * Handle daemons first since it has its own locking
2594          */
2595         if (cmd == IP_VS_SO_GET_DAEMON) {
2596                 struct ip_vs_daemon_user d[2];
2597
2598                 memset(&d, 0, sizeof(d));
2599                 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2600                         return -ERESTARTSYS;
2601
2602                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2603                         d[0].state = IP_VS_STATE_MASTER;
2604                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2605                                 sizeof(d[0].mcast_ifn));
2606                         d[0].syncid = ipvs->master_syncid;
2607                 }
2608                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2609                         d[1].state = IP_VS_STATE_BACKUP;
2610                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2611                                 sizeof(d[1].mcast_ifn));
2612                         d[1].syncid = ipvs->backup_syncid;
2613                 }
2614                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2615                         ret = -EFAULT;
2616                 mutex_unlock(&ipvs->sync_mutex);
2617                 return ret;
2618         }
2619
2620         if (mutex_lock_interruptible(&__ip_vs_mutex))
2621                 return -ERESTARTSYS;
2622
2623         switch (cmd) {
2624         case IP_VS_SO_GET_VERSION:
2625         {
2626                 char buf[64];
2627
2628                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2629                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2630                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2631                         ret = -EFAULT;
2632                         goto out;
2633                 }
2634                 *len = strlen(buf)+1;
2635         }
2636         break;
2637
2638         case IP_VS_SO_GET_INFO:
2639         {
2640                 struct ip_vs_getinfo info;
2641                 info.version = IP_VS_VERSION_CODE;
2642                 info.size = ip_vs_conn_tab_size;
2643                 info.num_services = ipvs->num_services;
2644                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2645                         ret = -EFAULT;
2646         }
2647         break;
2648
2649         case IP_VS_SO_GET_SERVICES:
2650         {
2651                 struct ip_vs_get_services *get;
2652                 int size;
2653
2654                 get = (struct ip_vs_get_services *)arg;
2655                 size = sizeof(*get) +
2656                         sizeof(struct ip_vs_service_entry) * get->num_services;
2657                 if (*len != size) {
2658                         pr_err("length: %u != %u\n", *len, size);
2659                         ret = -EINVAL;
2660                         goto out;
2661                 }
2662                 ret = __ip_vs_get_service_entries(net, get, user);
2663         }
2664         break;
2665
2666         case IP_VS_SO_GET_SERVICE:
2667         {
2668                 struct ip_vs_service_entry *entry;
2669                 struct ip_vs_service *svc;
2670                 union nf_inet_addr addr;
2671
2672                 entry = (struct ip_vs_service_entry *)arg;
2673                 addr.ip = entry->addr;
2674                 if (entry->fwmark)
2675                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2676                 else
2677                         svc = __ip_vs_service_find(net, AF_INET,
2678                                                    entry->protocol, &addr,
2679                                                    entry->port);
2680                 if (svc) {
2681                         ip_vs_copy_service(entry, svc);
2682                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2683                                 ret = -EFAULT;
2684                 } else
2685                         ret = -ESRCH;
2686         }
2687         break;
2688
2689         case IP_VS_SO_GET_DESTS:
2690         {
2691                 struct ip_vs_get_dests *get;
2692                 int size;
2693
2694                 get = (struct ip_vs_get_dests *)arg;
2695                 size = sizeof(*get) +
2696                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2697                 if (*len != size) {
2698                         pr_err("length: %u != %u\n", *len, size);
2699                         ret = -EINVAL;
2700                         goto out;
2701                 }
2702                 ret = __ip_vs_get_dest_entries(net, get, user);
2703         }
2704         break;
2705
2706         case IP_VS_SO_GET_TIMEOUT:
2707         {
2708                 struct ip_vs_timeout_user t;
2709
2710                 __ip_vs_get_timeouts(net, &t);
2711                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2712                         ret = -EFAULT;
2713         }
2714         break;
2715
2716         default:
2717                 ret = -EINVAL;
2718         }
2719
2720 out:
2721         mutex_unlock(&__ip_vs_mutex);
2722         return ret;
2723 }
2724
2725
2726 static struct nf_sockopt_ops ip_vs_sockopts = {
2727         .pf             = PF_INET,
2728         .set_optmin     = IP_VS_BASE_CTL,
2729         .set_optmax     = IP_VS_SO_SET_MAX+1,
2730         .set            = do_ip_vs_set_ctl,
2731         .get_optmin     = IP_VS_BASE_CTL,
2732         .get_optmax     = IP_VS_SO_GET_MAX+1,
2733         .get            = do_ip_vs_get_ctl,
2734         .owner          = THIS_MODULE,
2735 };
2736
2737 /*
2738  * Generic Netlink interface
2739  */
2740
2741 /* IPVS genetlink family */
2742 static struct genl_family ip_vs_genl_family = {
2743         .id             = GENL_ID_GENERATE,
2744         .hdrsize        = 0,
2745         .name           = IPVS_GENL_NAME,
2746         .version        = IPVS_GENL_VERSION,
2747         .maxattr        = IPVS_CMD_MAX,
2748         .netnsok        = true,         /* Make ipvsadm to work on netns */
2749 };
2750
2751 /* Policy used for first-level command attributes */
2752 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2753         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2754         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2755         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2756         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2757         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2758         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2759 };
2760
2761 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2762 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2763         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2764         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2765                                             .len = IP_VS_IFNAME_MAXLEN },
2766         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2767 };
2768
2769 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2770 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2771         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2772         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2773         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2774                                             .len = sizeof(union nf_inet_addr) },
2775         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2776         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2777         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2778                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2779         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2780                                             .len = IP_VS_PENAME_MAXLEN },
2781         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2782                                             .len = sizeof(struct ip_vs_flags) },
2783         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2784         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2785         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2786 };
2787
2788 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2789 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2790         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2791                                             .len = sizeof(union nf_inet_addr) },
2792         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2793         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2794         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2795         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2796         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2797         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2798         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2799         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2800         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2801 };
2802
2803 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2804                                  struct ip_vs_stats *stats)
2805 {
2806         struct ip_vs_stats_user ustats;
2807         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2808         if (!nl_stats)
2809                 return -EMSGSIZE;
2810
2811         ip_vs_copy_stats(&ustats, stats);
2812
2813         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2814         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2815         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2816         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2817         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2818         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2819         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2820         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2821         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2822         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2823
2824         nla_nest_end(skb, nl_stats);
2825
2826         return 0;
2827
2828 nla_put_failure:
2829         nla_nest_cancel(skb, nl_stats);
2830         return -EMSGSIZE;
2831 }
2832
2833 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2834                                    struct ip_vs_service *svc)
2835 {
2836         struct nlattr *nl_service;
2837         struct ip_vs_flags flags = { .flags = svc->flags,
2838                                      .mask = ~0 };
2839
2840         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2841         if (!nl_service)
2842                 return -EMSGSIZE;
2843
2844         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2845
2846         if (svc->fwmark) {
2847                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2848         } else {
2849                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2850                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2851                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2852         }
2853
2854         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2855         if (svc->pe)
2856                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2857         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2858         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2859         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2860
2861         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2862                 goto nla_put_failure;
2863
2864         nla_nest_end(skb, nl_service);
2865
2866         return 0;
2867
2868 nla_put_failure:
2869         nla_nest_cancel(skb, nl_service);
2870         return -EMSGSIZE;
2871 }
2872
2873 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2874                                    struct ip_vs_service *svc,
2875                                    struct netlink_callback *cb)
2876 {
2877         void *hdr;
2878
2879         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2880                           &ip_vs_genl_family, NLM_F_MULTI,
2881                           IPVS_CMD_NEW_SERVICE);
2882         if (!hdr)
2883                 return -EMSGSIZE;
2884
2885         if (ip_vs_genl_fill_service(skb, svc) < 0)
2886                 goto nla_put_failure;
2887
2888         return genlmsg_end(skb, hdr);
2889
2890 nla_put_failure:
2891         genlmsg_cancel(skb, hdr);
2892         return -EMSGSIZE;
2893 }
2894
2895 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2896                                     struct netlink_callback *cb)
2897 {
2898         int idx = 0, i;
2899         int start = cb->args[0];
2900         struct ip_vs_service *svc;
2901         struct net *net = skb_sknet(skb);
2902
2903         mutex_lock(&__ip_vs_mutex);
2904         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2905                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2906                         if (++idx <= start || !net_eq(svc->net, net))
2907                                 continue;
2908                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2909                                 idx--;
2910                                 goto nla_put_failure;
2911                         }
2912                 }
2913         }
2914
2915         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2916                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2917                         if (++idx <= start || !net_eq(svc->net, net))
2918                                 continue;
2919                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2920                                 idx--;
2921                                 goto nla_put_failure;
2922                         }
2923                 }
2924         }
2925
2926 nla_put_failure:
2927         mutex_unlock(&__ip_vs_mutex);
2928         cb->args[0] = idx;
2929
2930         return skb->len;
2931 }
2932
2933 static int ip_vs_genl_parse_service(struct net *net,
2934                                     struct ip_vs_service_user_kern *usvc,
2935                                     struct nlattr *nla, int full_entry,
2936                                     struct ip_vs_service **ret_svc)
2937 {
2938         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2939         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2940         struct ip_vs_service *svc;
2941
2942         /* Parse mandatory identifying service fields first */
2943         if (nla == NULL ||
2944             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2945                 return -EINVAL;
2946
2947         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2948         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2949         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2950         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2951         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2952
2953         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2954                 return -EINVAL;
2955
2956         memset(usvc, 0, sizeof(*usvc));
2957
2958         usvc->af = nla_get_u16(nla_af);
2959 #ifdef CONFIG_IP_VS_IPV6
2960         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2961 #else
2962         if (usvc->af != AF_INET)
2963 #endif
2964                 return -EAFNOSUPPORT;
2965
2966         if (nla_fwmark) {
2967                 usvc->protocol = IPPROTO_TCP;
2968                 usvc->fwmark = nla_get_u32(nla_fwmark);
2969         } else {
2970                 usvc->protocol = nla_get_u16(nla_protocol);
2971                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2972                 usvc->port = nla_get_u16(nla_port);
2973                 usvc->fwmark = 0;
2974         }
2975
2976         if (usvc->fwmark)
2977                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2978         else
2979                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2980                                            &usvc->addr, usvc->port);
2981         *ret_svc = svc;
2982
2983         /* If a full entry was requested, check for the additional fields */
2984         if (full_entry) {
2985                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2986                               *nla_netmask;
2987                 struct ip_vs_flags flags;
2988
2989                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2990                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2991                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2992                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2993                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2994
2995                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2996                         return -EINVAL;
2997
2998                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2999
3000                 /* prefill flags from service if it already exists */
3001                 if (svc)
3002                         usvc->flags = svc->flags;
3003
3004                 /* set new flags from userland */
3005                 usvc->flags = (usvc->flags & ~flags.mask) |
3006                               (flags.flags & flags.mask);
3007                 usvc->sched_name = nla_data(nla_sched);
3008                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3009                 usvc->timeout = nla_get_u32(nla_timeout);
3010                 usvc->netmask = nla_get_u32(nla_netmask);
3011         }
3012
3013         return 0;
3014 }
3015
3016 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3017                                                      struct nlattr *nla)
3018 {
3019         struct ip_vs_service_user_kern usvc;
3020         struct ip_vs_service *svc;
3021         int ret;
3022
3023         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3024         return ret ? ERR_PTR(ret) : svc;
3025 }
3026
3027 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3028 {
3029         struct nlattr *nl_dest;
3030
3031         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3032         if (!nl_dest)
3033                 return -EMSGSIZE;
3034
3035         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
3036         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
3037
3038         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3039                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
3040         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
3041         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
3042         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
3043         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3044                     atomic_read(&dest->activeconns));
3045         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3046                     atomic_read(&dest->inactconns));
3047         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3048                     atomic_read(&dest->persistconns));
3049
3050         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3051                 goto nla_put_failure;
3052
3053         nla_nest_end(skb, nl_dest);
3054
3055         return 0;
3056
3057 nla_put_failure:
3058         nla_nest_cancel(skb, nl_dest);
3059         return -EMSGSIZE;
3060 }
3061
3062 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3063                                 struct netlink_callback *cb)
3064 {
3065         void *hdr;
3066
3067         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3068                           &ip_vs_genl_family, NLM_F_MULTI,
3069                           IPVS_CMD_NEW_DEST);
3070         if (!hdr)
3071                 return -EMSGSIZE;
3072
3073         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3074                 goto nla_put_failure;
3075
3076         return genlmsg_end(skb, hdr);
3077
3078 nla_put_failure:
3079         genlmsg_cancel(skb, hdr);
3080         return -EMSGSIZE;
3081 }
3082
3083 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3084                                  struct netlink_callback *cb)
3085 {
3086         int idx = 0;
3087         int start = cb->args[0];
3088         struct ip_vs_service *svc;
3089         struct ip_vs_dest *dest;
3090         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3091         struct net *net = skb_sknet(skb);
3092
3093         mutex_lock(&__ip_vs_mutex);
3094
3095         /* Try to find the service for which to dump destinations */
3096         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3097                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3098                 goto out_err;
3099
3100
3101         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3102         if (IS_ERR(svc) || svc == NULL)
3103                 goto out_err;
3104
3105         /* Dump the destinations */
3106         list_for_each_entry(dest, &svc->destinations, n_list) {
3107                 if (++idx <= start)
3108                         continue;
3109                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3110                         idx--;
3111                         goto nla_put_failure;
3112                 }
3113         }
3114
3115 nla_put_failure:
3116         cb->args[0] = idx;
3117
3118 out_err:
3119         mutex_unlock(&__ip_vs_mutex);
3120
3121         return skb->len;
3122 }
3123
3124 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3125                                  struct nlattr *nla, int full_entry)
3126 {
3127         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3128         struct nlattr *nla_addr, *nla_port;
3129
3130         /* Parse mandatory identifying destination fields first */
3131         if (nla == NULL ||
3132             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3133                 return -EINVAL;
3134
3135         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3136         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3137
3138         if (!(nla_addr && nla_port))
3139                 return -EINVAL;
3140
3141         memset(udest, 0, sizeof(*udest));
3142
3143         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3144         udest->port = nla_get_u16(nla_port);
3145
3146         /* If a full entry was requested, check for the additional fields */
3147         if (full_entry) {
3148                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3149                               *nla_l_thresh;
3150
3151                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3152                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3153                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3154                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3155
3156                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3157                         return -EINVAL;
3158
3159                 udest->conn_flags = nla_get_u32(nla_fwd)
3160                                     & IP_VS_CONN_F_FWD_MASK;
3161                 udest->weight = nla_get_u32(nla_weight);
3162                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3163                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3164         }
3165
3166         return 0;
3167 }
3168
3169 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3170                                   const char *mcast_ifn, __be32 syncid)
3171 {
3172         struct nlattr *nl_daemon;
3173
3174         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3175         if (!nl_daemon)
3176                 return -EMSGSIZE;
3177
3178         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3179         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3180         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3181
3182         nla_nest_end(skb, nl_daemon);
3183
3184         return 0;
3185
3186 nla_put_failure:
3187         nla_nest_cancel(skb, nl_daemon);
3188         return -EMSGSIZE;
3189 }
3190
3191 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3192                                   const char *mcast_ifn, __be32 syncid,
3193                                   struct netlink_callback *cb)
3194 {
3195         void *hdr;
3196         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3197                           &ip_vs_genl_family, NLM_F_MULTI,
3198                           IPVS_CMD_NEW_DAEMON);
3199         if (!hdr)
3200                 return -EMSGSIZE;
3201
3202         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3203                 goto nla_put_failure;
3204
3205         return genlmsg_end(skb, hdr);
3206
3207 nla_put_failure:
3208         genlmsg_cancel(skb, hdr);
3209         return -EMSGSIZE;
3210 }
3211
3212 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3213                                    struct netlink_callback *cb)
3214 {
3215         struct net *net = skb_sknet(skb);
3216         struct netns_ipvs *ipvs = net_ipvs(net);
3217
3218         mutex_lock(&ipvs->sync_mutex);
3219         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3220                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3221                                            ipvs->master_mcast_ifn,
3222                                            ipvs->master_syncid, cb) < 0)
3223                         goto nla_put_failure;
3224
3225                 cb->args[0] = 1;
3226         }
3227
3228         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3229                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3230                                            ipvs->backup_mcast_ifn,
3231                                            ipvs->backup_syncid, cb) < 0)
3232                         goto nla_put_failure;
3233
3234                 cb->args[1] = 1;
3235         }
3236
3237 nla_put_failure:
3238         mutex_unlock(&ipvs->sync_mutex);
3239
3240         return skb->len;
3241 }
3242
3243 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3244 {
3245         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3246               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3247               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3248                 return -EINVAL;
3249
3250         return start_sync_thread(net,
3251                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3252                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3253                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3254 }
3255
3256 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3257 {
3258         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3259                 return -EINVAL;
3260
3261         return stop_sync_thread(net,
3262                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3263 }
3264
3265 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3266 {
3267         struct ip_vs_timeout_user t;
3268
3269         __ip_vs_get_timeouts(net, &t);
3270
3271         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3272                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3273
3274         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3275                 t.tcp_fin_timeout =
3276                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3277
3278         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3279                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3280
3281         return ip_vs_set_timeout(net, &t);
3282 }
3283
3284 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3285 {
3286         int ret = 0, cmd;
3287         struct net *net;
3288         struct netns_ipvs *ipvs;
3289
3290         net = skb_sknet(skb);
3291         ipvs = net_ipvs(net);
3292         cmd = info->genlhdr->cmd;
3293
3294         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3295                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3296
3297                 mutex_lock(&ipvs->sync_mutex);
3298                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3299                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3300                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3301                                      ip_vs_daemon_policy)) {
3302                         ret = -EINVAL;
3303                         goto out;
3304                 }
3305
3306                 if (cmd == IPVS_CMD_NEW_DAEMON)
3307                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3308                 else
3309                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3310 out:
3311                 mutex_unlock(&ipvs->sync_mutex);
3312         }
3313         return ret;
3314 }
3315
3316 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3317 {
3318         struct ip_vs_service *svc = NULL;
3319         struct ip_vs_service_user_kern usvc;
3320         struct ip_vs_dest_user_kern udest;
3321         int ret = 0, cmd;
3322         int need_full_svc = 0, need_full_dest = 0;
3323         struct net *net;
3324
3325         net = skb_sknet(skb);
3326         cmd = info->genlhdr->cmd;
3327
3328         mutex_lock(&__ip_vs_mutex);
3329
3330         if (cmd == IPVS_CMD_FLUSH) {
3331                 ret = ip_vs_flush(net);
3332                 goto out;
3333         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3334                 ret = ip_vs_genl_set_config(net, info->attrs);
3335                 goto out;
3336         } else if (cmd == IPVS_CMD_ZERO &&
3337                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3338                 ret = ip_vs_zero_all(net);
3339                 goto out;
3340         }
3341
3342         /* All following commands require a service argument, so check if we
3343          * received a valid one. We need a full service specification when
3344          * adding / editing a service. Only identifying members otherwise. */
3345         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3346                 need_full_svc = 1;
3347
3348         ret = ip_vs_genl_parse_service(net, &usvc,
3349                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3350                                        need_full_svc, &svc);
3351         if (ret)
3352                 goto out;
3353
3354         /* Unless we're adding a new service, the service must already exist */
3355         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3356                 ret = -ESRCH;
3357                 goto out;
3358         }
3359
3360         /* Destination commands require a valid destination argument. For
3361          * adding / editing a destination, we need a full destination
3362          * specification. */
3363         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3364             cmd == IPVS_CMD_DEL_DEST) {
3365                 if (cmd != IPVS_CMD_DEL_DEST)
3366                         need_full_dest = 1;
3367
3368                 ret = ip_vs_genl_parse_dest(&udest,
3369                                             info->attrs[IPVS_CMD_ATTR_DEST],
3370                                             need_full_dest);
3371                 if (ret)
3372                         goto out;
3373         }
3374
3375         switch (cmd) {
3376         case IPVS_CMD_NEW_SERVICE:
3377                 if (svc == NULL)
3378                         ret = ip_vs_add_service(net, &usvc, &svc);
3379                 else
3380                         ret = -EEXIST;
3381                 break;
3382         case IPVS_CMD_SET_SERVICE:
3383                 ret = ip_vs_edit_service(svc, &usvc);
3384                 break;
3385         case IPVS_CMD_DEL_SERVICE:
3386                 ret = ip_vs_del_service(svc);
3387                 /* do not use svc, it can be freed */
3388                 break;
3389         case IPVS_CMD_NEW_DEST:
3390                 ret = ip_vs_add_dest(svc, &udest);
3391                 break;
3392         case IPVS_CMD_SET_DEST:
3393                 ret = ip_vs_edit_dest(svc, &udest);
3394                 break;
3395         case IPVS_CMD_DEL_DEST:
3396                 ret = ip_vs_del_dest(svc, &udest);
3397                 break;
3398         case IPVS_CMD_ZERO:
3399                 ret = ip_vs_zero_service(svc);
3400                 break;
3401         default:
3402                 ret = -EINVAL;
3403         }
3404
3405 out:
3406         mutex_unlock(&__ip_vs_mutex);
3407
3408         return ret;
3409 }
3410
3411 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3412 {
3413         struct sk_buff *msg;
3414         void *reply;
3415         int ret, cmd, reply_cmd;
3416         struct net *net;
3417
3418         net = skb_sknet(skb);
3419         cmd = info->genlhdr->cmd;
3420
3421         if (cmd == IPVS_CMD_GET_SERVICE)
3422                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3423         else if (cmd == IPVS_CMD_GET_INFO)
3424                 reply_cmd = IPVS_CMD_SET_INFO;
3425         else if (cmd == IPVS_CMD_GET_CONFIG)
3426                 reply_cmd = IPVS_CMD_SET_CONFIG;
3427         else {
3428                 pr_err("unknown Generic Netlink command\n");
3429                 return -EINVAL;
3430         }
3431
3432         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3433         if (!msg)
3434                 return -ENOMEM;
3435
3436         mutex_lock(&__ip_vs_mutex);
3437
3438         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3439         if (reply == NULL)
3440                 goto nla_put_failure;
3441
3442         switch (cmd) {
3443         case IPVS_CMD_GET_SERVICE:
3444         {
3445                 struct ip_vs_service *svc;
3446
3447                 svc = ip_vs_genl_find_service(net,
3448                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3449                 if (IS_ERR(svc)) {
3450                         ret = PTR_ERR(svc);
3451                         goto out_err;
3452                 } else if (svc) {
3453                         ret = ip_vs_genl_fill_service(msg, svc);
3454                         if (ret)
3455                                 goto nla_put_failure;
3456                 } else {
3457                         ret = -ESRCH;
3458                         goto out_err;
3459                 }
3460
3461                 break;
3462         }
3463
3464         case IPVS_CMD_GET_CONFIG:
3465         {
3466                 struct ip_vs_timeout_user t;
3467
3468                 __ip_vs_get_timeouts(net, &t);
3469 #ifdef CONFIG_IP_VS_PROTO_TCP
3470                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3471                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3472                             t.tcp_fin_timeout);
3473 #endif
3474 #ifdef CONFIG_IP_VS_PROTO_UDP
3475                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3476 #endif
3477
3478                 break;
3479         }
3480
3481         case IPVS_CMD_GET_INFO:
3482                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3483                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3484                             ip_vs_conn_tab_size);
3485                 break;
3486         }
3487
3488         genlmsg_end(msg, reply);
3489         ret = genlmsg_reply(msg, info);
3490         goto out;
3491
3492 nla_put_failure:
3493         pr_err("not enough space in Netlink message\n");
3494         ret = -EMSGSIZE;
3495
3496 out_err:
3497         nlmsg_free(msg);
3498 out:
3499         mutex_unlock(&__ip_vs_mutex);
3500
3501         return ret;
3502 }
3503
3504
3505 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3506         {
3507                 .cmd    = IPVS_CMD_NEW_SERVICE,
3508                 .flags  = GENL_ADMIN_PERM,
3509                 .policy = ip_vs_cmd_policy,
3510                 .doit   = ip_vs_genl_set_cmd,
3511         },
3512         {
3513                 .cmd    = IPVS_CMD_SET_SERVICE,
3514                 .flags  = GENL_ADMIN_PERM,
3515                 .policy = ip_vs_cmd_policy,
3516                 .doit   = ip_vs_genl_set_cmd,
3517         },
3518         {
3519                 .cmd    = IPVS_CMD_DEL_SERVICE,
3520                 .flags  = GENL_ADMIN_PERM,
3521                 .policy = ip_vs_cmd_policy,
3522                 .doit   = ip_vs_genl_set_cmd,
3523         },
3524         {
3525                 .cmd    = IPVS_CMD_GET_SERVICE,
3526                 .flags  = GENL_ADMIN_PERM,
3527                 .doit   = ip_vs_genl_get_cmd,
3528                 .dumpit = ip_vs_genl_dump_services,
3529                 .policy = ip_vs_cmd_policy,
3530         },
3531         {
3532                 .cmd    = IPVS_CMD_NEW_DEST,
3533                 .flags  = GENL_ADMIN_PERM,
3534                 .policy = ip_vs_cmd_policy,
3535                 .doit   = ip_vs_genl_set_cmd,
3536         },
3537         {
3538                 .cmd    = IPVS_CMD_SET_DEST,
3539                 .flags  = GENL_ADMIN_PERM,
3540                 .policy = ip_vs_cmd_policy,
3541                 .doit   = ip_vs_genl_set_cmd,
3542         },
3543         {
3544                 .cmd    = IPVS_CMD_DEL_DEST,
3545                 .flags  = GENL_ADMIN_PERM,
3546                 .policy = ip_vs_cmd_policy,
3547                 .doit   = ip_vs_genl_set_cmd,
3548         },
3549         {
3550                 .cmd    = IPVS_CMD_GET_DEST,
3551                 .flags  = GENL_ADMIN_PERM,
3552                 .policy = ip_vs_cmd_policy,
3553                 .dumpit = ip_vs_genl_dump_dests,
3554         },
3555         {
3556                 .cmd    = IPVS_CMD_NEW_DAEMON,
3557                 .flags  = GENL_ADMIN_PERM,
3558                 .policy = ip_vs_cmd_policy,
3559                 .doit   = ip_vs_genl_set_daemon,
3560         },
3561         {
3562                 .cmd    = IPVS_CMD_DEL_DAEMON,
3563                 .flags  = GENL_ADMIN_PERM,
3564                 .policy = ip_vs_cmd_policy,
3565                 .doit   = ip_vs_genl_set_daemon,
3566         },
3567         {
3568                 .cmd    = IPVS_CMD_GET_DAEMON,
3569                 .flags  = GENL_ADMIN_PERM,
3570                 .dumpit = ip_vs_genl_dump_daemons,
3571         },
3572         {
3573                 .cmd    = IPVS_CMD_SET_CONFIG,
3574                 .flags  = GENL_ADMIN_PERM,
3575                 .policy = ip_vs_cmd_policy,
3576                 .doit   = ip_vs_genl_set_cmd,
3577         },
3578         {
3579                 .cmd    = IPVS_CMD_GET_CONFIG,
3580                 .flags  = GENL_ADMIN_PERM,
3581                 .doit   = ip_vs_genl_get_cmd,
3582         },
3583         {
3584                 .cmd    = IPVS_CMD_GET_INFO,
3585                 .flags  = GENL_ADMIN_PERM,
3586                 .doit   = ip_vs_genl_get_cmd,
3587         },
3588         {
3589                 .cmd    = IPVS_CMD_ZERO,
3590                 .flags  = GENL_ADMIN_PERM,
3591                 .policy = ip_vs_cmd_policy,
3592                 .doit   = ip_vs_genl_set_cmd,
3593         },
3594         {
3595                 .cmd    = IPVS_CMD_FLUSH,
3596                 .flags  = GENL_ADMIN_PERM,
3597                 .doit   = ip_vs_genl_set_cmd,
3598         },
3599 };
3600
3601 static int __init ip_vs_genl_register(void)
3602 {
3603         return genl_register_family_with_ops(&ip_vs_genl_family,
3604                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3605 }
3606
/*
 * Unregister ip_vs_genl_family, the Generic Netlink family that
 * ip_vs_genl_register() registers.  The return value of
 * genl_unregister_family(), if any, is not checked.
 */
3607 static void ip_vs_genl_unregister(void)
3608 {
3609         genl_unregister_family(&ip_vs_genl_family);
3610 }
3611
3612 /* End of Generic Netlink interface definitions */
3613
3614 /*
3615  * per netns init/exit func.
3616  */
3617 #ifdef CONFIG_SYSCTL
3618 int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3619 {
3620         int idx;
3621         struct netns_ipvs *ipvs = net_ipvs(net);
3622         struct ctl_table *tbl;
3623
3624         atomic_set(&ipvs->dropentry, 0);
3625         spin_lock_init(&ipvs->dropentry_lock);
3626         spin_lock_init(&ipvs->droppacket_lock);
3627         spin_lock_init(&ipvs->securetcp_lock);
3628
3629         if (!net_eq(net, &init_net)) {
3630                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3631                 if (tbl == NULL)
3632                         return -ENOMEM;
3633         } else
3634                 tbl = vs_vars;
3635         /* Initialize sysctl defaults */
3636         idx = 0;
3637         ipvs->sysctl_amemthresh = 1024;
3638         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3639         ipvs->sysctl_am_droprate = 10;
3640         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3641         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3642         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3643 #ifdef CONFIG_IP_VS_NFCT
3644         tbl[idx++].data = &ipvs->sysctl_conntrack;
3645 #endif
3646         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3647         ipvs->sysctl_snat_reroute = 1;
3648         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3649         ipvs->sysctl_sync_ver = 1;
3650         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3651         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3652         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3653         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3654         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3655         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3656         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3657         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3658         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3659
3660
3661         ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3662                                                      tbl);
3663         if (ipvs->sysctl_hdr == NULL) {
3664                 if (!net_eq(net, &init_net))
3665                         kfree(tbl);
3666                 return -ENOMEM;
3667         }
3668         ip_vs_start_estimator(net, &ipvs->tot_stats);
3669         ipvs->sysctl_tbl = tbl;
3670         /* Schedule defense work */
3671         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3672         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3673
3674         return 0;
3675 }
3676
3677 void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net)
3678 {
3679         struct netns_ipvs *ipvs = net_ipvs(net);
3680
3681         cancel_delayed_work_sync(&ipvs->defense_work);
3682         cancel_work_sync(&ipvs->defense_work.work);
3683         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3684 }
3685
3686 #else
3687
3688 int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3689 void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3690
3691 #endif
3692
/*
 * Notifier block whose callback is ip_vs_dst_event().  It is registered
 * with register_netdevice_notifier() in ip_vs_control_init() and
 * unregistered again in ip_vs_control_cleanup().
 */
3693 static struct notifier_block ip_vs_dst_notifier = {
3694         .notifier_call = ip_vs_dst_event,
3695 };
3696
3697 int __net_init ip_vs_control_net_init(struct net *net)
3698 {
3699         int idx;
3700         struct netns_ipvs *ipvs = net_ipvs(net);
3701
3702         rwlock_init(&ipvs->rs_lock);
3703
3704         /* Initialize rs_table */
3705         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3706                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3707
3708         INIT_LIST_HEAD(&ipvs->dest_trash);
3709         atomic_set(&ipvs->ftpsvc_counter, 0);
3710         atomic_set(&ipvs->nullsvc_counter, 0);
3711
3712         /* procfs stats */
3713         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3714         if (!ipvs->tot_stats.cpustats)
3715                 return -ENOMEM;
3716
3717         spin_lock_init(&ipvs->tot_stats.lock);
3718
3719         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3720         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3721         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3722                              &ip_vs_stats_percpu_fops);
3723
3724         if (ip_vs_control_net_init_sysctl(net))
3725                 goto err;
3726
3727         return 0;
3728
3729 err:
3730         free_percpu(ipvs->tot_stats.cpustats);
3731         return -ENOMEM;
3732 }
3733
3734 void __net_exit ip_vs_control_net_cleanup(struct net *net)
3735 {
3736         struct netns_ipvs *ipvs = net_ipvs(net);
3737
3738         ip_vs_trash_cleanup(net);
3739         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3740         ip_vs_control_net_cleanup_sysctl(net);
3741         proc_net_remove(net, "ip_vs_stats_percpu");
3742         proc_net_remove(net, "ip_vs_stats");
3743         proc_net_remove(net, "ip_vs");
3744         free_percpu(ipvs->tot_stats.cpustats);
3745 }
3746
3747 int __init ip_vs_control_init(void)
3748 {
3749         int idx;
3750         int ret;
3751
3752         EnterFunction(2);
3753
3754         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3755         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3756                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3757                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3758         }
3759
3760         smp_wmb();      /* Do we really need it now ? */
3761
3762         ret = nf_register_sockopt(&ip_vs_sockopts);
3763         if (ret) {
3764                 pr_err("cannot register sockopt.\n");
3765                 goto err_sock;
3766         }
3767
3768         ret = ip_vs_genl_register();
3769         if (ret) {
3770                 pr_err("cannot register Generic Netlink interface.\n");
3771                 goto err_genl;
3772         }
3773
3774         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3775         if (ret < 0)
3776                 goto err_notf;
3777
3778         LeaveFunction(2);
3779         return 0;
3780
3781 err_notf:
3782         ip_vs_genl_unregister();
3783 err_genl:
3784         nf_unregister_sockopt(&ip_vs_sockopts);
3785 err_sock:
3786         return ret;
3787 }
3788
3789
/*
 * Module-wide teardown of the IPVS control interface.  Undoes the
 * registrations made by ip_vs_control_init() in reverse order: netdevice
 * notifier first, then the Generic Netlink family, then the sockopt
 * interface.
 */
3790 void ip_vs_control_cleanup(void)
3791 {
3792         EnterFunction(2);
3793         unregister_netdevice_notifier(&ip_vs_dst_notifier);
3794         ip_vs_genl_unregister();
3795         nf_unregister_sockopt(&ip_vs_sockopts);
3796         LeaveFunction(2);
3797 }